Databricks Service Account Integrations
Problem
You are building interactive or rendered content in Python or R which needs to access a private Databricks resource using a Service Principal. The content must authenticate to Databricks using the Service Principal’s credentials.
Solution
The examples below illustrates how to use the OAuth integrations feature of Posit Connect to delegate authorization to the data provider. These examples use the Service Principal’s OAuth access token to call the Databricks API, allowing content to access private Databricks resources.
When publishing the content to Connect make sure the following environment variables are set for the deployed content:
DATABRICKS_HOST
DATABRICKS_PATH
Make sure to set the following additional environment variables for local development:
CLIENT_ID
CLIENT_SECRET
from posit.connect.external.databricks import (
PositContentCredentialsStrategy,
PositLocalContentCredentialsStrategy,
)from databricks import sql
from databricks.sdk.core import ApiClient, Config
from databricks.sdk.service.iam import CurrentUserAPI
import os
import pandas as pd
= os.getenv('DATABRICKS_HOST')
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = f"https://{DATABRICKS_HOST}/oidc/v1/token"
TOKEN_ENDPOINT_URL
# Rather than relying on the Databricks CLI as a local strategy, we use
# PositLocalContentCredentialsStrategy as a drop-in replacement.
# Can be replaced with the Databricks CLI implementation when
# https://github.com/databricks/cli/issues/1939 is resolved.
= PositLocalContentCredentialsStrategy(
local_strategy =TOKEN_ENDPOINT_URL,
token_endpoint_url=os.getenv('CLIENT_ID'),
client_id=os.getenv('CLIENT_SECRET'),
client_secret
)= PositContentCredentialsStrategy(local_strategy=local_strategy)
posit_strategy = Config(host=DATABRICKS_HOST_URL, credentials_strategy=posit_strategy)
cfg
= CurrentUserAPI(ApiClient(cfg)).me()
databricks_user_info print(f"Hello, {databricks_user_info.display_name}!")
= "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
query with sql.connect(
=DATABRICKS_HOST,
server_hostname=os.getenv('DATABRICKS_PATH'),
http_path=posit_strategy.sql_credentials_provider(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
cursor.execute(query)= cursor.fetchall()
rows print(pd.DataFrame([row.asDict() for row in rows]))
Make sure to set the following additional environment variables for local development:
DATABRICKS_TOKEN
library(connectapi)
library(dbplyr)
library(dplyr)
library(httr2)
library(odbc)
# obtain a databricks OAuth token.
# 1. if running on Posit Connect, exchange Connect's content-session-token
# for the Service Principal's Databricks OAuth access token
# 2. if running locally, read the Databricks token from the
# DATABRICKS_TOKEN environment var
<- function() {
databricks_token <- Sys.getenv("DATABRICKS_HOST")
host <- NULL
token
# if running on Connect
if (Sys.getenv("RSTUDIO_PRODUCT") == "CONNECT") {
# init Posit Connect API client
<- connect()
client # exchange content session token for Service Account OAuth access token
# Connect makes the content session token available in the
# environment variable `CONNECT_CONTENT_SESSION_TOKEN`.
<- get_oauth_content_credentials(client)
credentials <- credentials$access_token
token else {
} <- Sys.getenv("DATABRICKS_TOKEN")
token
}
token
}
# call the databricks "current user" API to get
# information about the authenticated user.
<- function() {
databricks_user <- Sys.getenv("DATABRICKS_HOST")
host <- databricks_token()
token
<- file.path(
url paste0("https://", host),
"api/2.0/preview/scim/v2/Me",
fsep = "/"
)<- httr2::request(url) |>
resp ::req_auth_bearer_token(token) |>
httr2::req_retry(max_tries = 2) |>
httr2::req_perform()
httr2
<- httr2::resp_body_json(resp)
resp_body
resp_body
}
# execute a query against a data source in Databricks
<- function() {
databricks_data_query <- dbConnect(
con ::databricks(),
odbchttpPath = Sys.getenv("DATABRICKS_PATH"),
authMech = 11,
auth_flow = 0,
auth_accesstoken = databricks_token()
)
tbl(
con,in_catalog("samples", "nyctaxi", "trips")
|>
) head(10) |>
collect()
}
databricks_user()$displayName
databricks_data_query()