Databricks Integrations with Python
Problem
You are building an interactive Python application which needs to act as the person visiting the application when accessing a private Databricks resource. The content must authenticate to Databricks using the viewer’s Databricks credentials. For example, reading data from a Databricks Unity Catalog table that has row-level permissions and each content viewer has a different level of data access.
Solution
The examples below illustrate how to use the OAuth integrations feature of Posit Connect to delegate authorization to the data provider. These examples use the viewer’s OAuth access token to call the Databricks API on behalf of the content viewer, allowing content to access private Databricks resources as the content viewer.
When publishing the content to Connect make sure the following environment variables are set for the deployed content:
DATABRICKS_HOST
DATABRICKS_PATH
requirements.txt
-sql-connector==3.3.0
databricks-sdk==0.29.0
databricks==2.15.0
dash-sdk>=0.4.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
import flask
import pandas as pd
from dash import Dash, Input, Output, dash_table, html
from databricks import sql
from databricks.sdk.core import ApiClient, Config, databricks_cli
from databricks.sdk.service.iam import CurrentUserAPI
from posit.connect.external.databricks import PositCredentialsStrategy
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= None
df = Dash(__name__)
app
= html.Div(
app.layout =[
childrenid="greeting", children="Loading..."),
html.Div(id="table-container"),
html.Div(id="dummy"), # dummy element to trigger callback on page load
html.Div(
]
)
@app.callback(
"table-container", "children"), Output("greeting", "children")],
[Output("dummy", "children"),
Input(
)def update_page(_):
"""
Dash example application that shows user information and
the first few rows from a table hosted in Databricks.
"""
= flask.request.headers.get("Posit-Connect-User-Session-Token")
session_token = PositCredentialsStrategy(
posit_strategy =databricks_cli, user_session_token=session_token
local_strategy
)= Config(
cfg =DATABRICKS_HOST_URL,
host# uses Posit's custom credential_strategy if running on Connect,
# otherwise falls back to the strategy defined by local_strategy
=posit_strategy,
credentials_strategy
)
def get_greeting():
= CurrentUserAPI(ApiClient(cfg)).me()
databricks_user_info return f"Hello, {databricks_user_info.display_name}!"
def get_table():
global df
if df is None:
= "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
query
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=posit_strategy.sql_credentials_provider(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
cursor.execute(query)= cursor.fetchall()
rows = pd.DataFrame(
df =[col[0] for col in cursor.description]
rows, columns
)
= dash_table.DataTable(
table id="table",
=[{"name": i, "id": i} for i in df.columns],
columns=df.to_dict("records"),
data={"overflowX": "scroll"},
style_table
)return table
return get_table(), get_greeting()
if __name__ == "__main__":
=True) app.run(debug
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
python app.py
requirements.txt
-sql-connector==3.3.0
databricks-sdk==0.29.0
databricks==0.110.0
fastapi-sdk>=0.4.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
from typing import Annotated
from databricks import sql
from databricks.sdk.core import Config, databricks_cli
from fastapi import FastAPI, Header
from fastapi.responses import JSONResponse
from posit.connect.external.databricks import PositCredentialsStrategy
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= None
rows = FastAPI()
app
@app.get("/fares")
async def get_fares(
str | None, Header()] = None,
posit_connect_user_session_token: Annotated[-> JSONResponse:
) """
FastAPI example API that returns the first few rows from
a table hosted in Databricks.
"""
global rows
= PositCredentialsStrategy(
posit_strategy =databricks_cli,
local_strategy=posit_connect_user_session_token,
user_session_token
)= Config(
cfg =DATABRICKS_HOST_URL,
host# uses Posit's custom credential_strategy if running on Connect,
# otherwise falls back to the strategy defined by local_strategy
=posit_strategy,
credentials_strategy
)
if rows is None:
= "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
query
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=posit_strategy.sql_credentials_provider(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
cursor.execute(query)= cursor.fetchall()
rows
return [row.asDict() for row in rows]
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
uvicorn app:app --reload
requirements.txt
-sql-connector==3.3.0
databricks-sdk==0.29.0
databricks==3.0.2
flask-sdk>=0.4.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
from databricks import sql
from databricks.sdk.core import Config, databricks_cli
from flask import Flask, request
from posit.connect.external.databricks import PositCredentialsStrategy
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= None
rows = Flask(__name__)
app
@app.route("/")
def usage():
return "<p>Try: <pre>GET /fares<pre></p>"
@app.route("/fares")
def get_fares():
"""
Flask example API that returns the first few rows from
a table hosted in Databricks.
"""
global rows
= request.headers.get("Posit-Connect-User-Session-Token")
session_token = PositCredentialsStrategy(
posit_strategy =databricks_cli, user_session_token=session_token
local_strategy
)= Config(
cfg =DATABRICKS_HOST_URL,
host# uses Posit's custom credential_strategy if running on Connect,
# otherwise falls back to the strategy defined by local_strategy
=posit_strategy,
credentials_strategy
)
if rows is None:
= "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
query
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=posit_strategy.sql_credentials_provider(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
cursor.execute(query)= cursor.fetchall()
rows
return [row.asDict() for row in rows]
if __name__ == "__main__":
=True) app.run(debug
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
flask --app app run
requirements.txt
-sql-connector==3.3.0
databricks-sdk==0.29.0
databricks==0.7.1
shiny-sdk>=0.4.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
import pandas as pd
from databricks import sql
from databricks.sdk.core import ApiClient, Config, databricks_cli
from databricks.sdk.service.iam import CurrentUserAPI
from posit.connect.external.databricks import PositCredentialsStrategy
from shiny import App, Inputs, Outputs, Session, render, ui
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= ui.page_fluid(ui.output_text("text"), ui.output_data_frame("result"))
app_ui
def server(i: Inputs, o: Outputs, session: Session):
"""
Shiny for Python example application that shows user information and
the first few rows from a table hosted in Databricks.
"""
= session.http_conn.headers.get("Posit-Connect-User-Session-Token")
session_token = PositCredentialsStrategy(
posit_strategy =databricks_cli, user_session_token=session_token
local_strategy
)= Config(
cfg =DATABRICKS_HOST_URL,
host# uses Posit's custom credential_strategy if running on Connect,
# otherwise falls back to the strategy defined by local_strategy
=posit_strategy,
credentials_strategy
)
@render.data_frame
def result():
= "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
query
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=posit_strategy.sql_credentials_provider(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
cursor.execute(query)= cursor.fetchall()
rows = pd.DataFrame(rows, columns=[col[0] for col in cursor.description])
df return df
@render.text
def text():
= CurrentUserAPI(ApiClient(cfg)).me()
databricks_user_info return f"Hello, {databricks_user_info.display_name}!"
= App(app_ui, server) app
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
shiny run app.py
requirements.txt
-sql-connector==3.3.0
databricks-sdk==0.29.0
databricks==1.37.0
streamlit-sdk>=0.4.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
import pandas as pd
import streamlit as st
from databricks import sql
from databricks.sdk.core import ApiClient, Config, databricks_cli
from databricks.sdk.service.iam import CurrentUserAPI
from posit.connect.external.databricks import PositCredentialsStrategy
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= st.context.headers.get("Posit-Connect-User-Session-Token")
session_token = PositCredentialsStrategy(
posit_strategy =databricks_cli, user_session_token=session_token
local_strategy
)= Config(
cfg =DATABRICKS_HOST_URL,
host# uses Posit's custom credential_strategy if running on Connect,
# otherwise falls back to the strategy defined by local_strategy
=posit_strategy,
credentials_strategy
)
= CurrentUserAPI(ApiClient(cfg)).me()
databricks_user f"Hello, {databricks_user.display_name}!")
st.write(
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=posit_strategy.sql_credentials_provider(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
"SELECT * FROM samples.nyctaxi.trips LIMIT 10;")
cursor.execute(= cursor.fetchall()
result st.table(pd.DataFrame(result))
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
streamlit run app.py