Databricks Viewer Integrations with Python
Problem
You are building an interactive Python application which needs to act as the person visiting the application when accessing a private Databricks resource. The content must authenticate to Databricks using the viewer’s Databricks credentials. For example, reading data from a Databricks Unity Catalog table that has row-level permissions and each content viewer has a different level of data access.
Solution
The examples below illustrate how to use the OAuth integrations feature of Posit Connect to delegate authorization to the data provider. These examples use the viewer’s OAuth access token to call the Databricks API on behalf of the content viewer, allowing content to access private Databricks resources as the content viewer.
When publishing the content to Connect make sure the following environment variables are set for the deployed content:
DATABRICKS_HOST
DATABRICKS_PATH
requirements.txt
-sql-connector==4.0.0
databricks-sdk==0.45.0
databricks==3.0.0
dash-sdk==0.9.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
import flask
import pandas as pd
from dash import Dash, Input, Output, dash_table, html
from databricks import sql
from databricks.sdk.core import ApiClient
from databricks.sdk.service.iam import CurrentUserAPI
from posit.connect.external.databricks import (
databricks_config,
sql_credentials,
ConnectStrategy,
)
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= None
df = Dash(__name__)
app
= html.Div(
app.layout =[
childrenid="greeting", children="Loading..."),
html.Div(id="table-container"),
html.Div(id="dummy"), # dummy element to trigger callback on page load
html.Div(
]
)
@app.callback(
"table-container", "children"), Output("greeting", "children")],
[Output("dummy", "children"),
Input(
)def update_page(_):
"""
Dash example application that shows user information and
the first few rows from a table hosted in Databricks.
"""
= flask.request.headers.get("Posit-Connect-User-Session-Token")
session_token # databricks_config() will try to authenticate using a different method depending
# on which environment is detected at runtime.
# Content running on Posit Connect will use the provided posit_connect_strategy.
# Content running on Posit Workbench will use the provided posit_workbench_strategy.
# If neither Workbench nor Connect is detected then the posit_default_strategy is used as a fallback.
# The posit_default_strategy uses the same method of detecting credentials as the databricks-sdk.
# If your local environment is already configured via environment variables or a .databrickscfg then
# you likely do not need to override this value.
# For more information, see the examples from the posit-sdk
# https://github.com/posit-dev/posit-sdk-py/blob/v0.9.0/src/posit/connect/external/databricks.py#L248
= databricks_config(
cfg =ConnectStrategy(user_session_token=session_token),
posit_connect_strategy
)
def get_greeting():
= CurrentUserAPI(ApiClient(cfg)).me()
databricks_user_info return f"Hello, {databricks_user_info.display_name}!"
def get_table():
global df
if df is None:
= "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
query
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=sql_credentials(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
cursor.execute(query)= cursor.fetchall()
rows = pd.DataFrame(
df =[col[0] for col in cursor.description]
rows, columns
)
= dash_table.DataTable(
table id="table",
=[{"name": i, "id": i} for i in df.columns],
columns=df.to_dict("records"),
data={"overflowX": "scroll"},
style_table
)return table
return get_table(), get_greeting()
if __name__ == "__main__":
=True) app.run(debug
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
python app.py
requirements.txt
-sql-connector==4.0.0
databricks-sdk==0.45.0
databricks==0.115.11
fastapi-sdk==0.9.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
from typing import Annotated
from databricks import sql
from fastapi import FastAPI, Header
from fastapi.responses import JSONResponse
from posit.connect.external.databricks import (
databricks_config,
sql_credentials,
ConnectStrategy,
)
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= None
rows = FastAPI()
app
@app.get("/fares")
async def get_fares(
str | None, Header()] = None,
posit_connect_user_session_token: Annotated[-> JSONResponse:
) """
FastAPI example API that returns the first few rows from
a table hosted in Databricks.
"""
global rows
# databricks_config() will try to authenticate using a different method depending
# on which environment is detected at runtime.
# Content running on Posit Connect will use the provided posit_connect_strategy.
# Content running on Posit Workbench will use the provided posit_workbench_strategy.
# If neither Workbench nor Connect is detected then the posit_default_strategy is used as a fallback.
# The posit_default_strategy uses the same method of detecting credentials as the databricks-sdk.
# If your local environment is already configured via environment variables or a .databrickscfg then
# you likely do not need to override this value.
# For more information, see the examples from the posit-sdk
# https://github.com/posit-dev/posit-sdk-py/blob/v0.9.0/src/posit/connect/external/databricks.py#L248
= databricks_config(
cfg =ConnectStrategy(
posit_connect_strategy=posit_connect_user_session_token
user_session_token
),
)
if rows is None:
= "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
query
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=sql_credentials(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
cursor.execute(query)= cursor.fetchall()
rows
return [row.asDict() for row in rows]
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
uvicorn app:app --reload
requirements.txt
-sql-connector==4.0.0
databricks-sdk==0.45.0
databricks==3.1.0
flask-sdk==0.9.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
from databricks import sql
from flask import Flask, request
from posit.connect.external.databricks import (
databricks_config,
sql_credentials,
ConnectStrategy,
)
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= None
rows = Flask(__name__)
app
@app.route("/")
def usage():
return "<p>Try: <pre>GET /fares<pre></p>"
@app.route("/fares")
def get_fares():
"""
Flask example API that returns the first few rows from
a table hosted in Databricks.
"""
global rows
= request.headers.get("Posit-Connect-User-Session-Token")
session_token # databricks_config() will try to authenticate using a different method depending
# on which environment is detected at runtime.
# Content running on Posit Connect will use the provided posit_connect_strategy.
# Content running on Posit Workbench will use the provided posit_workbench_strategy.
# If neither Workbench nor Connect is detected then the posit_default_strategy is used as a fallback.
# The posit_default_strategy uses the same method of detecting credentials as the databricks-sdk.
# If your local environment is already configured via environment variables or a .databrickscfg then
# you likely do not need to override this value.
# For more information, see the examples from the posit-sdk
# https://github.com/posit-dev/posit-sdk-py/blob/v0.9.0/src/posit/connect/external/databricks.py#L248
= databricks_config(
cfg =ConnectStrategy(user_session_token=session_token),
posit_connect_strategy
)
if rows is None:
= "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
query
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=sql_credentials(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
cursor.execute(query)= cursor.fetchall()
rows
return [row.asDict() for row in rows]
if __name__ == "__main__":
=True) app.run(debug
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
flask --app app run
requirements.txt
-sql-connector==4.0.0
databricks-sdk==0.45.0
databricks==1.3.0
shiny-sdk==0.9.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
import pandas as pd
from databricks import sql
from databricks.sdk.core import ApiClient
from databricks.sdk.service.iam import CurrentUserAPI
from shiny import App, Inputs, Outputs, Session, render, ui
from posit.connect.external.databricks import (
databricks_config,
sql_credentials,
ConnectStrategy,
)
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= ui.page_fluid(ui.output_text("text"), ui.output_data_frame("result"))
app_ui
def server(i: Inputs, o: Outputs, session: Session):
"""
Shiny for Python example application that shows user information and
the first few rows from a table hosted in Databricks.
"""
= session.http_conn.headers.get("Posit-Connect-User-Session-Token")
session_token # databricks_config() will try to authenticate using a different method depending
# on which environment is detected at runtime.
# Content running on Posit Connect will use the provided posit_connect_strategy.
# Content running on Posit Workbench will use the provided posit_workbench_strategy.
# If neither Workbench nor Connect is detected then the posit_default_strategy is used as a fallback.
# The posit_default_strategy uses the same method of detecting credentials as the databricks-sdk.
# If your local environment is already configured via environment variables or a .databrickscfg then
# you likely do not need to override this value.
# For more information, see the examples from the posit-sdk
# https://github.com/posit-dev/posit-sdk-py/blob/v0.9.0/src/posit/connect/external/databricks.py#L248
= databricks_config(
cfg =ConnectStrategy(user_session_token=session_token),
posit_connect_strategy
)
@render.data_frame
def result():
= "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
query
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=sql_credentials(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
cursor.execute(query)= cursor.fetchall()
rows = pd.DataFrame(rows, columns=[col[0] for col in cursor.description])
df return df
@render.text
def text():
= CurrentUserAPI(ApiClient(cfg)).me()
databricks_user_info return f"Hello, {databricks_user_info.display_name}!"
= App(app_ui, server) app
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
shiny run app.py
requirements.txt
-sql-connector==4.0.0
databricks-sdk==0.45.0
databricks==1.43.1
streamlit-sdk==0.9.0 posit
app.py
# -*- coding: utf-8 -*-
# mypy: ignore-errors
import os
import pandas as pd
import streamlit as st
from databricks import sql
from databricks.sdk.core import ApiClient
from databricks.sdk.service.iam import CurrentUserAPI
from posit.connect.external.databricks import (
databricks_config,
sql_credentials,
ConnectStrategy,
)
= os.getenv("DATABRICKS_HOST")
DATABRICKS_HOST = f"https://{DATABRICKS_HOST}"
DATABRICKS_HOST_URL = os.getenv("DATABRICKS_PATH")
SQL_HTTP_PATH
= st.context.headers.get("Posit-Connect-User-Session-Token")
session_token # databricks_config() will try to authenticate using a different method depending
# on which environment is detected at runtime.
# Content running on Posit Connect will use the provided posit_connect_strategy.
# Content running on Posit Workbench will use the provided posit_workbench_strategy.
# If neither Workbench nor Connect is detected then the posit_default_strategy is used as a fallback.
# The posit_default_strategy uses the same method of detecting credentials as the databricks-sdk.
# If your local environment is already configured via environment variables or a .databrickscfg then
# you likely do not need to override this value.
# For more information, see the examples from the posit-sdk
# https://github.com/posit-dev/posit-sdk-py/blob/v0.9.0/src/posit/connect/external/databricks.py#L248
= databricks_config(
cfg =ConnectStrategy(user_session_token=session_token),
posit_connect_strategy
)
= CurrentUserAPI(ApiClient(cfg)).me()
databricks_user f"Hello, {databricks_user.display_name}!")
st.write(
with sql.connect(
=DATABRICKS_HOST,
server_hostname=SQL_HTTP_PATH,
http_path# https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
=sql_credentials(cfg),
credentials_provideras connection:
) with connection.cursor() as cursor:
"SELECT * FROM samples.nyctaxi.trips LIMIT 10;")
cursor.execute(= cursor.fetchall()
result st.table(pd.DataFrame(result))
Running the app locally
Terminal
export DATABRICKS_HOST="<databricks-sql-warehouse-server-hostname>"
export DATABRICKS_PATH="<databricks-sql-warehouse-http-path>"
streamlit run app.py