Google BigQuery project explorer with R
Problem
You (a publisher) are frequently asked by teammates to grab information from an internal BigQuery dataset. After navigating to the BigQuery browser console for the sixth time in one day you decide to empower folks on your team to execute the queries themselves, but outside of the unwieldy, tab-laden BigQuery interface and using their own credentials for auditing purposes.
Solution
Using the Google BigQuery Viewer OAuth integration configured by your Posit Connect administrator you are able to publish a Shiny application that pulls in resources from BigQuery as the logged-in user.
People within your organization are able to retrieve projects available to them in Google, list the BigQuery datasets associated with a given project, and execute a provided query against a project.
Concerned about accidental breaking queries, you limit the query input to SELECT
statements and display a friendly message if someone tries to execute something else or append an additional statement to a SELECT
query using ;
as a separator. If your Connect administrator used read-only scopes in the BigQuery integration this would not be necessary.
If a given request fails then an error is displayed, and otherwise the results are shown to the user.
app.R
library(shiny)
library(httr2)
library(bslib)
library(connectapi)
<- page_sidebar(
ui title = "BigQuery Project Explorer",
sidebar = sidebar(
title = "BigQuery Projects",
textInput("project", "Project to query or retrieve datasets from:", placeholder = "my-google-project"),
actionButton("get_datasets", "Get Datasets"),
textInput("query", "Query to execute against project:", placeholder = "SELECT name FROM `my_dataset.my_table`"),
actionButton("query_project", "Query Project"),
actionButton("list_projects", "List Project IDs")
),layout_columns(
card(
card_header("Results"),
htmlOutput("results")
)
)
)
<- function(input, output, session) {
server
# check if running on Posit Connect
# note: use RSTUDIO_PRODUCT (deprecated) for Connect versions < 2025.02.0
if (Sys.getenv("POSIT_PRODUCT") == "CONNECT") {
# initialize Connect API client
<- connect()
client # read the user-session-token header
<- session$request$HTTP_POSIT_CONNECT_USER_SESSION_TOKEN
user_session_token # grab the OAuth Integration access token using the session token
<- get_oauth_credentials(client, user_session_token)
credentials <- credentials$access_token
token else {
} # grab the access token from the GOOGLE_TOKEN env var if running locally
<- Sys.getenv("GOOGLE_TOKEN")
token
}
# set the bigquery service endpoint base URL
<- "https://bigquery.googleapis.com"
bq_endpoint # set the list projects endpoint
<- paste0(bq_endpoint, "/bigquery/v2/projects/")
projects
# list projects when List Projects button is pushed
observeEvent(input$list_projects, {
# retrieve and list projects available to the user
<- httr2::request(projects) |>
resp ::req_headers("Accept" = "application/json") |>
httr2::req_auth_bearer_token(token) |>
httr2::req_perform() |>
httr2::resp_body_json()
httr2
# parse out project IDs and display to user
<- sapply(resp$projects, function(project) project$projectReference$projectId)
project_ids $results <- renderUI({
output<- "The following projects are available to you: "
greeting <- paste0(project_ids, collapse = '<br/>')
project_ids HTML(paste0(greeting, "<br/>", project_ids))
})
})
# retrieve datasets when the user enters a project and hits Get Datasets button
observeEvent(input$get_datasets, {
# do not allow an empty value for the project
req(input$project)
<- input$project
project
# form the datasets by project URL to request
<- paste0(projects, project, "/datasets")
datasets
# make the request
<- httr2::request(datasets) |>
resp ::req_headers("Accept" = "application/json") |>
httr2::req_auth_bearer_token(token) |>
httr2# to avoid HTTP response error codes being surfaced as R errors
::req_error(is_error = ~FALSE) |>
httr2::req_perform()
httr2
# check the HTTP response code
<- httr2::resp_status(resp)
http_code if (http_code == 200) {
# format response body now that we have the HTTP status code
<- httr2::resp_body_json(resp)
resp
# display the datasets for the provided project
$results <- renderUI({
output<- sapply(resp$datasets, function(dataset) dataset$datasetReference$datasetId)
datasets <- paste0(datasets, collapse = '<br/>')
datasets HTML(paste0("Datasets for project ", project, ": ", "<br/>", datasets))
})}
# output the unexpected HTTP code
if (http_code != 200) {
$results <- renderUI({
output<- HTML(paste0("Received non-200 HTTP response code: ", http_code))
error
error
})}
})
# execute provided SELECT query when Query Project button pushed
observeEvent(input$query_project, {
# do not allow an empty value for the project
req(input$project)
<- input$project
project
# do not allow an empty query
req(input$query)
<- input$query
query
# only allow SELECT statements and not other query types
if (!grepl("^\\s*SELECT\\s", query, ignore.case = TRUE)) {
showNotification("Only SELECT statements are allowed.")
else if (grepl(";|\\b(DELETE|DROP|INSERT|UPDATE|ALTER)\\b", query, ignore.case = TRUE)) {
} # prohibit multiple queries and subqueries which may perform mutations
# not necessary if your BigQuery OAuth integration uses read-only scopes
stop("I see what you're trying to do.")
else {
} # form the queries endpoint using projects URL and project ID
<- paste0(projects, project, "/queries")
queries
# form the request body using the provided SQL query
<- list(
body query = query,
useLegacySql = FALSE,
maxResults = 10,
timeoutMs = 10000
)
# make the request
<- httr2::request(queries) |>
resp ::req_headers("Accept" = "application/json") |>
httr2::req_auth_bearer_token(token) |>
httr2# to avoid HTTP response error codes being surfaced as R errors
::req_error(is_error = ~FALSE) |>
httr2::req_body_json(body) |>
httr2::req_perform()
httr2
# check the HTTP response code
<- httr2::resp_status(resp)
http_code if (http_code == 200) {
# format response body now that we have the HTTP status code
<- httr2::resp_body_json(resp)
resp
# display the rows returned from the queries endpoint
$results <- renderUI({
output<- sapply(resp$rows, function(row) row$f[[1]]$v)
rows <- paste0(rows, collapse = '<br/>')
row_values HTML(paste0("Rows returned for query ", query, ": ", "<br/>", row_values))
})else {
} # output the unexpected HTTP code
$results <- renderUI({
outputHTML(paste0("Received non-200 HTTP response code: ", http_code))
})
}
}
})
}
shinyApp(ui, server)
Running the app locally
Terminal
Sys.setenv(CONNECT_SERVER = "<connect-host>")
Sys.setenv(CONNECT_API_KEY = "<connect-api-key>")
# GOOGLE_TOKEN is only required when running the example locally
Sys.setenv(GOOGLE_TOKEN = "<google-token>")
::runApp() shiny