Google BigQuery project explorer with R

Problem

You (a publisher) are frequently asked by teammates to grab information from an internal BigQuery dataset. After navigating to the BigQuery browser console for the sixth time in one day you decide to empower folks on your team to execute the queries themselves, but outside of the unwieldy, tab-laden BigQuery interface and using their own credentials for auditing purposes.

Solution

Using the Google BigQuery Viewer OAuth integration configured by your Posit Connect administrator you are able to publish a Shiny application that pulls in resources from BigQuery as the logged-in user.

People within your organization are able to retrieve projects available to them in Google, list the BigQuery datasets associated with a given project, and execute a provided query against a project.

Concerned about accidental breaking queries, you limit the query input to SELECT statements and display a friendly message if someone tries to execute something else or append an additional statement to a SELECT query using ; as a separator. If your Connect administrator used read-only scopes in the BigQuery integration this would not be necessary.

If a given request fails then an error is displayed, and otherwise the results are shown to the user.

app.R
library(shiny)
library(httr2)
library(bslib)
library(connectapi)

ui <- page_sidebar(
  title = "BigQuery Project Explorer",
  sidebar = sidebar(
    title = "BigQuery Projects",
    textInput("project", "Project to query or retrieve datasets from:", placeholder = "my-google-project"),
    actionButton("get_datasets", "Get Datasets"),
    textInput("query", "Query to execute against project:", placeholder =  "SELECT name FROM `my_dataset.my_table`"),
    actionButton("query_project", "Query Project"),
    actionButton("list_projects", "List Project IDs")
    ),
  layout_columns(
    card(
      card_header("Results"),
      htmlOutput("results")
    )
  )
)


server <- function(input, output, session) {

  # check if running on Posit Connect
  # note: use RSTUDIO_PRODUCT (deprecated) for Connect versions < 2025.02.0
  if (Sys.getenv("POSIT_PRODUCT") == "CONNECT") {
    # initialize Connect API client
    client <- connect()
    # read the user-session-token header
    user_session_token <- session$request$HTTP_POSIT_CONNECT_USER_SESSION_TOKEN
    # grab the OAuth Integration access token using the session token
    credentials <- get_oauth_credentials(client, user_session_token)
    token <- credentials$access_token
  } else {
    # grab the access token from the GOOGLE_TOKEN env var if running locally
    token <- Sys.getenv("GOOGLE_TOKEN")
  }
  
  # set the bigquery service endpoint base URL
  bq_endpoint <- "https://bigquery.googleapis.com"
  # set the list projects endpoint
  projects <- paste0(bq_endpoint, "/bigquery/v2/projects/")
  
  # list projects when List Projects button is pushed
  observeEvent(input$list_projects, {
    # retrieve and list projects available to the user
    resp <- httr2::request(projects) |>
      httr2::req_headers("Accept" = "application/json") |>
      httr2::req_auth_bearer_token(token) |>
      httr2::req_perform() |>
      httr2::resp_body_json()
    
    # parse out project IDs and display to user
    project_ids <- sapply(resp$projects, function(project) project$projectReference$projectId)
    output$results <- renderUI({
      greeting <- "The following projects are available to you: "
      project_ids <- paste0(project_ids, collapse = '<br/>')
      HTML(paste0(greeting, "<br/>", project_ids))
    })
  })
  
  # retrieve datasets when the user enters a project and hits Get Datasets button
  observeEvent(input$get_datasets, {
    # do not allow an empty value for the project 
    req(input$project)
    project <- input$project
    
    # form the datasets by project URL to request
    datasets <- paste0(projects, project, "/datasets")
  
    # make the request
    resp <- httr2::request(datasets) |>
      httr2::req_headers("Accept" = "application/json") |>
      httr2::req_auth_bearer_token(token) |>
      # to avoid HTTP response error codes being surfaced as R errors
      httr2::req_error(is_error = ~FALSE) |>
      httr2::req_perform()
  
    # check the HTTP response code
    http_code <- httr2::resp_status(resp)
    if (http_code == 200) {
      # format response body now that we have the HTTP status code
      resp <- httr2::resp_body_json(resp)
      
      # display the datasets for the provided project 
      output$results <- renderUI({
        datasets <- sapply(resp$datasets, function(dataset) dataset$datasetReference$datasetId)
        datasets <- paste0(datasets, collapse = '<br/>')
        HTML(paste0("Datasets for project ", project, ": ", "<br/>", datasets))
      })}
  
    # output the unexpected HTTP code
    if (http_code != 200) {
    output$results <- renderUI({
      error <- HTML(paste0("Received non-200 HTTP response code: ", http_code))
      error
    })}
  })
  
  # execute provided SELECT query when Query Project button pushed 
  observeEvent(input$query_project, {
    # do not allow an empty value for the project 
    req(input$project)
    project <- input$project
    
    # do not allow an empty query
    req(input$query)
    query <- input$query
    
    # only allow SELECT statements and not other query types 
    if (!grepl("^\\s*SELECT\\s", query, ignore.case = TRUE)) {
      showNotification("Only SELECT statements are allowed.")
    } else if (grepl(";|\\b(DELETE|DROP|INSERT|UPDATE|ALTER)\\b", query, ignore.case = TRUE)) {
      # prohibit multiple queries and subqueries which may perform mutations
      # not necessary if your BigQuery OAuth integration uses read-only scopes
      stop("I see what you're trying to do.")
    } else {
      # form the queries endpoint using projects URL and project ID
      queries <- paste0(projects, project, "/queries")
    
      # form the request body using the provided SQL query
      body <- list(
        query = query,
        useLegacySql = FALSE,
        maxResults = 10,
        timeoutMs = 10000
      )
  
      # make the request
      resp <- httr2::request(queries) |>
        httr2::req_headers("Accept" = "application/json") |>
        httr2::req_auth_bearer_token(token) |>
        # to avoid HTTP response error codes being surfaced as R errors
        httr2::req_error(is_error = ~FALSE) |>
        httr2::req_body_json(body) |>
        httr2::req_perform()
    
      # check the HTTP response code
      http_code <- httr2::resp_status(resp)
      if (http_code == 200) {
        # format response body now that we have the HTTP status code
        resp <- httr2::resp_body_json(resp)
      
        # display the rows returned from the queries endpoint 
        output$results <- renderUI({
          rows <- sapply(resp$rows, function(row) row$f[[1]]$v)
          row_values <- paste0(rows, collapse = '<br/>')
          HTML(paste0("Rows returned for query ", query, ": ", "<br/>", row_values))
        })
      } else {
        # output the unexpected HTTP code
        output$results <- renderUI({
          HTML(paste0("Received non-200 HTTP response code: ", http_code))
        })
      }
    }
  })
}


shinyApp(ui, server)

Running the app locally

Terminal
Sys.setenv(CONNECT_SERVER = "<connect-host>")
Sys.setenv(CONNECT_API_KEY = "<connect-api-key>")
# GOOGLE_TOKEN is only required when running the example locally
Sys.setenv(GOOGLE_TOKEN = "<google-token>")
shiny::runApp()