-
Notifications
You must be signed in to change notification settings - Fork 185
Description
Package Versions:
dbplyr: 2.5.0
bigrquery: 1.5.1
DBI: 1.2.3
R version: 4.5.1
Problem Description
When creating and uploading a new table to BigQuery using bq_table_create() and bq_table_upload(), subsequent queries using dplyr behave inconsistently.
If a dplyr remote table object is created using the fully qualified, three-part name (e.g., tbl(con, "project.dataset.table")), operations that only generate SQL to be run on the server (like count()) succeed.
However, operations that require downloading data into R (like collect() or head()) fail with the error: Error in as_bq_table(): ! 'name' must have 2 or 3 components.
This issue appears to be a string-parsing bug within the collect() method for tbl_BigQueryConnection objects, as downloading the same table data directly with bigrquery::bq_table_download() works perfectly.
Code below generated by Gemini, sorry, my actual case is proprietary
library(DBI)
library(bigrquery)
library(dplyr)
# 1. Define connection details and connect
# (Assumes user is already authenticated)
project_id <- "your-gcp-project-id" # <-- Change this
dataset_id <- "your_bq_dataset" # <-- Change this
billing_id <- "your-billing-project-id" # <-- Change this
table_id <- paste0("test_bug_table_", as.integer(Sys.time())) # Unique table name
con <- dbConnect(
bigrquery::bigquery(),
project = project_id,
dataset = dataset_id,
billing = billing_id
)
# 2. Create and upload a new table
temp_df <- data.frame(id = 1:2, value = c("a", "b"))
destination_table_ref <- bq_table(project_id, dataset_id, table_id)
bq_table_create(destination_table_ref, fields = as_bq_fields(temp_df))
bq_table_upload(destination_table_ref, values = temp_df)
# Give a moment for table to be available
Sys.sleep(5)
# 3. Create dplyr object using the FULLY QUALIFIED three-part name
full_name <- paste(project_id, dataset_id, table_id, sep = ".")
bq_sample <- tbl(con, full_name)
# 4. Demonstrate what works and what fails
# --- THIS WORKS ---
# SQL-only operations succeed
print(bq_sample %>% count())
#> # Source: SQL [1 x 1]
#> # Database: BigQueryConnection
#> n
#> <int>
#> 1 2
# --- THIS FAILS ---
# Data-retrieval operations fail
tryCatch({
bq_sample %>% head(5)
}, error = function(e) {
print(e)
rlang::last_trace()
})
# 5. Show the Workarounds
# --- WORKAROUND 1: Using bigrquery directly ---
# This proves the table is accessible and downloadable
message("\n--- Testing direct download with bq_table_download() ---")
print(bq_table_download(destination_table_ref, n_max = 5))
# --- WORKAROUND 2: Using only the table name in tbl() ---
# This is the recommended dplyr workflow and it works correctly
message("\n--- Testing dplyr with only table name in tbl() ---")
bq_sample_workaround <- tbl(con, table_id)
print(bq_sample_workaround %>% head(5))
# Clean up created table
bq_table_delete(destination_table_ref)
dbDisconnect(con)