|
| 1 | +<!DOCTYPE html> |
| 2 | + |
| 3 | +<html xmlns="http://www.w3.org/1999/xhtml" lang="en"> |
| 4 | + |
| 5 | +<head> |
| 6 | + |
| 7 | +<meta charset="utf-8"> |
| 8 | +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 9 | +<meta name="generator" content="pandoc" /> |
| 10 | + |
| 11 | +<meta name="viewport" content="width=device-width, initial-scale=1"> |
| 12 | + |
| 13 | +<meta name="author" content="Mark Edmondson" /> |
| 14 | + |
| 15 | +<meta name="date" content="2016-08-11" /> |
| 16 | + |
| 17 | +<title>bigQueryR</title> |
| 18 | + |
| 19 | + |
| 20 | + |
| 21 | +<style type="text/css">code{white-space: pre;}</style> |
| 22 | +<style type="text/css"> /* Layout and token colours for highlighted code listings. NOTE(review): the data-URI stylesheet linked later in <head> redefines several of these token colours (e.g. span.kw, span.st) and comes later at equal specificity, so it presumably wins; confirm */ |
| 23 | +div.sourceCode { overflow-x: auto; } /* wide listings scroll horizontally */ |
| 24 | +table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { |
| 25 | + margin: 0; padding: 0; vertical-align: baseline; border: none; } /* reset spacing and borders on the listing table and its cells */ |
| 26 | +table.sourceCode { width: 100%; line-height: 100%; } /* listing table spans the full width */ |
| 27 | +td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; } /* right-aligned grey line-number cell with a rule on its right edge */ |
| 28 | +td.sourceCode { padding-left: 5px; } /* left padding on the code cell */ |
| 29 | +code > span.kw { color: #007020; font-weight: bold; } /* Keyword */ |
| 30 | +code > span.dt { color: #902000; } /* DataType */ |
| 31 | +code > span.dv { color: #40a070; } /* DecVal */ |
| 32 | +code > span.bn { color: #40a070; } /* BaseN */ |
| 33 | +code > span.fl { color: #40a070; } /* Float */ |
| 34 | +code > span.ch { color: #4070a0; } /* Char */ |
| 35 | +code > span.st { color: #4070a0; } /* String */ |
| 36 | +code > span.co { color: #60a0b0; font-style: italic; } /* Comment */ |
| 37 | +code > span.ot { color: #007020; } /* Other */ |
| 38 | +code > span.al { color: #ff0000; font-weight: bold; } /* Alert */ |
| 39 | +code > span.fu { color: #06287e; } /* Function */ |
| 40 | +code > span.er { color: #ff0000; font-weight: bold; } /* Error */ |
| 41 | +code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ |
| 42 | +code > span.cn { color: #880000; } /* Constant */ |
| 43 | +code > span.sc { color: #4070a0; } /* SpecialChar */ |
| 44 | +code > span.vs { color: #4070a0; } /* VerbatimString */ |
| 45 | +code > span.ss { color: #bb6688; } /* SpecialString */ |
| 46 | +code > span.im { } /* Import */ |
| 47 | +code > span.va { color: #19177c; } /* Variable */ |
| 48 | +code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */ |
| 49 | +code > span.op { color: #666666; } /* Operator */ |
| 50 | +code > span.bu { } /* BuiltIn */ |
| 51 | +code > span.ex { } /* Extension */ |
| 52 | +code > span.pp { color: #bc7a00; } /* Preprocessor */ |
| 53 | +code > span.at { color: #7d9029; } /* Attribute */ |
| 54 | +code > span.do { color: #ba2121; font-style: italic; } /* Documentation */ |
| 55 | +code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ |
| 56 | +code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */ |
| 57 | +code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */ |
| 58 | +</style> |
| 59 | + |
| 60 | + |
| 61 | + |
| 62 | +<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B
%0Aborder%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%2
0%23800080%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0Aa%5Bhref%5E%3D%22https%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0A%0Acode%20%3E%20span%2Ekw%20%7B%20color%3A%20%23555%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Edt%20%7B%20color%3A%20%23902000%3B%20%7D%20%0Acode%20%3E%20span%2Edv%20%7B%20color%3A%20%2340a070%3B%20%7D%20%0Acode%20%3E%20span%2Ebn%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Efl%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Ech%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Est%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Eco%20%7B%20color%3A%20%23888888%3B%20font%2Dstyle%3A%20italic%3B%20%7D%20%0Acode%20%3E%20span%2Eot%20%7B%20color%3A%20%23007020%3B%20%7D%20%0Acode%20%3E%20span%2Eal%20%7B%20color%3A%20%23ff0000%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Efu%20%7B%20color%3A%20%23900%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%20code%20%3E%20span%2Eer%20%7B%20color%3A%20%23a61717%3B%20background%2Dcolor%3A%20%23e3d2d2%3B%20%7D%20%0A" rel="stylesheet" type="text/css" /> |
| 63 | + |
| 64 | +</head> |
| 65 | + |
| 66 | +<body> |
| 67 | + |
| 68 | + |
| 69 | + |
| 70 | + |
| 71 | +<h1 class="title toc-ignore">bigQueryR</h1> |
| 72 | +<h4 class="author"><em>Mark Edmondson</em></h4> |
| 73 | +<h4 class="date"><em>2016-08-11</em></h4> |
| 74 | + |
| 75 | + |
| 76 | + |
| 77 | +<div id="introduction" class="section level2"> |
| 78 | +<h2>Introduction</h2> |
| 79 | +<p>This is a package for interacting with <a href="https://cloud.google.com/bigquery/">BigQuery</a> from within R.</p> |
| 80 | +<p>You may want instead to use <a href="https://github.com/hadley/bigrquery">bigrquery</a> which is more developed with integration with <code>dplyr</code> etc. Some functions from <code>bigrquery</code> are used in this package.</p> |
| 81 | +<div id="why-this-package-then" class="section level3"> |
| 82 | +<h3>Why this package then?</h3> |
| 83 | +<p>This package is here as it uses <a href="https://github.com/MarkEdmondson1234/googleAuthR">googleAuthR</a> as backend, so has Shiny support, and compatibility with other googleAuthR dependent packages.</p> |
| 84 | +<p>It also has support for data extracts to Google Cloud Storage, meaning you can download data and make the download URL available to a user via their Google email. If you do a query normally with over 100000 results it hangs and errors.</p> |
| 85 | +<p>An example of a BigQuery Shiny app running OAuth2 is the <a href="https://mark.shinyapps.io/bigquery-viz/">BigQuery Visualiser</a>.</p> |
| 86 | +</div> |
| 87 | +</div> |
| 88 | +<div id="authentication" class="section level2"> |
| 89 | +<h2>Authentication</h2> |
| 90 | +<p>Authentication is as used in other <code>googleAuthR</code> libraries:</p> |
| 91 | +<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(bigQueryR) |
| 92 | + |
| 93 | +## this will open your browser |
| 94 | +## Authenticate with an email that has access to the BigQuery project you need |
| 95 | +<span class="kw">bqr_auth</span>() |
| 96 | + |
| 97 | +## verify under a new user |
| 98 | +<span class="kw">bqr_auth</span>(<span class="dt">new_user=</span><span class="ot">TRUE</span>)</code></pre></div> |
| 99 | +<p>If you are authenticating under several APIs via <code>googleAuthR</code> then use <code>gar_auth()</code> instead with the appropriate scopes set.</p> |
| 100 | +<p>You can also use service-to-service JSON files and multi-user authentication under Shiny, see the <code>googleAuthR</code> readme for details.</p> |
| 101 | +</div> |
| 102 | +<div id="listing-bigquery-meta-data" class="section level2"> |
| 103 | +<h2>Listing BigQuery meta data</h2> |
| 104 | +<p>Various functions for listing what is in your BigQuery account.</p> |
| 105 | +<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(bigQueryR) |
| 106 | + |
| 107 | +## this will open your browser |
| 108 | +## Authenticate with an email that has access to the BigQuery project you need |
| 109 | +<span class="kw">bqr_auth</span>() |
| 110 | + |
| 111 | +## verify under a new user |
| 112 | +<span class="kw">bqr_auth</span>(<span class="dt">new_user=</span><span class="ot">TRUE</span>) |
| 113 | + |
| 114 | +## get projects |
| 115 | +projects <-<span class="st"> </span><span class="kw">bqr_list_projects</span>() |
| 116 | + |
| 117 | +my_project <-<span class="st"> </span>projects[<span class="dv">1</span>] |
| 118 | + |
| 119 | +## for first project, get datasets |
| 120 | +datasets <-<span class="st"> </span><span class="kw">bqr_list_datasets</span>(my_project) |
| 121 | + |
| 122 | +my_dataset <-<span class="st"> </span>datasets[<span class="dv">1</span>] |
| 123 | +## list tables |
| 124 | +my_table <-<span class="st"> </span><span class="kw">bqr_list_tables</span>(my_project, my_dataset) |
| 125 | + |
| 126 | +## get metadata for table |
| 127 | +meta_table <-<span class="st"> </span><span class="kw">bqr_table_meta</span>(my_project, my_dataset, my_table)</code></pre></div> |
| 128 | +</div> |
| 129 | +<div id="simple-queries" class="section level2"> |
| 130 | +<h2>Simple Queries</h2> |
| 131 | +<p>You can pass in queries that have results under ~ 100000 rows using this command:</p> |
| 132 | +<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">bqr_query</span>(<span class="st">"big-query-r"</span>,<span class="st">"samples"</span>, |
| 133 | + <span class="st">"SELECT COUNT(repository.url) FROM [publicdata:samples.github_nested]"</span>)</code></pre></div> |
| 134 | +<p>More than that, and the API starts to hang and you are limited by your download bandwidth.</p> |
| 135 | +</div> |
| 136 | +<div id="asynchronous-queries" class="section level2"> |
| 137 | +<h2>Asynchronous Queries</h2> |
| 138 | +<p>For bigger queries, asynchronous queries save the results to another BigQuery table. You can check the progress of the job via <code>bqr_get_job</code></p> |
| 139 | +<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(bigQueryR) |
| 140 | + |
| 141 | +## Auth with a project that has at least BigQuery and Google Cloud Storage scope |
| 142 | +<span class="kw">bqr_auth</span>() |
| 143 | + |
| 144 | +## make a big query |
| 145 | +job <-<span class="st"> </span><span class="kw">bqr_query_asynch</span>(<span class="st">"your_project"</span>, |
| 146 | + <span class="st">"your_dataset"</span>, |
| 147 | + <span class="st">"SELECT * FROM blah LIMIT 9999999"</span>, |
| 148 | + <span class="dt">destinationTableId =</span> <span class="st">"bigResultTable"</span>) |
| 149 | + |
| 150 | +## poll the job to check its status |
| 151 | +## it's done when job$status$state == "DONE" |
| 152 | +<span class="kw">bqr_get_job</span>(<span class="st">"your_project"</span>, job$jobReference$jobId) |
| 153 | + |
| 154 | +##once done, the query results are in "bigResultTable"</code></pre></div> |
| 155 | +<p>You may now want to download this data. For large datasets, this is best done via extracting the BigQuery result to Google Cloud Storage, then downloading the data from there.</p> |
| 156 | +<p>You can create a bucket at Google Cloud Storage (see <a href="https://cloud.google.com/storage/docs/cloud-console" class="uri">https://cloud.google.com/storage/docs/cloud-console</a>), or you can use <a href="https://github.com/cloudyr/googleCloudStorageR">library(googleCloudStorageR)</a>.</p> |
| 157 | +<p>Once created, you can extract your data via the below:</p> |
| 158 | +<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">## Create the data extract from BigQuery to Cloud Storage |
| 159 | +job_extract <-<span class="st"> </span><span class="kw">bqr_extract_data</span>(<span class="st">"your_project"</span>, |
| 160 | + <span class="st">"your_dataset"</span>, |
| 161 | + <span class="st">"bigResultTable"</span>, |
| 162 | + <span class="st">"your_cloud_storage_bucket_name"</span>) |
| 163 | + |
| 164 | +## poll the extract job to check its status |
| 165 | +## it's done when job$status$state == "DONE" |
| 166 | +<span class="kw">bqr_get_job</span>(<span class="st">"your_project"</span>, job_extract$jobReference$jobId) |
| 167 | + |
| 168 | +## to download via a URL and not logging in via Google Cloud Storage interface: |
| 169 | +## Use an email that is Google account enabled |
| 170 | +## Requires scopes: |
| 171 | +## https://www.googleapis.com/auth/devstorage.full_control |
| 172 | +## https://www.googleapis.com/auth/cloud-platform |
| 173 | +## set via options("bigQueryR.scopes") and reauthenticate if needed |
| 174 | + |
| 175 | +download_url <-<span class="st"> </span><span class="kw">bqr_grant_extract_access</span>(job_extract, <span class="st">"your@email.com"</span>) |
| 176 | + |
| 177 | +## download_url may be multiple if the data is > 1GB |
| 178 | +><span class="st"> </span>[<span class="dv">1</span>] <span class="st">"https://storage.cloud.google.com/big-query-r-extracts/extract-20160311112410-000000000000.csv"</span> |
| 179 | +><span class="st"> </span>[<span class="dv">2</span>] <span class="st">"https://storage.cloud.google.com/big-query-r-extracts/extract-20160311112410-000000000001.csv"</span> |
| 180 | +><span class="st"> </span>[<span class="dv">3</span>] <span class="st">"https://storage.cloud.google.com/big-query-r-extracts/extract-20160311112410-000000000002.csv"</span></code></pre></div> |
| 181 | +</div> |
| 182 | + |
| 183 | + |
| 184 | + |
| 185 | +<!-- dynamically load mathjax for compatibility with self-contained; cdn.mathjax.org was retired in 2017, so a pinned cdnjs release with the same config is loaded instead --> |
| 186 | +<script> |
| 187 | + (function () { |
| 188 | + var script = document.createElement("script"); |
| 189 | + script.type = "text/javascript"; |
| 190 | + script.src = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; |
| 191 | + document.getElementsByTagName("head")[0].appendChild(script); |
| 192 | + })(); |
| 193 | +</script> |
| 194 | + |
| 195 | +</body> |
| 196 | +</html> |
0 commit comments