Skip to content

Commit dd90f35

Browse files
authored
Merge pull request #730 from GoogleCloudPlatform/tswast-bq
BigQuery: add samples for structs, timestamps, and arrays in query parameters.
2 parents 4eef615 + a8c5f42 commit dd90f35

File tree

5 files changed

+274
-130
lines changed

5 files changed

+274
-130
lines changed

bigquery/cloud-client/query_params.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2016 Google Inc.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Command-line app to perform queries with parameters in BigQuery.
18+
19+
For more information, see the README.md under /bigquery.
20+
21+
Example invocation:
22+
$ python query_params.py --use-named-params 'romeoandjuliet' 100
23+
$ python query_params.py --use-positional-params 'romeoandjuliet' 100
24+
"""
25+
26+
import argparse
27+
import datetime
28+
import time
29+
import uuid
30+
31+
from google.cloud import bigquery
32+
import pytz
33+
34+
35+
def wait_for_job(job):
36+
while True:
37+
job.reload() # Refreshes the state via a GET request.
38+
if job.state == 'DONE':
39+
if job.error_result:
40+
raise RuntimeError(job.errors)
41+
return
42+
time.sleep(1)
43+
44+
45+
def print_results(query_results):
46+
"""Print the query results by requesting a page at a time."""
47+
page_token = None
48+
49+
while True:
50+
rows, total_rows, page_token = query_results.fetch_data(
51+
max_results=10,
52+
page_token=page_token)
53+
54+
for row in rows:
55+
print(row)
56+
57+
if not page_token:
58+
break
59+
60+
61+
def query_positional_params(corpus, min_word_count):
62+
client = bigquery.Client()
63+
query = """
64+
SELECT word, word_count
65+
FROM `bigquery-public-data.samples.shakespeare`
66+
WHERE corpus = ?
67+
AND word_count >= ?
68+
ORDER BY word_count DESC;
69+
"""
70+
query_job = client.run_async_query(
71+
str(uuid.uuid4()),
72+
query,
73+
query_parameters=(
74+
bigquery.ScalarQueryParameter(
75+
# Set the name to None to use positional parameters (? symbol
76+
# in the query). Note that you cannot mix named and positional
77+
# parameters.
78+
None, 'STRING', corpus),
79+
bigquery.ScalarQueryParameter(None, 'INT64', min_word_count)))
80+
81+
# Only standard SQL syntax supports parameters in queries.
82+
# See: https://cloud.google.com/bigquery/sql-reference/
83+
query_job.use_legacy_sql = False
84+
85+
# Start the query and wait for the job to complete.
86+
query_job.begin()
87+
wait_for_job(query_job)
88+
print_results(query_job.results())
89+
90+
91+
def query_named_params(corpus, min_word_count):
92+
client = bigquery.Client()
93+
query = """
94+
SELECT word, word_count
95+
FROM `bigquery-public-data.samples.shakespeare`
96+
WHERE corpus = @corpus
97+
AND word_count >= @min_word_count
98+
ORDER BY word_count DESC;
99+
"""
100+
query_job = client.run_async_query(
101+
str(uuid.uuid4()),
102+
query,
103+
query_parameters=(
104+
bigquery.ScalarQueryParameter('corpus', 'STRING', corpus),
105+
bigquery.ScalarQueryParameter(
106+
'min_word_count', 'INT64', min_word_count)))
107+
query_job.use_legacy_sql = False
108+
109+
# Start the query and wait for the job to complete.
110+
query_job.begin()
111+
wait_for_job(query_job)
112+
print_results(query_job.results())
113+
114+
115+
def query_array_params(gender, states):
116+
client = bigquery.Client()
117+
query = """
118+
SELECT name, sum(number) as count
119+
FROM `bigquery-public-data.usa_names.usa_1910_2013`
120+
WHERE gender = @gender
121+
AND state IN UNNEST(@states)
122+
GROUP BY name
123+
ORDER BY count DESC
124+
LIMIT 10;
125+
"""
126+
query_job = client.run_async_query(
127+
str(uuid.uuid4()),
128+
query,
129+
query_parameters=(
130+
bigquery.ScalarQueryParameter('gender', 'STRING', gender),
131+
bigquery.ArrayQueryParameter('states', 'STRING', states)))
132+
query_job.use_legacy_sql = False
133+
134+
# Start the query and wait for the job to complete.
135+
query_job.begin()
136+
wait_for_job(query_job)
137+
print_results(query_job.results())
138+
139+
140+
def query_timestamp_params(year, month, day, hour, minute):
141+
client = bigquery.Client()
142+
query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);'
143+
query_job = client.run_async_query(
144+
str(uuid.uuid4()),
145+
query,
146+
query_parameters=[
147+
bigquery.ScalarQueryParameter(
148+
'ts_value',
149+
'TIMESTAMP',
150+
datetime.datetime(
151+
year, month, day, hour, minute, tzinfo=pytz.UTC))])
152+
query_job.use_legacy_sql = False
153+
154+
# Start the query and wait for the job to complete.
155+
query_job.begin()
156+
wait_for_job(query_job)
157+
print_results(query_job.results())
158+
159+
160+
def query_struct_params(x, y):
161+
client = bigquery.Client()
162+
query = 'SELECT @struct_value AS s;'
163+
query_job = client.run_async_query(
164+
str(uuid.uuid4()),
165+
query,
166+
query_parameters=[
167+
bigquery.StructQueryParameter(
168+
'struct_value',
169+
bigquery.ScalarQueryParameter('x', 'INT64', x),
170+
bigquery.ScalarQueryParameter('y', 'STRING', y))])
171+
query_job.use_legacy_sql = False
172+
173+
# Start the query and wait for the job to complete.
174+
query_job.begin()
175+
wait_for_job(query_job)
176+
print_results(query_job.results())
177+
178+
179+
if __name__ == '__main__':
180+
parser = argparse.ArgumentParser(
181+
description=__doc__,
182+
formatter_class=argparse.RawDescriptionHelpFormatter)
183+
subparsers = parser.add_subparsers(dest='sample', help='samples')
184+
named_parser = subparsers.add_parser(
185+
'named',
186+
help='Run a query with named parameters.')
187+
named_parser.add_argument(
188+
'corpus',
189+
help='Corpus to search from Shakespeare dataset.')
190+
named_parser.add_argument(
191+
'min_word_count',
192+
help='Minimum count of words to query.',
193+
type=int)
194+
positional_parser = subparsers.add_parser(
195+
'positional',
196+
help='Run a query with positional parameters.')
197+
positional_parser.add_argument(
198+
'corpus',
199+
help='Corpus to search from Shakespeare dataset.')
200+
positional_parser.add_argument(
201+
'min_word_count',
202+
help='Minimum count of words to query.',
203+
type=int)
204+
array_parser = subparsers.add_parser(
205+
'array',
206+
help='Run a query with an array parameter.')
207+
array_parser.add_argument(
208+
'gender',
209+
choices=['F', 'M'],
210+
help='Gender of baby in the Social Security baby names database.')
211+
array_parser.add_argument(
212+
'states',
213+
help='U.S. States to consider for popular baby names.',
214+
nargs='+')
215+
timestamp_parser = subparsers.add_parser(
216+
'timestamp',
217+
help='Run a query with a timestamp parameter.')
218+
timestamp_parser.add_argument('year', type=int)
219+
timestamp_parser.add_argument('month', type=int)
220+
timestamp_parser.add_argument('day', type=int)
221+
timestamp_parser.add_argument('hour', type=int)
222+
timestamp_parser.add_argument('minute', type=int)
223+
struct_parser = subparsers.add_parser(
224+
'struct',
225+
help='Run a query with a struct parameter.')
226+
struct_parser.add_argument('x', help='Integer for x', type=int)
227+
struct_parser.add_argument('y', help='String for y')
228+
args = parser.parse_args()
229+
230+
if args.sample == 'named':
231+
query_named_params(args.corpus, args.min_word_count)
232+
elif args.sample == 'positional':
233+
query_positional_params(args.corpus, args.min_word_count)
234+
elif args.sample == 'array':
235+
query_array_params(args.gender, args.states)
236+
elif args.sample == 'timestamp':
237+
query_timestamp_params(
238+
args.year, args.month, args.day, args.hour, args.minute)
239+
elif args.sample == 'struct':
240+
query_struct_params(args.x, args.y)
241+
else:
242+
print('Unexpected value for sample')

bigquery/cloud-client/sync_query_params_test.py renamed to bigquery/cloud-client/query_params_test.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,41 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import sync_query_params
15+
import query_params
1616

1717

18-
def test_sync_query_named_params(capsys):
19-
sync_query_params.sync_query_named_params(
18+
def test_query_array_params(capsys):
19+
query_params.query_array_params(
20+
gender='M',
21+
states=['WA', 'WI', 'WV', 'WY'])
22+
out, _ = capsys.readouterr()
23+
assert 'James' in out
24+
25+
26+
def test_query_named_params(capsys):
27+
query_params.query_named_params(
2028
corpus='romeoandjuliet',
2129
min_word_count=100)
2230
out, _ = capsys.readouterr()
2331
assert 'love' in out
2432

2533

26-
def test_sync_query_positional_params(capsys):
27-
sync_query_params.sync_query_positional_params(
34+
def test_query_positional_params(capsys):
35+
query_params.query_positional_params(
2836
corpus='romeoandjuliet',
2937
min_word_count=100)
3038
out, _ = capsys.readouterr()
3139
assert 'love' in out
40+
41+
42+
def test_query_struct_params(capsys):
43+
query_params.query_struct_params(765, "hello world")
44+
out, _ = capsys.readouterr()
45+
assert '765' in out
46+
assert 'hello world' in out
47+
48+
49+
def test_query_timestamp_params(capsys):
50+
query_params.query_timestamp_params(2016, 12, 7, 8, 0)
51+
out, _ = capsys.readouterr()
52+
assert '2016, 12, 7, 9, 0' in out
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
google-cloud-bigquery==0.24.0
2+
pytz==2016.10

0 commit comments

Comments
 (0)