Skip to content

Commit 6b3620a

Browse files
committed
BUG: Update pandas-gbq to latest version of google-cloud-bigquery
google-cloud-bigquery 0.32.0.dev1 introduced a breaking change in the way configuration for the query job gets loaded. That release also added the 'description' field to the schema resource, so this change updates the schema comparison logic to ignore that field. The MASTER build in CI is also updated to build with google-cloud-bigquery at MASTER.
1 parent 9666965 commit 6b3620a

File tree

7 files changed

+67
-57
lines changed

7 files changed

+67
-57
lines changed

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ install:
2828
conda install -q numpy pytz python-dateutil;
2929
PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com";
3030
pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS pandas;
31+
pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=api_core';
32+
pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=core';
33+
pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=bigquery';
3134
else
3235
conda install -q pandas=$PANDAS;
3336
fi

ci/requirements-3.5-0.18.1.pip

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
google-auth==1.4.1
22
google-auth-oauthlib==0.0.1
33
mock
4-
google-cloud-bigquery==0.29.0
4+
google-cloud-bigquery==0.32.0

ci/requirements-3.6-MASTER.pip

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
google-auth
22
google-auth-oauthlib
33
mock
4-
google-cloud-bigquery

docs/source/changelog.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
Changelog
22
=========
33

4-
0.3.2 / [TBD]
4+
0.4.0 / [TBD]
55
------------------
66
- Fix bug with querying for an array of floats (:issue:`123`)
7+
- Fix bug with integer columns on Windows. Explicitly use 64-bit integers when converting from BQ types. (:issue:`119`)
8+
- Update ``google-cloud-bigquery`` dependency to version 0.32.0+ (:issue:`TBD`)
79

810
0.3.1 / 2018-02-13
911
------------------

pandas_gbq/gbq.py

Lines changed: 35 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import time
55
import warnings
66
from datetime import datetime
7-
from distutils.version import StrictVersion
87
from time import sleep
98

109
import numpy as np
@@ -23,17 +22,15 @@ def _check_google_client_version():
2322
raise ImportError('Could not import pkg_resources (setuptools).')
2423

2524
# https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/bigquery/CHANGELOG.md
26-
bigquery_client_minimum_version = '0.29.0'
25+
bigquery_minimum_version = pkg_resources.parse_version('0.32.0.dev1')
26+
bigquery_installed_version = pkg_resources.get_distribution(
27+
'google-cloud-bigquery').parsed_version
2728

28-
_BIGQUERY_CLIENT_VERSION = pkg_resources.get_distribution(
29-
'google-cloud-bigquery').version
30-
31-
if (StrictVersion(_BIGQUERY_CLIENT_VERSION) <
32-
StrictVersion(bigquery_client_minimum_version)):
33-
raise ImportError('pandas-gbq requires google-cloud-bigquery >= {0}, '
34-
'current version {1}'
35-
.format(bigquery_client_minimum_version,
36-
_BIGQUERY_CLIENT_VERSION))
29+
if bigquery_installed_version < bigquery_minimum_version:
30+
raise ImportError(
31+
'pandas-gbq requires google-cloud-bigquery >= {0}, '
32+
'current version {1}'.format(
33+
bigquery_minimum_version, bigquery_installed_version))
3734

3835

3936
def _test_google_api_imports():
@@ -459,29 +456,23 @@ def run_query(self, query, **kwargs):
459456
}
460457
config = kwargs.get('configuration')
461458
if config is not None:
462-
if len(config) != 1:
463-
raise ValueError("Only one job type must be specified, but "
464-
"given {}".format(','.join(config.keys())))
465-
if 'query' in config:
466-
if 'query' in config['query']:
467-
if query is not None:
468-
raise ValueError("Query statement can't be specified "
469-
"inside config while it is specified "
470-
"as parameter")
471-
query = config['query']['query']
472-
del config['query']['query']
473-
474-
job_config['query'].update(config['query'])
475-
else:
476-
raise ValueError("Only 'query' job type is supported")
459+
job_config.update(config)
460+
461+
if 'query' in config and 'query' in config['query']:
462+
if query is not None:
463+
raise ValueError("Query statement can't be specified "
464+
"inside config while it is specified "
465+
"as parameter")
466+
query = config['query']['query']
467+
del config['query']['query']
477468

478469
self._start_timer()
479470
try:
480471

481472
logger.info('Requesting query... ')
482473
query_reply = self.client.query(
483474
query,
484-
job_config=QueryJobConfig.from_api_repr(job_config['query']))
475+
job_config=QueryJobConfig.from_api_repr(job_config))
485476
logger.info('ok.\nQuery running...')
486477
except (RefreshError, ValueError):
487478
if self.private_key:
@@ -598,6 +589,15 @@ def schema(self, dataset_id, table_id):
598589
except self.http_error as ex:
599590
self.process_http_error(ex)
600591

592+
def _clean_schema_fields(self, fields):
593+
"""Return a sanitized version of the schema for comparisons."""
594+
fields_sorted = sorted(fields, key=lambda field: field['name'])
595+
# Ignore mode and description when comparing schemas.
596+
return [
597+
{'name': field['name'], 'type': field['type']}
598+
for field in fields_sorted
599+
]
600+
601601
def verify_schema(self, dataset_id, table_id, schema):
602602
"""Indicate whether schemas match exactly
603603
@@ -621,17 +621,9 @@ def verify_schema(self, dataset_id, table_id, schema):
621621
Whether the schemas match
622622
"""
623623

624-
fields_remote = sorted(self.schema(dataset_id, table_id),
625-
key=lambda x: x['name'])
626-
fields_local = sorted(schema['fields'], key=lambda x: x['name'])
627-
628-
# Ignore mode when comparing schemas.
629-
for field in fields_local:
630-
if 'mode' in field:
631-
del field['mode']
632-
for field in fields_remote:
633-
if 'mode' in field:
634-
del field['mode']
624+
fields_remote = self._clean_schema_fields(
625+
self.schema(dataset_id, table_id))
626+
fields_local = self._clean_schema_fields(schema['fields'])
635627

636628
return fields_remote == fields_local
637629

@@ -658,16 +650,9 @@ def schema_is_subset(self, dataset_id, table_id, schema):
658650
Whether the passed schema is a subset
659651
"""
660652

661-
fields_remote = self.schema(dataset_id, table_id)
662-
fields_local = schema['fields']
663-
664-
# Ignore mode when comparing schemas.
665-
for field in fields_local:
666-
if 'mode' in field:
667-
del field['mode']
668-
for field in fields_remote:
669-
if 'mode' in field:
670-
del field['mode']
653+
fields_remote = self._clean_schema_fields(
654+
self.schema(dataset_id, table_id))
655+
fields_local = self._clean_schema_fields(schema['fields'])
671656

672657
return all(field in fields_remote for field in fields_local)
673658

@@ -709,7 +694,7 @@ def _parse_data(schema, rows):
709694
col_names = [str(field['name']) for field in fields]
710695
col_dtypes = [
711696
dtype_map.get(field['type'].upper(), object)
712-
if field['mode'] != 'repeated'
697+
if field['mode'].lower() != 'repeated'
713698
else object
714699
for field in fields
715700
]
@@ -847,7 +832,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
847832
for field in schema['fields']:
848833
if field['type'].upper() in type_map and \
849834
final_df[field['name']].notnull().all() and \
850-
field['mode'] != 'repeated':
835+
field['mode'].lower() != 'repeated':
851836
final_df[field['name']] = \
852837
final_df[field['name']].astype(type_map[field['type'].upper()])
853838

pandas_gbq/tests/test_gbq.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,10 +1266,30 @@ def test_retrieve_schema(self):
12661266
test_id = "15"
12671267
test_schema = {
12681268
'fields': [
1269-
{'name': 'A', 'type': 'FLOAT', 'mode': 'NULLABLE'},
1270-
{'name': 'B', 'type': 'FLOAT', 'mode': 'NULLABLE'},
1271-
{'name': 'C', 'type': 'STRING', 'mode': 'NULLABLE'},
1272-
{'name': 'D', 'type': 'TIMESTAMP', 'mode': 'NULLABLE'}
1269+
{
1270+
'name': 'A',
1271+
'type': 'FLOAT',
1272+
'mode': 'NULLABLE',
1273+
'description': None,
1274+
},
1275+
{
1276+
'name': 'B',
1277+
'type': 'FLOAT',
1278+
'mode': 'NULLABLE',
1279+
'description': None,
1280+
},
1281+
{
1282+
'name': 'C',
1283+
'type': 'STRING',
1284+
'mode': 'NULLABLE',
1285+
'description': None,
1286+
},
1287+
{
1288+
'name': 'D',
1289+
'type': 'TIMESTAMP',
1290+
'mode': 'NULLABLE',
1291+
'description': None,
1292+
},
12731293
]
12741294
}
12751295

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def readme():
1717

1818

1919
INSTALL_REQUIRES = [
20+
'setuptools',
2021
'pandas',
2122
'google-auth',
2223
'google-auth-oauthlib',

0 commit comments

Comments
 (0)