
Commit 8f19fdc

ENH: log rather than print (#18)
* rebase
* Changes from feedback
* remove verbose from docstrings and internal classes
* WIP
* update for newer code
* remove verbose for private class
* updates if cache hit
* couple refinements
* remove commented out code
* show logs in pytest
* docs
1 parent 09e4237 commit 8f19fdc

File tree

5 files changed: +71 -76 lines

.travis.yml
docs/source/intro.rst
docs/source/reading.rst
docs/source/writing.rst
pandas_gbq/gbq.py

.travis.yml

Lines changed: 1 addition & 1 deletion
@@ -42,6 +42,6 @@ install:
   - python setup.py install

 script:
-  - pytest -s -v --cov=pandas_gbq --cov-report xml:/tmp/pytest-cov.xml pandas_gbq
+  - pytest -v --cov=pandas_gbq --cov-report xml:/tmp/pytest-cov.xml pandas_gbq
   - if [[ $COVERAGE == 'true' ]]; then codecov ; fi
   - if [[ $LINT == 'true' ]]; then flake8 pandas_gbq -v ; fi
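A side note on the dropped `-s`: once output goes through the logging module instead of print(), tests can assert on it with pytest's built-in caplog fixture rather than relying on stdout capture being disabled. A hypothetical test sketch (not part of this commit; the logger name is assumed):

import logging

def test_row_count_is_logged(caplog):
    # caplog captures records emitted through the logging module, so the
    # suite no longer needs `pytest -s` to observe this output.
    caplog.set_level(logging.INFO, logger='pandas_gbq.gbq')
    logging.getLogger('pandas_gbq.gbq').info('Got 3 rows.')
    assert 'Got 3 rows.' in caplog.text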

docs/source/intro.rst

Lines changed: 19 additions & 2 deletions
@@ -26,6 +26,23 @@ While this trade-off works well for most cases, it breaks down for storing
 values greater than 2**53. Such values in BigQuery can represent identifiers
 and unnoticed precision lost for identifier is what we want to avoid.

+Logging
++++++++
+
+Because some requests take some time, this library will log its progress of
+longer queries. IPython & Jupyter by default attach a handler to the logger.
+If you're running in another process and want to see logs, or you want to see
+more verbose logs, you can do something like:
+
+.. code-block:: ipython
+
+    import logging
+    import sys
+    logger = logging.getLogger('pandas_gbq')
+    logger.setLevel(logging.DEBUG)
+    logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
 .. _authentication:

 Authentication
@@ -49,8 +66,8 @@ Additional information on service accounts can be found
 `here <https://developers.google.com/identity/protocols/OAuth2#serviceaccount>`__.

 Authentication via ``application default credentials`` is also possible, but only valid
-if the parameter ``private_key`` is not provided. This method requires that the
-credentials can be fetched from the development environment. Otherwise, the OAuth2
+if the parameter ``private_key`` is not provided. This method requires that the
+credentials can be fetched from the development environment. Otherwise, the OAuth2
 client-side authentication is used. Additional information can be found on
 `application default credentials <https://developers.google.com/identity/protocols/application-default-credentials>`__.
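The snippet added to intro.rst above is the minimal setup. A slightly fuller sketch of the same idea, assuming the 'pandas_gbq' logger name used in the docs (the formatter is an illustrative extra, not something this commit adds):

import logging
import sys

# Send pandas_gbq records to stdout; DEBUG also surfaces details such as
# cache hits and bytes processed/billed, which the library logs at DEBUG level.
logger = logging.getLogger('pandas_gbq')
logger.setLevel(logging.DEBUG)

handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(logging.Formatter('%(levelname)s %(name)s: %(message)s'))
logger.addHandler(handler)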

docs/source/reading.rst

Lines changed: 0 additions & 4 deletions
@@ -45,10 +45,6 @@ For more information about query configuration parameters see
 You can find your project id in the `Google developers console <https://console.developers.google.com>`__.


-.. note::
-
-    You can toggle the verbose output via the ``verbose`` flag which defaults to ``True``.
-
 .. note::

     The ``dialect`` argument can be used to indicate whether to use BigQuery's ``'legacy'`` SQL
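With the verbose note gone, the logging level is the knob that replaces it. A sketch of the rough equivalent of the old ``verbose=False`` when reading (the query and project id are placeholders):

import logging
import pandas_gbq

# Roughly the old verbose=False: hide the INFO-level progress messages.
logging.getLogger('pandas_gbq').setLevel(logging.WARNING)

df = pandas_gbq.read_gbq('SELECT 1 AS x', project_id='my-project')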

docs/source/writing.rst

Lines changed: 0 additions & 18 deletions
@@ -54,24 +54,6 @@ For example, the following writes ``df`` to a BigQuery table in batches of 10000

    to_gbq(df, 'my_dataset.my_table', projectid, chunksize=10000)

-You can also see the progress of your post via the ``verbose`` flag which defaults to ``True``.
-For example:
-
-.. code-block:: python
-
-   In [8]: to_gbq(df, 'my_dataset.my_table', projectid, chunksize=10000, verbose=True)
-
-   Streaming Insert is 10% Complete
-   Streaming Insert is 20% Complete
-   Streaming Insert is 30% Complete
-   Streaming Insert is 40% Complete
-   Streaming Insert is 50% Complete
-   Streaming Insert is 60% Complete
-   Streaming Insert is 70% Complete
-   Streaming Insert is 80% Complete
-   Streaming Insert is 90% Complete
-   Streaming Insert is 100% Complete
-
 .. note::

     If an error occurs while streaming data to BigQuery, see
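The removed progress printout has a logging counterpart: per the gbq.py diff below, load_data now emits "Load is N% Complete" at INFO level, so enabling INFO logging before the call shows comparable progress. A sketch, with the table name, project id and data as placeholders:

import logging

import pandas as pd
from pandas_gbq import to_gbq

# INFO level surfaces the per-chunk "Load is N% Complete" messages.
logging.basicConfig(level=logging.INFO)

df = pd.DataFrame({'num': range(30000)})
to_gbq(df, 'my_dataset.my_table', 'my-project', chunksize=10000)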

pandas_gbq/gbq.py

Lines changed: 51 additions & 51 deletions
@@ -1,6 +1,6 @@
 import json
+import logging
 import os
-import sys
 import time
 import warnings
 from datetime import datetime
@@ -11,6 +11,8 @@
 from pandas import DataFrame, compat
 from pandas.compat import lzip

+logger = logging.getLogger(__name__)
+

 def _check_google_client_version():

@@ -162,15 +164,14 @@ class TableCreationError(ValueError):
 class GbqConnector(object):
     scope = 'https://www.googleapis.com/auth/bigquery'

-    def __init__(self, project_id, reauth=False, verbose=False,
+    def __init__(self, project_id, reauth=False,
                  private_key=None, auth_local_webserver=False,
                  dialect='legacy'):
         from google.api_core.exceptions import GoogleAPIError
         from google.api_core.exceptions import ClientError
         self.http_error = (ClientError, GoogleAPIError)
         self.project_id = project_id
         self.reauth = reauth
-        self.verbose = verbose
         self.private_key = private_key
         self.auth_local_webserver = auth_local_webserver
         self.dialect = dialect
@@ -324,7 +325,7 @@ def save_user_account_credentials(self, credentials):
                 }
                 json.dump(credentials_json, credentials_file)
         except IOError:
-            self._print('Unable to save credentials.')
+            logger.warning('Unable to save credentials.')

     def get_user_account_credentials(self):
         """Gets user account credentials.
@@ -410,22 +411,17 @@ def get_service_account_credentials(self):
                 "Can be obtained from: https://console.developers.google."
                 "com/permissions/serviceaccounts")

-    def _print(self, msg, end='\n'):
-        if self.verbose:
-            sys.stdout.write(msg + end)
-            sys.stdout.flush()
-
     def _start_timer(self):
         self.start = time.time()

     def get_elapsed_seconds(self):
         return round(time.time() - self.start, 2)

-    def print_elapsed_seconds(self, prefix='Elapsed', postfix='s.',
-                              overlong=7):
+    def log_elapsed_seconds(self, prefix='Elapsed', postfix='s.',
+                            overlong=7):
         sec = self.get_elapsed_seconds()
         if sec > overlong:
-            self._print('{} {} {}'.format(prefix, sec, postfix))
+            logger.info('{} {} {}'.format(prefix, sec, postfix))

     # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
     @staticmethod
@@ -481,11 +477,12 @@ def run_query(self, query, **kwargs):

         self._start_timer()
         try:
-            self._print('Requesting query... ', end="")
+
+            logger.info('Requesting query... ')
             query_reply = self.client.query(
                 query,
                 job_config=QueryJobConfig.from_api_repr(job_config['query']))
-            self._print('ok.')
+            logger.info('ok.\nQuery running...')
         except (RefreshError, ValueError):
             if self.private_key:
                 raise AccessDenied(
@@ -498,10 +495,10 @@ def run_query(self, query, **kwargs):
             self.process_http_error(ex)

         job_id = query_reply.job_id
-        self._print('Job ID: %s\nQuery running...' % job_id)
+        logger.info('Job ID: %s\nQuery running...' % job_id)

         while query_reply.state != 'DONE':
-            self.print_elapsed_seconds(' Elapsed', 's. Waiting...')
+            self.log_elapsed_seconds(' Elapsed', 's. Waiting...')

             timeout_ms = job_config['query'].get('timeoutMs')
             if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000:
@@ -520,19 +517,16 @@ def run_query(self, query, **kwargs):
             except self.http_error as ex:
                 self.process_http_error(ex)

-        if self.verbose:
-            if query_reply.cache_hit:
-                self._print('Query done.\nCache hit.\n')
-            else:
-                bytes_processed = query_reply.total_bytes_processed or 0
-                bytes_billed = query_reply.total_bytes_billed or 0
-                self._print('Query done.\nProcessed: {} Billed: {}'.format(
-                    self.sizeof_fmt(bytes_processed),
-                    self.sizeof_fmt(bytes_billed)))
-                self._print('Standard price: ${:,.2f} USD\n'.format(
-                    bytes_billed * self.query_price_for_TB))
-
-            self._print('Retrieving results...')
+        if query_reply.cache_hit:
+            logger.debug('Query done.\nCache hit.\n')
+        else:
+            bytes_processed = query_reply.total_bytes_processed or 0
+            bytes_billed = query_reply.total_bytes_billed or 0
+            logger.debug('Query done.\nProcessed: {} Billed: {}'.format(
+                self.sizeof_fmt(bytes_processed),
+                self.sizeof_fmt(bytes_billed)))
+            logger.debug('Standard price: ${:,.2f} USD\n'.format(
+                bytes_billed * self.query_price_for_TB))

         try:
             rows_iter = query_reply.result()
@@ -546,8 +540,8 @@ def run_query(self, query, **kwargs):
                        for field in rows_iter.schema],
         }

-        # print basic query stats
-        self._print('Got {} rows.\n'.format(total_rows))
+        # log basic query stats
+        logger.info('Got {} rows.\n'.format(total_rows))

         return schema, result_rows

@@ -557,18 +551,18 @@ def load_data(
         from pandas_gbq import _load

         total_rows = len(dataframe)
-        self._print("\n\n")
+        logger.info("\n\n")

         try:
             for remaining_rows in _load.load_chunks(
                     self.client, dataframe, dataset_id, table_id,
                     chunksize=chunksize, schema=schema):
-                self._print("\rLoad is {0}% Complete".format(
+                logger.info("\rLoad is {0}% Complete".format(
                     ((total_rows - remaining_rows) * 100) / total_rows))
         except self.http_error as ex:
             self.process_http_error(ex)

-        self._print("\n")
+        logger.info("\n")

     def schema(self, dataset_id, table_id):
         """Retrieve the schema of the table
@@ -687,7 +681,7 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
         # be a 120 second delay

         if not self.verify_schema(dataset_id, table_id, table_schema):
-            self._print('The existing table has a different schema. Please '
+            logger.info('The existing table has a different schema. Please '
                         'wait 2 minutes. See Google BigQuery issue #191')
             delay = 120

@@ -729,7 +723,7 @@ def _parse_data(schema, rows):


 def read_gbq(query, project_id=None, index_col=None, col_order=None,
-             reauth=False, verbose=True, private_key=None,
+             reauth=False, verbose=None, private_key=None,
              auth_local_webserver=False, dialect='legacy', **kwargs):
     r"""Load data from Google BigQuery using google-cloud-python

@@ -768,8 +762,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
     reauth : boolean (default False)
         Force Google BigQuery to reauthenticate the user. This is useful
         if multiple accounts are used.
-    verbose : boolean (default True)
-        Verbose output
     private_key : str (optional)
         Service account private key in JSON format. Can be file path
         or string contents. This is useful for remote server
@@ -793,6 +785,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
         compliant with the SQL 2011 standard. For more information
         see `BigQuery SQL Reference
         <https://cloud.google.com/bigquery/sql-reference/>`__
+    verbose : None, deprecated

     **kwargs : Arbitrary keyword arguments
         configuration (dict): query config parameters for job processing.
@@ -809,6 +802,11 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
         DataFrame representing results of query

     """
+    if verbose is not None:
+        warnings.warn(
+            "verbose is deprecated and will be removed in "
+            "a future version. Set logging level in order to vary "
+            "verbosity", FutureWarning, stacklevel=1)

     _test_google_api_imports()
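The verbose keyword is kept only to warn. A self-contained illustration of the same deprecation pattern (read_gbq_stub is a stand-in, not the library function):

import warnings

def read_gbq_stub(query, verbose=None):
    # Same pattern as the diff: warn whenever the old flag is passed.
    if verbose is not None:
        warnings.warn(
            "verbose is deprecated and will be removed in "
            "a future version. Set logging level in order to vary "
            "verbosity", FutureWarning, stacklevel=2)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    read_gbq_stub('SELECT 1', verbose=True)

assert caught and issubclass(caught[-1].category, FutureWarning)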

@@ -819,7 +817,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
         raise ValueError("'{0}' is not valid for dialect".format(dialect))

     connector = GbqConnector(
-        project_id, reauth=reauth, verbose=verbose, private_key=private_key,
+        project_id, reauth=reauth, private_key=private_key,
         dialect=dialect, auth_local_webserver=auth_local_webserver)
     schema, rows = connector.run_query(query, **kwargs)
     final_df = _parse_data(schema, rows)
@@ -853,7 +851,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
             final_df[field['name']] = \
                 final_df[field['name']].astype(type_map[field['type'].upper()])

-    connector.print_elapsed_seconds(
+    connector.log_elapsed_seconds(
         'Total time taken',
         datetime.now().strftime('s.\nFinished at %Y-%m-%d %H:%M:%S.'),
         0
@@ -863,7 +861,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,


 def to_gbq(dataframe, destination_table, project_id, chunksize=None,
-           verbose=True, reauth=False, if_exists='fail', private_key=None,
+           verbose=None, reauth=False, if_exists='fail', private_key=None,
            auth_local_webserver=False, table_schema=None):
     """Write a DataFrame to a Google BigQuery table.

@@ -899,8 +897,6 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=None,
     chunksize : int (default None)
         Number of rows to be inserted in each chunk from the dataframe. Use
         ``None`` to load the dataframe in a single chunk.
-    verbose : boolean (default True)
-        Show percentage complete
     reauth : boolean (default False)
         Force Google BigQuery to reauthenticate the user. This is useful
         if multiple accounts are used.
@@ -930,10 +926,17 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=None,
         of DataFrame columns. See BigQuery API documentation on available
         names of a field.
         .. versionadded:: 0.3.1
+    verbose : None, deprecated
     """

     _test_google_api_imports()

+    if verbose is not None:
+        warnings.warn(
+            "verbose is deprecated and will be removed in "
+            "a future version. Set logging level in order to vary "
+            "verbosity", FutureWarning, stacklevel=1)
+
     if if_exists not in ('fail', 'replace', 'append'):
         raise ValueError("'{0}' is not valid for if_exists".format(if_exists))

@@ -942,7 +945,7 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=None,
             "Invalid Table Name. Should be of the form 'datasetId.tableId' ")

     connector = GbqConnector(
-        project_id, reauth=reauth, verbose=verbose, private_key=private_key,
+        project_id, reauth=reauth, private_key=private_key,
         auth_local_webserver=auth_local_webserver)
     dataset_id, table_id = destination_table.rsplit('.', 1)

@@ -1004,10 +1007,9 @@ def _generate_bq_schema(df, default_type='STRING'):

 class _Table(GbqConnector):

-    def __init__(self, project_id, dataset_id, reauth=False, verbose=False,
-                 private_key=None):
+    def __init__(self, project_id, dataset_id, reauth=False, private_key=None):
         self.dataset_id = dataset_id
-        super(_Table, self).__init__(project_id, reauth, verbose, private_key)
+        super(_Table, self).__init__(project_id, reauth, private_key)

     def exists(self, table_id):
         """ Check if a table exists in Google BigQuery
@@ -1101,10 +1103,8 @@ def delete(self, table_id):

 class _Dataset(GbqConnector):

-    def __init__(self, project_id, reauth=False, verbose=False,
-                 private_key=None):
-        super(_Dataset, self).__init__(project_id, reauth, verbose,
-                                       private_key)
+    def __init__(self, project_id, reauth=False, private_key=None):
+        super(_Dataset, self).__init__(project_id, reauth, private_key)

     def exists(self, dataset_id):
         """ Check if a dataset exists in Google BigQuery
