Skip to content

Add bigquery create and copy table examples #514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 12, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bigquery/cloud-client/export_data_to_gcs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


DATASET_ID = 'test_dataset'
TABLE_ID = 'test_import_table'
TABLE_ID = 'test_table'


def test_export_data_to_gcs(cloud_config, capsys):
Expand Down
88 changes: 88 additions & 0 deletions bigquery/cloud-client/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@
"""

import argparse
import time
import uuid

from gcloud import bigquery
import gcloud.bigquery.job


def list_projects():
Expand Down Expand Up @@ -82,6 +85,32 @@ def list_tables(dataset_name, project=None):
print(table.name)


def create_table(dataset_name, table_name, project=None):
    """Creates a simple table in the given dataset.

    If no project is specified, then the currently active project is used.
    """
    client = bigquery.Client(project=project)
    target_dataset = client.dataset(dataset_name)

    # Bail out early rather than letting table.create() fail with a
    # less obvious error when the dataset is missing.
    if not target_dataset.exists():
        print('Dataset {} does not exist.'.format(dataset_name))
        return

    new_table = target_dataset.table(table_name)

    # Define a fixed example schema for the new table.
    new_table.schema = (
        bigquery.SchemaField('Name', 'STRING'),
        bigquery.SchemaField('Age', 'INTEGER'),
        bigquery.SchemaField('Weight', 'FLOAT'),
    )

    new_table.create()

    print('Created table {} in dataset {}.'.format(table_name, dataset_name))


def list_rows(dataset_name, table_name, project=None):
"""Prints rows in the given table.

Expand Down Expand Up @@ -126,6 +155,50 @@ def list_rows(dataset_name, table_name, project=None):
print(format_string.format(*row))


def copy_table(dataset_name, table_name, new_table_name, project=None):
    """Copies a table.

    If no project is specified, then the currently active project is used.
    """
    client = bigquery.Client(project=project)
    dataset = client.dataset(dataset_name)
    source_table = dataset.table(table_name)

    # The destination here lives in the same dataset and project as the
    # source, but copies across datasets and projects are possible too.
    # Multiple source tables can also be merged into one destination by
    # passing additional arguments to `copy_table`.
    destination_table = dataset.table(new_table_name)

    # Submit a copy job under a fresh, unique job ID.
    job_id = str(uuid.uuid4())
    job = client.copy_table(job_id, destination_table, source_table)

    # Have BigQuery create the destination table if it is missing.
    job.create_disposition = (
        gcloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED)

    # Start the job.
    job.begin()

    # Block until the copy completes.
    print('Waiting for job to finish...')
    wait_for_job(job)

    print('Table {} copied to {}.'.format(table_name, new_table_name))


def wait_for_job(job):
    """Polls the given job once per second until it completes.

    Raises RuntimeError if the job finished with an error result.
    """
    while True:
        # Refresh the job state via a GET request.
        job.reload()
        if job.state != 'DONE':
            time.sleep(1)
            continue
        if job.error_result:
            raise RuntimeError(job.error_result)
        return


def delete_table(dataset_name, table_name, project=None):
"""Deletes a table in a given dataset.

Expand Down Expand Up @@ -155,11 +228,22 @@ def delete_table(dataset_name, table_name, project=None):
'list-tables', help=list_tables.__doc__)
list_tables_parser.add_argument('dataset_name')

create_table_parser = subparsers.add_parser(
'create-table', help=create_table.__doc__)
create_table_parser.add_argument('dataset_name')
create_table_parser.add_argument('table_name')

list_rows_parser = subparsers.add_parser(
'list-rows', help=list_rows.__doc__)
list_rows_parser.add_argument('dataset_name')
list_rows_parser.add_argument('table_name')

copy_table_parser = subparsers.add_parser(
'copy-table', help=copy_table.__doc__)
copy_table_parser.add_argument('dataset_name')
copy_table_parser.add_argument('table_name')
copy_table_parser.add_argument('new_table_name')

delete_table_parser = subparsers.add_parser(
'delete-table', help=delete_table.__doc__)
delete_table_parser.add_argument('dataset_name')
Expand All @@ -171,7 +255,11 @@ def delete_table(dataset_name, table_name, project=None):
list_datasets(args.project)
elif args.command == 'list-tables':
list_tables(args.dataset_name, args.project)
elif args.command == 'create-table':
create_table(args.dataset_name, args.table_name, args.project)
elif args.command == 'list-rows':
list_rows(args.dataset_name, args.table_name, args.project)
elif args.command == 'copy-table':
copy_table(args.dataset_name, args.table_name, args.new_table_name)
elif args.command == 'delete-table':
delete_table(args.dataset_name, args.table_name, args.project)
39 changes: 37 additions & 2 deletions bigquery/cloud-client/snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@


DATASET_ID = 'test_dataset'
TABLE_ID = 'test_import_table'
TABLE_ID = 'test_table'


@pytest.mark.xfail(
Expand Down Expand Up @@ -62,7 +62,42 @@ def test_list_rows(capsys):
assert 'Age' in out


def test_delete_table(capsys):
@pytest.fixture
def temporary_table():
    """Fixture that returns a factory for tables that do not yet exist and
    will be automatically deleted after the test."""
    client = bigquery.Client()
    dataset = client.dataset(DATASET_ID)
    created = []

    def factory(table_name):
        # Ensure the table starts absent, then track it for teardown.
        table = dataset.table(table_name)
        if table.exists():
            table.delete()
        created.append(table)
        return table

    yield factory

    # Teardown: drop any tables the test left behind.
    for table in created:
        if table.exists():
            table.delete()


def test_create_table(temporary_table):
    """create_table should materialize the requested table."""
    table = temporary_table('test_create_table')
    snippets.create_table(DATASET_ID, table.name)
    assert table.exists()


@pytest.mark.slow
def test_copy_table(temporary_table):
    """copy_table should produce the destination table."""
    destination = temporary_table('test_copy_table')
    snippets.copy_table(DATASET_ID, TABLE_ID, destination.name)
    assert destination.exists()


def test_delete_table():
# Create a table to delete
bigquery_client = bigquery.Client()
dataset = bigquery_client.dataset(DATASET_ID)
Expand Down