Skip to content

Commit ae6a7a2

Browse files
committed
ENH Append loads data if schema is subset of existing schema
1 parent b8ca74f commit ae6a7a2

File tree

2 files changed

+36
-7
lines changed

2 files changed

+36
-7
lines changed

pandas_gbq/gbq.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,14 @@ def verify_schema(self, dataset_id, table_id, schema):
588588

589589
return fields_remote == fields_local
590590

591+
def schema_is_subset(self, dataset_id, table_id, schema):
    """Indicate whether the schema to be uploaded is a subset of
    the schema of the table currently at ``dataset_id.table_id``.

    Fields are compared as serialized JSON strings so that a field
    matches only when every attribute (name, type, mode, ...) agrees.
    ``sort_keys=True`` makes the comparison insensitive to the key
    order of the individual field dicts.

    Parameters
    ----------
    dataset_id : str
        Name of the BigQuery dataset holding the table
    table_id : str
        Name of the table whose remote schema is fetched
    schema : dict
        Local schema to test, of the form ``{'fields': [...]}``

    Returns
    -------
    bool
        True if every local field exists in the remote schema.
    """
    # json.dumps makes each (unhashable) field dict usable as a
    # set element; sort_keys canonicalizes the key order.
    fields_remote = {json.dumps(field, sort_keys=True)
                     for field in self.schema(dataset_id, table_id)}
    fields_local = {json.dumps(field, sort_keys=True)
                    for field in schema['fields']}

    return fields_remote >= fields_local
598+
591599
def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
592600
delay = 0
593601

@@ -850,7 +858,8 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
850858
connector.delete_and_recreate_table(
851859
dataset_id, table_id, table_schema)
852860
elif if_exists == 'append':
853-
if not connector.verify_schema(dataset_id, table_id, table_schema):
861+
if not connector.schema_is_subset(dataset_id,
862+
table_id, table_schema):
854863
raise InvalidSchema("Please verify that the structure and "
855864
"data types in the DataFrame match the "
856865
"schema of the destination table.")

pandas_gbq/tests/test_gbq.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,9 @@
1919
import pandas.util.testing as tm
2020
from pandas.compat.numpy import np_datetime64_compat
2121

22-
PROJECT_ID = os.environ.get('PROJECT_ID')
23-
PRIVATE_KEY_JSON_PATH = 'client_secrets.json'
24-
PRIVATE_KEY_JSON_CONTENTS = os.environ.get('SERVICE_ACCOUNT_KEY')
25-
# PROJECT_ID = None
26-
# PRIVATE_KEY_JSON_PATH = None
27-
# PRIVATE_KEY_JSON_CONTENTS = None
22+
PROJECT_ID = os.getenv('PROJECT_ID')
23+
PRIVATE_KEY_JSON_PATH = os.getenv('PRIVATE_KEY_JSON_PATH')
24+
PRIVATE_KEY_JSON_CONTENTS = os.getenv('PRIVATE_KEY_JSON_CONTENTS')
2825

2926
TABLE_ID = 'new_test'
3027

@@ -1085,6 +1082,29 @@ def test_upload_data_if_table_exists_append(self):
10851082
_get_project_id(), if_exists='append',
10861083
private_key=_get_private_key_path())
10871084

1085+
def test_upload_subset_columns_if_table_exists_append(self):
    # Appending a DataFrame that carries only a subset of the
    # destination table's columns should succeed and add its rows.
    test_id = "16"
    test_size = 10
    dataframe = make_mixed_dataframe_v2(test_size)
    dataframe_subset_cols = dataframe.iloc[:, :2]
    destination = self.destination_table + test_id

    # Seed the destination table with the full-schema sample data
    gbq.to_gbq(dataframe, destination, _get_project_id(),
               chunksize=10000, private_key=_get_private_key_path())

    # Append the narrower DataFrame with if_exists='append'
    gbq.to_gbq(dataframe_subset_cols, destination, _get_project_id(),
               if_exists='append', private_key=_get_private_key_path())

    # Give BigQuery's streaming buffer time to become queryable
    sleep(30)

    result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}"
                          .format(destination),
                          project_id=_get_project_id(),
                          private_key=_get_private_key_path())
    self.assertEqual(result['num_rows'][0], test_size * 2)
1107+
10881108
def test_upload_data_if_table_exists_replace(self):
10891109
test_id = "4"
10901110
test_size = 10

0 commit comments

Comments
 (0)