Skip to content

Commit 3dc8ebe

Browse files
authored
fix: table schema change error (#692)
* fix: table schema change error
* improve comments
1 parent 271f432 commit 3dc8ebe

File tree

3 files changed

+70
-12
lines changed

3 files changed

+70
-12
lines changed

pandas_gbq/gbq.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,12 +1205,15 @@ def to_gbq(
12051205
)
12061206
table_connector.create(table_id, table_schema)
12071207
else:
1208-
# Convert original schema (the schema that already exists) to pandas-gbq API format
1209-
original_schema = pandas_gbq.schema.to_pandas_gbq(table.schema)
1210-
1211-
# Update the local `table_schema` so mode (NULLABLE/REQUIRED)
1212-
# matches. See: https://github.com/pydata/pandas-gbq/issues/315
1213-
table_schema = pandas_gbq.schema.update_schema(table_schema, original_schema)
1208+
if if_exists == "append":
1209+
# Convert original schema (the schema that already exists) to pandas-gbq API format
1210+
original_schema = pandas_gbq.schema.to_pandas_gbq(table.schema)
1211+
1212+
# Update the local `table_schema` so mode (NULLABLE/REQUIRED)
1213+
# matches. See: https://github.com/pydata/pandas-gbq/issues/315
1214+
table_schema = pandas_gbq.schema.update_schema(
1215+
table_schema, original_schema
1216+
)
12141217

12151218
if dataframe.empty:
12161219
# Create the table (if needed), but don't try to run a load job with an

tests/system/test_gbq.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,7 @@ def test_upload_data_if_table_exists_replace(self, project_id):
788788
test_size = 10
789789
df = make_mixed_dataframe_v2(test_size)
790790
df_different_schema = make_mixed_dataframe_v1()
791+
schema_new = gbq.generate_bq_schema(df_different_schema)
791792

792793
# Initialize table with sample data
793794
gbq.to_gbq(
@@ -798,7 +799,7 @@ def test_upload_data_if_table_exists_replace(self, project_id):
798799
credentials=self.credentials,
799800
)
800801

801-
# Test the if_exists parameter with the value 'replace'.
802+
# When if_exists == 'replace', table schema should change too.
802803
gbq.to_gbq(
803804
df_different_schema,
804805
self.destination_table + test_id,
@@ -807,15 +808,16 @@ def test_upload_data_if_table_exists_replace(self, project_id):
807808
credentials=self.credentials,
808809
)
809810

810-
result = gbq.read_gbq(
811-
"SELECT COUNT(*) AS num_rows FROM {0}".format(
812-
self.destination_table + test_id
813-
),
811+
df_new = gbq.read_gbq(
812+
"SELECT * FROM {0}".format(self.destination_table + test_id),
814813
project_id=project_id,
815814
credentials=self.credentials,
816815
dialect="legacy",
817816
)
818-
assert result["num_rows"][0] == 5
817+
818+
schema_returned = gbq.generate_bq_schema(df_new)
819+
assert schema_new == schema_returned
820+
assert df_new.shape[0] == 5
819821

820822
def test_upload_data_if_table_exists_raises_value_error(self, project_id):
821823
test_id = "4"

tests/unit/test_gbq.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,59 @@ def test_load_does_not_modify_schema_arg(mock_bigquery_client):
650650
assert original_schema == original_schema_cp
651651

652652

653+
def test_load_modifies_schema(mock_bigquery_client):
654+
"""Test of https://github.com/googleapis/python-bigquery-pandas/issues/670"""
655+
from google.api_core.exceptions import NotFound
656+
657+
# Create table with new schema.
658+
mock_bigquery_client.get_table.side_effect = NotFound("nope")
659+
df = DataFrame(
660+
{
661+
"field1": ["a", "b"],
662+
"field2": [1, 2],
663+
"field3": [datetime.date(2019, 1, 1), datetime.date(2019, 5, 1)],
664+
}
665+
)
666+
original_schema = [
667+
{"name": "field1", "type": "STRING", "mode": "REQUIRED"},
668+
{"name": "field2", "type": "INTEGER"},
669+
{"name": "field3", "type": "DATE"},
670+
]
671+
original_schema_cp = copy.deepcopy(original_schema)
672+
gbq.to_gbq(
673+
df,
674+
"dataset.schematest",
675+
project_id="my-project",
676+
table_schema=original_schema,
677+
if_exists="fail",
678+
)
679+
assert original_schema == original_schema_cp
680+
681+
# Test that when if_exists == "replace", the new table schema updates
682+
# according to the local schema.
683+
new_df = DataFrame(
684+
{
685+
"field1": ["a", "b"],
686+
"field2": ["c", "d"],
687+
"field3": [datetime.date(2019, 1, 1), datetime.date(2019, 5, 1)],
688+
}
689+
)
690+
new_schema = [
691+
{"name": "field1", "type": "STRING", "mode": "REQUIRED"},
692+
{"name": "field2", "type": "STRING"},
693+
{"name": "field3", "type": "DATE"},
694+
]
695+
new_schema_cp = copy.deepcopy(new_schema)
696+
gbq.to_gbq(
697+
new_df,
698+
"dataset.schematest",
699+
project_id="my-project",
700+
table_schema=new_schema,
701+
if_exists="replace",
702+
)
703+
assert new_schema == new_schema_cp
704+
705+
653706
def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_credentials):
654707
mock_service_account_credentials.project_id = "service_account_project_id"
655708
df = gbq.read_gbq(

0 commit comments

Comments
 (0)