Skip to content

Commit 410a0b8

Browse files
author
Robert Lacok
committed
to_gbq respects location argument properly
If dataset does not exist, it gets created in the correct location
1 parent 8b7f8fd commit 410a0b8

File tree

2 files changed

+63
-15
lines changed

2 files changed

+63
-15
lines changed

pandas_gbq/gbq.py

Lines changed: 31 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -283,7 +283,7 @@ def __init__(
283283

284284
# BQ Queries costs $5 per TB. First 1 TB per month is free
285285
# see here for more: https://cloud.google.com/bigquery/pricing
286-
self.query_price_for_TB = 5. / 2 ** 40 # USD/TB
286+
self.query_price_for_TB = 5.0 / 2 ** 40 # USD/TB
287287

288288
def _start_timer(self):
289289
self.start = time.time()
@@ -895,7 +895,11 @@ def to_gbq(
895895
dataset_id, table_id = destination_table.rsplit(".", 1)
896896

897897
table = _Table(
898-
project_id, dataset_id, reauth=reauth, private_key=private_key
898+
project_id,
899+
dataset_id,
900+
reauth=reauth,
901+
private_key=private_key,
902+
location=location,
899903
)
900904

901905
if not table_schema:
@@ -967,9 +971,18 @@ def _generate_bq_schema(df, default_type="STRING"):
967971

968972

969973
class _Table(GbqConnector):
970-
def __init__(self, project_id, dataset_id, reauth=False, private_key=None):
974+
def __init__(
975+
self,
976+
project_id,
977+
dataset_id,
978+
reauth=False,
979+
private_key=None,
980+
location=None,
981+
):
971982
self.dataset_id = dataset_id
972-
super(_Table, self).__init__(project_id, reauth, private_key)
983+
super(_Table, self).__init__(
984+
project_id, reauth, private_key, location=location
985+
)
973986

974987
def exists(self, table_id):
975988
""" Check if a table exists in Google BigQuery
@@ -1017,9 +1030,11 @@ def create(self, table_id, schema):
10171030
if not _Dataset(self.project_id, private_key=self.private_key).exists(
10181031
self.dataset_id
10191032
):
1020-
_Dataset(self.project_id, private_key=self.private_key).create(
1021-
self.dataset_id
1022-
)
1033+
_Dataset(
1034+
self.project_id,
1035+
private_key=self.private_key,
1036+
location=self.location,
1037+
).create(self.dataset_id)
10231038

10241039
table_ref = self.client.dataset(self.dataset_id).table(table_id)
10251040
table = Table(table_ref)
@@ -1064,8 +1079,12 @@ def delete(self, table_id):
10641079

10651080

10661081
class _Dataset(GbqConnector):
1067-
def __init__(self, project_id, reauth=False, private_key=None):
1068-
super(_Dataset, self).__init__(project_id, reauth, private_key)
1082+
def __init__(
1083+
self, project_id, reauth=False, private_key=None, location=None
1084+
):
1085+
super(_Dataset, self).__init__(
1086+
project_id, reauth, private_key, location=location
1087+
)
10691088

10701089
def exists(self, dataset_id):
10711090
""" Check if a dataset exists in Google BigQuery
@@ -1107,6 +1126,9 @@ def create(self, dataset_id):
11071126

11081127
dataset = Dataset(self.client.dataset(dataset_id))
11091128

1129+
if self.location is not None:
1130+
dataset.location = self.location
1131+
11101132
try:
11111133
self.client.create_dataset(dataset)
11121134
except self.http_error as ex:

tests/system/test_gbq.py

Lines changed: 32 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -741,12 +741,12 @@ def test_query_response_bytes(self):
741741
assert self.gbq_connector.sizeof_fmt(1048576) == "1.0 MB"
742742
assert self.gbq_connector.sizeof_fmt(1048576000) == "1000.0 MB"
743743
assert self.gbq_connector.sizeof_fmt(1073741824) == "1.0 GB"
744-
assert self.gbq_connector.sizeof_fmt(1.099512E12) == "1.0 TB"
745-
assert self.gbq_connector.sizeof_fmt(1.125900E15) == "1.0 PB"
746-
assert self.gbq_connector.sizeof_fmt(1.152922E18) == "1.0 EB"
747-
assert self.gbq_connector.sizeof_fmt(1.180592E21) == "1.0 ZB"
748-
assert self.gbq_connector.sizeof_fmt(1.208926E24) == "1.0 YB"
749-
assert self.gbq_connector.sizeof_fmt(1.208926E28) == "10000.0 YB"
744+
assert self.gbq_connector.sizeof_fmt(1.099512e12) == "1.0 TB"
745+
assert self.gbq_connector.sizeof_fmt(1.125900e15) == "1.0 PB"
746+
assert self.gbq_connector.sizeof_fmt(1.152922e18) == "1.0 EB"
747+
assert self.gbq_connector.sizeof_fmt(1.180592e21) == "1.0 ZB"
748+
assert self.gbq_connector.sizeof_fmt(1.208926e24) == "1.0 YB"
749+
assert self.gbq_connector.sizeof_fmt(1.208926e28) == "10000.0 YB"
750750

751751
def test_struct(self, project_id):
752752
query = """SELECT 1 int_field,
@@ -1325,6 +1325,32 @@ def test_upload_data_tokyo(
13251325
)
13261326
assert table.num_rows > 0
13271327

1328+
def test_upload_data_tokyo_non_existing_dataset(
1329+
self, project_id, random_dataset_id, bigquery_client
1330+
):
1331+
test_size = 10
1332+
df = make_mixed_dataframe_v2(test_size)
1333+
non_existing_tokyo_dataset = random_dataset_id
1334+
non_existing_tokyo_destination = "{}.to_gbq_test".format(
1335+
non_existing_tokyo_dataset
1336+
)
1337+
1338+
# Initialize table with sample data
1339+
gbq.to_gbq(
1340+
df,
1341+
non_existing_tokyo_destination,
1342+
project_id,
1343+
private_key=self.credentials,
1344+
location="asia-northeast1",
1345+
)
1346+
1347+
table = bigquery_client.get_table(
1348+
bigquery_client.dataset(non_existing_tokyo_dataset).table(
1349+
"to_gbq_test"
1350+
)
1351+
)
1352+
assert table.num_rows > 0
1353+
13281354

13291355
# _Dataset tests
13301356

0 commit comments

Comments
 (0)