
Commit 86ca84d

parthea authored and jreback committed
TST: Fix gbq integration tests. gbq._Dataset.datasets() would not return full results
This PR resolves an issue where `gbq._Dataset.datasets()` would not return all datasets under a Google BigQuery project. If `'nextPageToken'` is populated in a response, another `datasets().list()` request should be sent with `'pageToken'` set in order to collect the remaining results. In the past few days, additional datasets were added under the Google BigQuery project id used by pandas as part of the following GitHub project: https://github.com/pydata/pandas-gbq . The added datasets caused many gbq unit tests to fail because `clean_gbq_environment()` checks whether a dataset exists using the incomplete results from `gbq._Dataset.datasets()` before attempting to delete it.

Author: Anthonios Partheniou <[email protected]>

Closes #15381 from parthea/fix-broken-gbq-unit-tests and squashes the following commits:

61bc1e7 [Anthonios Partheniou] TST: Fix gbq tests. gbq.dataset()/gbq.tables would not return full results.
1 parent d9e75c7 · commit 86ca84d

File tree

2 files changed: +52 −31 lines changed
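The fix follows BigQuery's standard list-pagination handshake described in the commit message: each `datasets().list()` or `tables().list()` response may carry a `'nextPageToken'`, which must be sent back as `pageToken` on the next request until no token is returned. As a minimal, self-contained sketch of that loop (the `list_all_datasets` helper and the `service` object are illustrative assumptions, not part of this commit):

```python
def list_all_datasets(service, project_id):
    """Collect every datasetId across paginated list() responses.

    Illustrative sketch only: ``service`` is assumed to be a
    google-api-python-client resource built for the BigQuery v2 API.
    """
    dataset_ids = []
    page_token = None
    while True:
        response = service.datasets().list(
            projectId=project_id, pageToken=page_token).execute()
        for item in response.get('datasets', []):
            dataset_ids.append(item['datasetReference']['datasetId'])
        page_token = response.get('nextPageToken')
        if not page_token:  # no token means the last page was reached
            return dataset_ids
```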

pandas/io/gbq.py

+44 −23
@@ -1056,21 +1056,32 @@ def datasets(self):
             List of datasets under the specific project
         """
 
-        try:
-            list_dataset_response = self.service.datasets().list(
-                projectId=self.project_id).execute().get('datasets', None)
+        dataset_list = []
+        next_page_token = None
+        first_query = True
 
-            if not list_dataset_response:
-                return []
+        while first_query or next_page_token:
+            first_query = False
 
-            dataset_list = list()
+            try:
+                list_dataset_response = self.service.datasets().list(
+                    projectId=self.project_id,
+                    pageToken=next_page_token).execute()
 
-            for row_num, raw_row in enumerate(list_dataset_response):
-                dataset_list.append(raw_row['datasetReference']['datasetId'])
+                dataset_response = list_dataset_response.get('datasets')
+                next_page_token = list_dataset_response.get('nextPageToken')
 
-            return dataset_list
-        except self.http_error as ex:
-            self.process_http_error(ex)
+                if not dataset_response:
+                    return dataset_list
+
+                for row_num, raw_row in enumerate(dataset_response):
+                    dataset_list.append(
+                        raw_row['datasetReference']['datasetId'])
+
+            except self.http_error as ex:
+                self.process_http_error(ex)
+
+        return dataset_list
 
     def create(self, dataset_id):
         """ Create a dataset in Google BigQuery
@@ -1140,19 +1151,29 @@ def tables(self, dataset_id):
             List of tables under the specific dataset
         """
 
-        try:
-            list_table_response = self.service.tables().list(
-                projectId=self.project_id,
-                datasetId=dataset_id).execute().get('tables', None)
+        table_list = []
+        next_page_token = None
+        first_query = True
 
-            if not list_table_response:
-                return []
+        while first_query or next_page_token:
+            first_query = False
 
-            table_list = list()
+            try:
+                list_table_response = self.service.tables().list(
+                    projectId=self.project_id,
+                    datasetId=dataset_id,
+                    pageToken=next_page_token).execute()
 
-            for row_num, raw_row in enumerate(list_table_response):
-                table_list.append(raw_row['tableReference']['tableId'])
+                table_response = list_table_response.get('tables')
+                next_page_token = list_table_response.get('nextPageToken')
 
-            return table_list
-        except self.http_error as ex:
-            self.process_http_error(ex)
+                if not table_response:
+                    return table_list
+
+                for row_num, raw_row in enumerate(table_response):
+                    table_list.append(raw_row['tableReference']['tableId'])
+
+            except self.http_error as ex:
+                self.process_http_error(ex)
+
+        return table_list
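For comparison, google-api-python-client also ships a `list_next()` helper that wraps the same token handshake. A sketch of an equivalent loop for tables (assuming the built BigQuery resource exposes `list_next()`; `list_all_tables` is an illustrative name, not part of this commit):

```python
def list_all_tables(service, project_id, dataset_id):
    """Variant using the discovery client's list_next() helper.

    Sketch under the assumption that the BigQuery discovery resource
    supports list_next(); token handling is otherwise identical to the
    explicit pageToken loop in the diff above.
    """
    table_ids = []
    request = service.tables().list(projectId=project_id,
                                    datasetId=dataset_id)
    while request is not None:
        response = request.execute()
        for item in response.get('tables', []):
            table_ids.append(item['tableReference']['tableId'])
        # list_next() returns None once the response has no nextPageToken.
        request = service.tables().list_next(request, response)
    return table_ids
```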

pandas/tests/io/test_gbq.py

+8 −8
@@ -253,7 +253,7 @@ def test_generate_bq_schema_deprecated():
         gbq.generate_bq_schema(df)
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase):
 
     def setUp(self):
@@ -299,7 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self):
         self.assertTrue(isinstance(credentials, GoogleCredentials))
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase):
     def setUp(self):
         _setup_common()
@@ -331,7 +331,7 @@ def test_should_be_able_to_get_results_from_query(self):
         self.assertTrue(pages is not None)
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase):
     def setUp(self):
         _setup_common()
@@ -449,7 +449,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self):
             private_key=re.sub('[a-z]', '9', _get_private_key_contents()))
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestReadGBQIntegration(tm.TestCase):
 
     @classmethod
@@ -503,7 +503,7 @@ def test_should_read_as_service_account_with_key_contents(self):
         tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']}))
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase):
 
     @classmethod
@@ -906,7 +906,7 @@ def test_configuration_without_query(self):
             configuration=config)
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase):
     # Changes to BigQuery table schema may take up to 2 minutes as of May 2015
     # As a workaround to this issue, each test should use a unique table name.
@@ -1219,7 +1219,7 @@ def test_dataset_does_not_exist(self):
             DATASET_ID + "_not_found"), 'Expected dataset not to exist')
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase):
     # Changes to BigQuery table schema may take up to 2 minutes as of May 2015
     # As a workaround to this issue, each test should use a unique table name.
@@ -1277,7 +1277,7 @@ def test_upload_data(self):
         self.assertEqual(result['num_rows'][0], test_size)
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase):
     # Changes to BigQuery table schema may take up to 2 minutes as of May 2015
    # As a workaround to this issue, each test should use a unique table name.
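The `@pytest.mark.single` marker lets these network-bound integration classes be selected or excluded as a group instead of being permanently xfail'd. How the marker is registered is project-specific; as a generic illustration (the description string below is an assumption, not pandas' actual configuration), a `conftest.py` could declare it like this:

```python
# conftest.py -- illustrative registration of a custom pytest marker.
def pytest_configure(config):
    config.addinivalue_line(
        'markers',
        'single: tests that should run serially, e.g. live gbq integration')
```

With the marker registered, `pytest pandas/tests/io/test_gbq.py -m single` selects only these classes, while `-m "not single"` skips them.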
