From 3e124a28c95a7ba6cbaba0495799eafbe6ff490b Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 02:31:02 +0000 Subject: [PATCH 01/12] Fixtures for GBQ Tests --- pandas/tests/io/test_gbq.py | 62 ++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index f040dc2d0a70a..4a92986adc5e4 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -1,8 +1,10 @@ +from contextlib import ExitStack as does_not_raise from datetime import datetime import os import platform import numpy as np +from pandas_gbq.gbq import TableCreationError import pytest import pytz @@ -21,7 +23,7 @@ DATASET_ID = "pydata_pandas_bq_testing_py3" TABLE_ID = "new_test" -DESTINATION_TABLE = "{0}.{1}".format(DATASET_ID + "1", TABLE_ID) +DESTINATION_TABLE = f"{DATASET_ID + '1'}.{TABLE_ID}" VERSION = platform.python_version() @@ -149,33 +151,28 @@ def mock_read_gbq(sql, **kwargs): @pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath: - @classmethod - def setup_class(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - + @pytest.fixture() + def gbq_dataset(self): + # Setup Dataset _skip_if_no_project_id() _skip_if_no_private_key_path() - cls.client = _get_client() - cls.dataset = cls.client.dataset(DATASET_ID + "1") + self.client = _get_client() + self.dataset = self.client.dataset(DATASET_ID + "1") try: # Clean-up previous test runs. - cls.client.delete_dataset(cls.dataset, delete_contents=True) + self.client.delete_dataset(self.dataset, delete_contents=True) except api_exceptions.NotFound: pass # It's OK if the dataset doesn't already exist. - cls.client.create_dataset(bigquery.Dataset(cls.dataset)) + self.client.create_dataset(bigquery.Dataset(self.dataset)) + + yield - @classmethod - def teardown_class(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - cls.client.delete_dataset(cls.dataset, delete_contents=True) + # Teardown Dataset + self.client.delete_dataset(self.dataset, delete_contents=True) - def test_roundtrip(self): + def test_roundtrip(self, gbq_dataset): destination_table = DESTINATION_TABLE + "1" test_size = 20001 @@ -189,31 +186,38 @@ def test_roundtrip(self): ) result = pd.read_gbq( - "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), + f"SELECT COUNT(*) AS num_rows FROM {destination_table}", project_id=_get_project_id(), credentials=_get_credentials(), dialect="standard", ) assert result["num_rows"][0] == test_size - @pytest.mark.xfail(reason="Test breaking master") + @pytest.mark.xfail(reason="Test breaking master", strict=False) @pytest.mark.parametrize( - "if_exists, expected_num_rows", - [("append", 300), ("fail", 200), ("replace", 100)], + "if_exists, expected_num_rows, expectation", + [ + ("append", 300, does_not_raise()), + ("fail", 200, pytest.raises(TableCreationError)), + ("replace", 100, does_not_raise()), + ], ) - def test_gbq_if_exists(self, if_exists, expected_num_rows): + def test_gbq_if_exists( + self, if_exists, expected_num_rows, expectation, gbq_dataset + ): # GH 29598 destination_table = DESTINATION_TABLE + "2" test_size = 200 df = make_mixed_dataframe_v2(test_size) - df.to_gbq( - destination_table, - _get_project_id(), - chunksize=None, - credentials=_get_credentials(), - ) + with expectation: + df.to_gbq( + destination_table, + _get_project_id(), + chunksize=None, + credentials=_get_credentials(), + ) df.iloc[:100].to_gbq( destination_table, From 51f51718e7ca6303f07ceeb31bd94e722acc5785 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 02:35:21 +0000 Subject: [PATCH 02/12] Fix import --- pandas/tests/io/test_gbq.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 4a92986adc5e4..5f7df121afc24 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -4,7 +4,6 @@ import platform import numpy as np -from pandas_gbq.gbq import TableCreationError import pytest import pytz @@ -198,7 +197,7 @@ def test_roundtrip(self, gbq_dataset): "if_exists, expected_num_rows, expectation", [ ("append", 300, does_not_raise()), - ("fail", 200, pytest.raises(TableCreationError)), + ("fail", 200, pytest.raises(pandas_gbq.gbq.TableCreationError)), ("replace", 100, does_not_raise()), ], ) From e69f9a1e3ef687f118d928d033891f413befc4aa Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 16:16:30 +0000 Subject: [PATCH 03/12] Update as per comments --- pandas/tests/io/test_gbq.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 5f7df121afc24..83eead64562da 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -2,6 +2,7 @@ from datetime import datetime import os import platform +import uuid import numpy as np import pytest @@ -19,11 +20,6 @@ PRIVATE_KEY_JSON_PATH = None PRIVATE_KEY_JSON_CONTENTS = None -DATASET_ID = "pydata_pandas_bq_testing_py3" - -TABLE_ID = "new_test" -DESTINATION_TABLE = f"{DATASET_ID + '1'}.{TABLE_ID}" - VERSION = platform.python_version() @@ -156,8 +152,10 @@ def gbq_dataset(self): _skip_if_no_project_id() _skip_if_no_private_key_path() + dataset_id = "pydata_pandas_bq_testing_py31" + self.client = _get_client() - self.dataset = self.client.dataset(DATASET_ID + "1") + self.dataset = self.client.dataset(dataset_id) try: # Clean-up previous test runs. self.client.delete_dataset(self.dataset, delete_contents=True) @@ -166,13 +164,15 @@ def gbq_dataset(self): self.client.create_dataset(bigquery.Dataset(self.dataset)) - yield + table_id = str(uuid.uuid1()) + destination_table = f"{dataset_id}.{table_id}" + yield destination_table # Teardown Dataset self.client.delete_dataset(self.dataset, delete_contents=True) def test_roundtrip(self, gbq_dataset): - destination_table = DESTINATION_TABLE + "1" + destination_table = gbq_dataset test_size = 20001 df = make_mixed_dataframe_v2(test_size) @@ -205,7 +205,7 @@ def test_gbq_if_exists( self, if_exists, expected_num_rows, expectation, gbq_dataset ): # GH 29598 - destination_table = DESTINATION_TABLE + "2" + destination_table = gbq_dataset test_size = 200 df = make_mixed_dataframe_v2(test_size) From d266ca0ce57a0934d74e0606c2c786250a2e823f Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 16:31:23 +0000 Subject: [PATCH 04/12] Update doc --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dfda1470413b7..a939c7e33bb17 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1466,7 +1466,7 @@ def to_gbq( Behavior when the destination table exists. Value can be one of: ``'fail'`` - If table exists, do nothing. + If table exists raise pandas_gbq.gbq.TableCreationError. ``'replace'`` If table exists, drop it, recreate it, and insert data. ``'append'`` From add8e88e311fd4c2fc1c965a7c532da19b932ab7 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 16:34:49 +0000 Subject: [PATCH 05/12] Remove xfail --- pandas/tests/io/test_gbq.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 83eead64562da..4bc4b7a0b1f2e 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -192,7 +192,6 @@ def test_roundtrip(self, gbq_dataset): ) assert result["num_rows"][0] == test_size - @pytest.mark.xfail(reason="Test breaking master", strict=False) @pytest.mark.parametrize( "if_exists, expected_num_rows, expectation", [ From 59a9dcde75e7a21c955ecc3518b81d06eb8b7f2c Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 20:18:46 +0000 Subject: [PATCH 06/12] Table name must be alphanumeric --- pandas/tests/io/test_gbq.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 4bc4b7a0b1f2e..5a0cb49535d51 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -2,7 +2,8 @@ from datetime import datetime import os import platform -import uuid +import random +import string import numpy as np import pytest @@ -164,8 +165,8 @@ def gbq_dataset(self): self.client.create_dataset(bigquery.Dataset(self.dataset)) - table_id = str(uuid.uuid1()) - destination_table = f"{dataset_id}.{table_id}" + table_name = "".join(random.choices(string.ascii_lowercase, k=10)) + destination_table = f"{dataset_id}.{table_name}" yield destination_table # Teardown Dataset From f4ecffbc855d62cb988422e0cce3138228e27842 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 22:58:11 +0000 Subject: [PATCH 07/12] Put expectation on correct to_gbq call --- pandas/tests/io/test_gbq.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 5a0cb49535d51..48c8923dab7cd 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -210,22 +210,22 @@ def test_gbq_if_exists( test_size = 200 df = make_mixed_dataframe_v2(test_size) + df.to_gbq( + destination_table, + _get_project_id(), + chunksize=None, + credentials=_get_credentials(), + ) + with expectation: - df.to_gbq( + df.iloc[:100].to_gbq( destination_table, _get_project_id(), + if_exists=if_exists, chunksize=None, credentials=_get_credentials(), ) - df.iloc[:100].to_gbq( - destination_table, - _get_project_id(), - if_exists=if_exists, - chunksize=None, - credentials=_get_credentials(), - ) - result = pd.read_gbq( f"SELECT COUNT(*) AS num_rows FROM {destination_table}", project_id=_get_project_id(), From c4a983022c6b50d9fbcd82cc082aa9b8f92523e0 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 2 Jan 2020 23:05:16 +0000 Subject: [PATCH 08/12] Don't Error if Dataset Exists --- pandas/tests/io/test_gbq.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 48c8923dab7cd..2fe9443b2fea6 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -157,13 +157,13 @@ def gbq_dataset(self): self.client = _get_client() self.dataset = self.client.dataset(dataset_id) - try: - # Clean-up previous test runs. - self.client.delete_dataset(self.dataset, delete_contents=True) - except api_exceptions.NotFound: - pass # It's OK if the dataset doesn't already exist. - self.client.create_dataset(bigquery.Dataset(self.dataset)) + # Ensure previous test runs are removed + self.client.delete_dataset( + self.dataset, delete_contents=True, not_found_ok=True + ) + + self.client.create_dataset(bigquery.Dataset(self.dataset), exists_ok=True) table_name = "".join(random.choices(string.ascii_lowercase, k=10)) destination_table = f"{dataset_id}.{table_name}" From 5e949565b35292d25cb23c72d4daba2958167561 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 2 Jan 2020 23:24:47 +0000 Subject: [PATCH 09/12] Teardown tables --- pandas/tests/io/test_gbq.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 2fe9443b2fea6..4f9cb71333711 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -158,19 +158,15 @@ def gbq_dataset(self): self.client = _get_client() self.dataset = self.client.dataset(dataset_id) - # Ensure previous test runs are removed - self.client.delete_dataset( - self.dataset, delete_contents=True, not_found_ok=True - ) - + # Create the dataset if it doesn't already exist self.client.create_dataset(bigquery.Dataset(self.dataset), exists_ok=True) table_name = "".join(random.choices(string.ascii_lowercase, k=10)) destination_table = f"{dataset_id}.{table_name}" yield destination_table - # Teardown Dataset - self.client.delete_dataset(self.dataset, delete_contents=True) + # Teardown tables created + self.client.delete_table(destination_table, not_found_ok=True) def test_roundtrip(self, gbq_dataset): destination_table = gbq_dataset From a91b8a3dfe0a3c6efea4a33a06676ad1ea63055e Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 2 Jan 2020 23:38:11 +0000 Subject: [PATCH 10/12] Give dataset random name --- pandas/tests/io/test_gbq.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 4f9cb71333711..9deeeadce16bc 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -147,26 +147,30 @@ def mock_read_gbq(sql, **kwargs): @pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath: + @staticmethod + def generate_rand_str(length: int = 10) -> str: + return "".join(random.choices(string.ascii_lowercase, k=length)) + @pytest.fixture() def gbq_dataset(self): # Setup Dataset _skip_if_no_project_id() _skip_if_no_private_key_path() - dataset_id = "pydata_pandas_bq_testing_py31" + dataset_id = "pydata_pandas_bq_testing_" + self.generate_rand_str() self.client = _get_client() self.dataset = self.client.dataset(dataset_id) - # Create the dataset if it doesn't already exist - self.client.create_dataset(bigquery.Dataset(self.dataset), exists_ok=True) + # Create the dataset + self.client.create_dataset(bigquery.Dataset(self.dataset)) - table_name = "".join(random.choices(string.ascii_lowercase, k=10)) + table_name = self.generate_rand_str() destination_table = f"{dataset_id}.{table_name}" yield destination_table - # Teardown tables created - self.client.delete_table(destination_table, not_found_ok=True) + # Teardown Dataset + self.client.delete_dataset(self.dataset, delete_contents=True) def test_roundtrip(self, gbq_dataset): destination_table = gbq_dataset From 7687d4397194e83504e0c60d173a216c4a6662aa Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Fri, 3 Jan 2020 02:08:34 +0000 Subject: [PATCH 11/12] Make func module level --- pandas/tests/io/test_gbq.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 9deeeadce16bc..5e800194d82c9 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -145,19 +145,19 @@ def mock_read_gbq(sql, **kwargs): assert "progress_bar_type" not in captured_kwargs +def generate_rand_str(length: int = 10) -> str: + return "".join(random.choices(string.ascii_lowercase, k=length)) + + @pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath: - @staticmethod - def generate_rand_str(length: int = 10) -> str: - return "".join(random.choices(string.ascii_lowercase, k=length)) - @pytest.fixture() def gbq_dataset(self): # Setup Dataset _skip_if_no_project_id() _skip_if_no_private_key_path() - dataset_id = "pydata_pandas_bq_testing_" + self.generate_rand_str() + dataset_id = "pydata_pandas_bq_testing_" + generate_rand_str() self.client = _get_client() self.dataset = self.client.dataset(dataset_id) @@ -165,7 +165,7 @@ def gbq_dataset(self): # Create the dataset self.client.create_dataset(bigquery.Dataset(self.dataset)) - table_name = self.generate_rand_str() + table_name = generate_rand_str() destination_table = f"{dataset_id}.{table_name}" yield destination_table From ceef3f6ba8dabe8291c571f979d730e90e35d4ef Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Fri, 3 Jan 2020 02:09:46 +0000 Subject: [PATCH 12/12] Make func module level --- pandas/tests/io/test_gbq.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 5e800194d82c9..7a5eba5264421 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -68,6 +68,10 @@ def _get_client(): return bigquery.Client(project=project_id, credentials=credentials) +def generate_rand_str(length: int = 10) -> str: + return "".join(random.choices(string.ascii_lowercase, k=length)) + + def make_mixed_dataframe_v2(test_size): # create df to test for all BQ datatypes except RECORD bools = np.random.randint(2, size=(1, test_size)).astype(bool) @@ -145,10 +149,6 @@ def mock_read_gbq(sql, **kwargs): assert "progress_bar_type" not in captured_kwargs -def generate_rand_str(length: int = 10) -> str: - return "".join(random.choices(string.ascii_lowercase, k=length)) - - @pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath: @pytest.fixture()