diff --git a/.gitignore b/.gitignore index 33e26ed4..251cc50d 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,8 @@ .ipynb_checkpoints .tags .pytest_cache -.testmondata +.testmon* +.vscode/ # Docs # ######## diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index a454ec84..5d7d4dd7 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -4,6 +4,13 @@ Changelog 0.5.0 / TBD ----------- +- Project ID parameter is optional in ``read_gbq`` and ``to_gbq`` when it can be inferred from the environment. Note: you must still pass in a project ID when using user-based authentication. (:issue:`103`) + +Internal changes +~~~~~~~~~~~~~~~~ + - Tests now use `nox` to run in multiple Python environments. (:issue:`52`) - Renamed internal modules. (:issue:`154`) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 5c8af053..b7447074 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -186,7 +186,15 @@ def __init__(self, project_id, reauth=False, self.auth_local_webserver = auth_local_webserver self.dialect = dialect self.credentials_path = _get_credentials_file() - self.credentials = self.get_credentials() + self.credentials, default_project = self.get_credentials() + + if self.project_id is None: + self.project_id = default_project + + if self.project_id is None: + raise ValueError( + 'Could not determine project ID and one was not supplied.') + self.client = self.get_client() # BQ Queries costs $5 per TB. 
First 1 TB per month is free @@ -196,12 +204,14 @@ def __init__(self, project_id, reauth=False, def get_credentials(self): if self.private_key: return self.get_service_account_credentials() - else: - # Try to retrieve Application Default Credentials - credentials = self.get_application_default_credentials() - if not credentials: - credentials = self.get_user_account_credentials() - return credentials + + # Try to retrieve Application Default Credentials + credentials, default_project = ( + self.get_application_default_credentials()) + if credentials: + return credentials, default_project + + return self.get_user_account_credentials(), None def get_application_default_credentials(self): """ @@ -227,11 +237,13 @@ def get_application_default_credentials(self): from google.auth.exceptions import DefaultCredentialsError try: - credentials, _ = google.auth.default(scopes=[self.scope]) + credentials, default_project = google.auth.default( + scopes=[self.scope]) except (DefaultCredentialsError, IOError): - return None + return None, None - return _try_credentials(self.project_id, credentials) + billing_project = self.project_id or default_project + return _try_credentials(billing_project, credentials), default_project def load_user_account_credentials(self): """ @@ -412,7 +424,7 @@ def get_service_account_credentials(self): request = google.auth.transport.requests.Request() credentials.refresh(request) - return credentials + return credentials, json_key.get('project_id') except (KeyError, ValueError, TypeError, AttributeError): raise InvalidPrivateKeyFormat( "Private key is missing or invalid. It should be service " @@ -750,7 +762,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, ---------- query : str SQL-Like Query to return data values - project_id : str + project_id : str (optional when available in environment) Google BigQuery Account project ID. 
index_col : str (optional) Name of result column to use for index in results DataFrame @@ -809,9 +821,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, "a future version. Set logging level in order to vary " "verbosity", FutureWarning, stacklevel=1) - if not project_id: - raise TypeError("Missing required parameter: project_id") - if dialect not in ('legacy', 'standard'): raise ValueError("'{0}' is not valid for dialect".format(dialect)) @@ -859,7 +868,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, return final_df -def to_gbq(dataframe, destination_table, project_id, chunksize=None, +def to_gbq(dataframe, destination_table, project_id=None, chunksize=None, verbose=None, reauth=False, if_exists='fail', private_key=None, auth_local_webserver=False, table_schema=None): """Write a DataFrame to a Google BigQuery table. @@ -891,7 +900,7 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=None, DataFrame to be written destination_table : string Name of table to be written, in the form 'dataset.tablename' - project_id : str + project_id : str (optional when available in environment) Google BigQuery Account project ID. chunksize : int (default None) Number of rows to be inserted in each chunk from the dataframe. 
Use diff --git a/tests/system.py b/tests/system.py index 8782b580..6f57df3e 100644 --- a/tests/system.py +++ b/tests/system.py @@ -50,12 +50,8 @@ def _get_dataset_prefix_random(): def _get_project_id(): - - project = os.environ.get('GBQ_PROJECT_ID') - if not project: - pytest.skip( - "Cannot run integration tests without a project id") - return project + return (os.environ.get('GBQ_PROJECT_ID') + or os.environ.get('GOOGLE_CLOUD_PROJECT')) # noqa def _get_private_key_path(): @@ -85,9 +81,12 @@ def _test_imports(): gbq._test_google_api_imports() -@pytest.fixture -def project(): - return _get_project_id() +@pytest.fixture(params=['env']) +def project(request): + if request.param == 'env': + return _get_project_id() + elif request.param == 'none': + return None def _check_if_can_get_correct_default_credentials(): @@ -99,11 +98,13 @@ def _check_if_can_get_correct_default_credentials(): from google.auth.exceptions import DefaultCredentialsError try: - credentials, _ = google.auth.default(scopes=[gbq.GbqConnector.scope]) + credentials, project = google.auth.default( + scopes=[gbq.GbqConnector.scope]) except (DefaultCredentialsError, IOError): return False - return gbq._try_credentials(_get_project_id(), credentials) is not None + return gbq._try_credentials( + project or _get_project_id(), credentials) is not None def clean_gbq_environment(dataset_prefix, private_key=None): @@ -171,46 +172,14 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) -@pytest.fixture(params=['local', 'service_path', 'service_creds']) -def auth_type(request): - - auth = request.param - - if auth == 'local': - - if _in_travis_environment(): - pytest.skip("Cannot run local auth in travis environment") - - elif auth == 'service_path': - - if _in_travis_environment(): - pytest.skip("Only run one auth type in Travis to save time") - - _skip_if_no_private_key_path() - elif auth == 'service_creds': - _skip_if_no_private_key_contents() - else: - raise ValueError - return auth - - 
@pytest.fixture() -def credentials(auth_type): - - if auth_type == 'local': - return None - - elif auth_type == 'service_path': - return _get_private_key_path() - elif auth_type == 'service_creds': - return _get_private_key_contents() - else: - raise ValueError +def credentials(): + _skip_if_no_private_key_contents() + return _get_private_key_contents() @pytest.fixture() def gbq_connector(project, credentials): - return gbq.GbqConnector(project, private_key=credentials) @@ -220,7 +189,7 @@ def test_should_be_able_to_make_a_connector(self, gbq_connector): assert gbq_connector is not None, 'Could not create a GbqConnector' def test_should_be_able_to_get_valid_credentials(self, gbq_connector): - credentials = gbq_connector.get_credentials() + credentials, _ = gbq_connector.get_credentials() assert credentials.valid def test_should_be_able_to_get_a_bigquery_client(self, gbq_connector): @@ -236,14 +205,12 @@ def test_should_be_able_to_get_results_from_query(self, gbq_connector): assert pages is not None -class TestGBQConnectorIntegrationWithLocalUserAccountAuth(object): +class TestAuth(object): @pytest.fixture(autouse=True) - def setup(self, project): - - _skip_local_auth_if_in_travis_env() - - self.sut = gbq.GbqConnector(project, auth_local_webserver=True) + def setup(self, gbq_connector): + self.sut = gbq_connector + self.sut.auth_local_webserver = True def test_get_application_default_credentials_does_not_throw_error(self): if _check_if_can_get_correct_default_credentials(): @@ -252,9 +219,9 @@ def test_get_application_default_credentials_does_not_throw_error(self): from google.auth.exceptions import DefaultCredentialsError with mock.patch('google.auth.default', side_effect=DefaultCredentialsError()): - credentials = self.sut.get_application_default_credentials() + credentials, _ = self.sut.get_application_default_credentials() else: - credentials = self.sut.get_application_default_credentials() + credentials, _ = self.sut.get_application_default_credentials() assert 
credentials is None def test_get_application_default_credentials_returns_credentials(self): @@ -262,10 +229,14 @@ def test_get_application_default_credentials_returns_credentials(self): pytest.skip("Cannot get default_credentials " "from the environment!") from google.auth.credentials import Credentials - credentials = self.sut.get_application_default_credentials() + credentials, default_project = ( + self.sut.get_application_default_credentials()) + assert isinstance(credentials, Credentials) + assert default_project is not None def test_get_user_account_credentials_bad_file_returns_credentials(self): + _skip_local_auth_if_in_travis_env() from google.auth.credentials import Credentials with mock.patch('__main__.open', side_effect=IOError()): @@ -273,6 +244,8 @@ def test_get_user_account_credentials_bad_file_returns_credentials(self): assert isinstance(credentials, Credentials) def test_get_user_account_credentials_returns_credentials(self): + _skip_local_auth_if_in_travis_env() + from google.auth.credentials import Credentials credentials = self.sut.get_user_account_credentials() assert isinstance(credentials, Credentials) @@ -515,7 +488,8 @@ def test_malformed_query(self): def test_bad_project_id(self): with pytest.raises(gbq.GenericGBQException): - gbq.read_gbq("SELECT 1", project_id='001', + gbq.read_gbq('SELCET * FROM [publicdata:samples.shakespeare]', + project_id='not-my-project', private_key=self.credentials) def test_bad_table_name(self): diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index ae1d35c1..85e4f427 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -15,9 +15,13 @@ @pytest.fixture(autouse=True) def mock_bigquery_client(monkeypatch): + from google.api_core.exceptions import NotFound import google.cloud.bigquery import google.cloud.bigquery.table mock_client = mock.create_autospec(google.cloud.bigquery.Client) + mock_schema = [ + google.cloud.bigquery.SchemaField('_f0', 'INTEGER') + ] # Mock out SELECT 1 query 
results. mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) mock_query.job_id = 'some-random-id' @@ -25,11 +29,12 @@ def mock_bigquery_client(monkeypatch): mock_rows = mock.create_autospec( google.cloud.bigquery.table.RowIterator) mock_rows.total_rows = 1 - mock_rows.schema = [ - google.cloud.bigquery.SchemaField('_f0', 'INTEGER')] + mock_rows.schema = mock_schema mock_rows.__iter__.return_value = [(1,)] mock_query.result.return_value = mock_rows mock_client.query.return_value = mock_query + # Mock table creation. + mock_client.get_table.side_effect = NotFound('nope') monkeypatch.setattr( gbq.GbqConnector, 'get_client', lambda _: mock_client) @@ -42,11 +47,7 @@ def no_auth(monkeypatch): monkeypatch.setattr( gbq.GbqConnector, 'get_application_default_credentials', - lambda _: mock_credentials) - monkeypatch.setattr( - gbq.GbqConnector, - 'get_user_account_credentials', - lambda _: mock_credentials) + lambda _: (mock_credentials, 'default-project')) def test_should_return_credentials_path_set_by_env_var(): @@ -76,12 +77,16 @@ def test_should_return_bigquery_correctly_typed( def test_to_gbq_should_fail_if_invalid_table_name_passed(): with pytest.raises(gbq.NotFoundException): - gbq.to_gbq(DataFrame(), 'invalid_table_name', project_id="1234") + gbq.to_gbq(DataFrame([[1]]), 'invalid_table_name', project_id="1234") -def test_to_gbq_with_no_project_id_given_should_fail(): +def test_to_gbq_with_no_project_id_given_should_fail(monkeypatch): + monkeypatch.setattr( + gbq.GbqConnector, + 'get_application_default_credentials', + lambda _: None) with pytest.raises(TypeError): - gbq.to_gbq(DataFrame(), 'dataset.tablename') + gbq.to_gbq(DataFrame([[1]]), 'dataset.tablename') def test_to_gbq_with_verbose_new_pandas_warns_deprecation(): @@ -95,7 +100,7 @@ def test_to_gbq_with_verbose_new_pandas_warns_deprecation(): mock_version.side_effect = [min_bq_version, pandas_version] try: gbq.to_gbq( - DataFrame(), + DataFrame([[1]]), 'dataset.tablename', 
project_id='my-project', verbose=True) @@ -114,7 +119,7 @@ def test_to_gbq_with_not_verbose_new_pandas_warns_deprecation(): mock_version.side_effect = [min_bq_version, pandas_version] try: gbq.to_gbq( - DataFrame(), + DataFrame([[1]]), 'dataset.tablename', project_id='my-project', verbose=False) @@ -132,7 +137,7 @@ def test_to_gbq_wo_verbose_w_new_pandas_no_warnings(recwarn): mock_version.side_effect = [min_bq_version, pandas_version] try: gbq.to_gbq( - DataFrame(), 'dataset.tablename', project_id='my-project') + DataFrame([[1]]), 'dataset.tablename', project_id='my-project') except gbq.TableCreationError: pass assert len(recwarn) == 0 @@ -148,7 +153,7 @@ def test_to_gbq_with_verbose_old_pandas_no_warnings(recwarn): mock_version.side_effect = [min_bq_version, pandas_version] try: gbq.to_gbq( - DataFrame(), + DataFrame([[1]]), 'dataset.tablename', project_id='my-project', verbose=True) @@ -157,11 +162,20 @@ def test_to_gbq_with_verbose_old_pandas_no_warnings(recwarn): assert len(recwarn) == 0 -def test_read_gbq_with_no_project_id_given_should_fail(): +def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): + monkeypatch.setattr( + gbq.GbqConnector, + 'get_application_default_credentials', + lambda _: None) with pytest.raises(TypeError): gbq.read_gbq('SELECT 1') +def test_read_gbq_with_inferred_project_id(monkeypatch): + df = gbq.read_gbq('SELECT 1') + assert df is not None + + def test_that_parse_data_works_properly(): from google.cloud.bigquery.table import Row test_schema = {'fields': [