diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 732be618..961f289c 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,6 +1,17 @@ Changelog ========= +.. _changelog-0.9.0: + +0.9.0 / TBD +----------- + +Internal changes +~~~~~~~~~~~~~~~~ + +- **New dependency** Use the ``pydata-google-auth`` package for + authentication. (:issue:`241`) + .. _changelog-0.8.0: 0.8.0 / 2018-11-12 diff --git a/docs/source/conf.py b/docs/source/conf.py index 40c39114..1959fc36 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -372,6 +372,7 @@ intersphinx_mapping = { "https://docs.python.org/": None, "https://pandas.pydata.org/pandas-docs/stable/": None, + "https://pydata-google-auth.readthedocs.io/en/latest/": None, "https://google-auth.readthedocs.io/en/latest/": None, } diff --git a/docs/source/howto/authentication.rst b/docs/source/howto/authentication.rst index a44a61c7..4612bc4b 100644 --- a/docs/source/howto/authentication.rst +++ b/docs/source/howto/authentication.rst @@ -7,7 +7,7 @@ pandas-gbq `authenticates with the Google BigQuery service .. _authentication: -Authentication with a Service Account +Authenticating with a Service Account -------------------------------------- Using service account credentials is particularly useful when working on @@ -57,10 +57,81 @@ To use service account credentials, set the ``credentials`` parameter to the res ) df = pandas_gbq.read_gbq(sql, project_id="YOUR-PROJECT-ID", credentials=credentials) +Use the :func:`~google.oauth2.service_account.Credentials.with_scopes` method +to authorize with specific OAuth2 scopes, which may be required in +queries to federated data sources such as Google Sheets. + +.. code:: python + + credentials = ... 
+ credentials = credentials.with_scopes( + [ + 'https://www.googleapis.com/auth/drive', + 'https://www.googleapis.com/auth/cloud-platform', + ], + ) + df = pandas_gbq.read_gbq(..., credentials=credentials) + See the `Getting started with authentication on Google Cloud Platform `_ guide for more information on service accounts. + +Authenticating with a User Account +---------------------------------- + +Use the `pydata-google-auth <https://pydata-google-auth.readthedocs.io/>`__ +library to authenticate with a user account (i.e. a G Suite or Gmail +account). The :func:`pydata_google_auth.get_user_credentials` function loads +credentials from a cache on disk or initiates an OAuth 2.0 flow if cached +credentials are not found. + +.. code:: python + + import pandas_gbq + import pydata_google_auth + + SCOPES = [ + 'https://www.googleapis.com/auth/cloud-platform', + 'https://www.googleapis.com/auth/drive', + ] + + credentials = pydata_google_auth.get_user_credentials( + SCOPES, + # Set auth_local_webserver to True to have a slightly more convenient + # authorization flow. Note, this doesn't work if you're running from a + # notebook on a remote server, such as over SSH or with Google Colab. + auth_local_webserver=True, + ) + + df = pandas_gbq.read_gbq( + "SELECT my_col FROM `my_dataset.my_table`", + project_id='YOUR-PROJECT-ID', + credentials=credentials, + ) + +.. warning:: + + Do not store credentials on disk when using shared computing resources + such as a GCE VM or Colab notebook. Use the + :data:`pydata_google_auth.cache.NOOP` cache to avoid writing credentials + to disk. + + .. code:: python + + import pydata_google_auth.cache + + credentials = pydata_google_auth.get_user_credentials( + SCOPES, + # Use the NOOP cache to avoid writing credentials to disk. + credentials_cache=pydata_google_auth.cache.NOOP, + ) + +Additional information on the user credentials authentication mechanism +can be found in the `Google Cloud authentication guide +<https://cloud.google.com/docs/authentication/end-user>`__. 
+ + Default Authentication Methods ------------------------------ @@ -71,6 +142,19 @@ methods: 1. In-memory, cached credentials at ``pandas_gbq.context.credentials``. See :attr:`pandas_gbq.Context.credentials` for details. + .. code:: python + + import pandas_gbq + + credentials = ... # From google-auth or pydata-google-auth library. + + # Update the in-memory credentials cache (added in pandas-gbq 0.7.0). + pandas_gbq.context.credentials = credentials + pandas_gbq.context.project = "your-project-id" + + # The credentials and project_id arguments can be omitted. + df = pandas_gbq.read_gbq("SELECT my_col FROM `my_dataset.my_table`") + 2. Application Default Credentials via the :func:`google.auth.default` function. @@ -87,13 +171,14 @@ methods: 3. User account credentials. pandas-gbq loads cached credentials from a hidden user folder on the - operating system. Override the location of the cached user credentials - by setting the ``PANDAS_GBQ_CREDENTIALS_FILE`` environment variable. + operating system. + + Windows + ``%APPDATA%\pandas_gbq\bigquery_credentials.dat`` + + Linux/Mac/Unix + ``~/.config/pandas_gbq/bigquery_credentials.dat`` If pandas-gbq does not find cached credentials, it opens a browser window asking for you to authenticate to your BigQuery account using the product name ``pandas GBQ``. - - Additional information on the user credentails authentication mechanism - can be found `here - `__. 
diff --git a/docs/source/install.rst b/docs/source/install.rst index c64c7939..457a33e9 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -37,6 +37,7 @@ Dependencies This module requires following additional dependencies: +- `pydata-google-auth `__: Helpers for authentication to Google's API - `google-auth `__: authentication and authorization for Google's API - `google-auth-oauthlib `__: integration with `oauthlib `__ for end-user authentication - `google-cloud-bigquery `__: Google Cloud client library for BigQuery diff --git a/pandas_gbq/auth.py b/pandas_gbq/auth.py index 9f32bac7..b6dca129 100644 --- a/pandas_gbq/auth.py +++ b/pandas_gbq/auth.py @@ -12,40 +12,48 @@ logger = logging.getLogger(__name__) +CREDENTIALS_CACHE_DIRNAME = "pandas_gbq" +CREDENTIALS_CACHE_FILENAME = "bigquery_credentials.dat" SCOPES = ["https://www.googleapis.com/auth/bigquery"] +# The following constants are used for end-user authentication. +# They identify the application that is requesting permission to access the +# BigQuery API on behalf of a G Suite or Gmail user. +# +# In a web application, the client secret would be kept secret, but this is not +# possible for applications that are installed locally on an end-user's +# machine. +# +# See: https://cloud.google.com/docs/authentication/end-user for details. 
+CLIENT_ID = ( + "495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com" +) +CLIENT_SECRET = "kOc9wMptUtxkcIFbtZCcrEAc" + def get_credentials( - private_key=None, - project_id=None, - reauth=False, - auth_local_webserver=False, - try_credentials=None, + private_key=None, project_id=None, reauth=False, auth_local_webserver=False ): - if try_credentials is None: - try_credentials = _try_credentials + import pydata_google_auth if private_key: return get_service_account_credentials(private_key) - # Try to retrieve Application Default Credentials - credentials, default_project = get_application_default_credentials( - try_credentials, project_id=project_id - ) - - if credentials: - return credentials, default_project - - credentials = get_user_account_credentials( - try_credentials, - project_id=project_id, - reauth=reauth, + credentials, default_project_id = pydata_google_auth.default( + SCOPES, + client_id=CLIENT_ID, + client_secret=CLIENT_SECRET, + credentials_cache=get_credentials_cache(reauth), auth_local_webserver=auth_local_webserver, ) + + project_id = project_id or default_project_id return credentials, project_id def get_service_account_credentials(private_key): + """DEPRECATED: Load service account credentials from key data or key path.""" + import google.auth.transport.requests from google.oauth2.service_account import Credentials @@ -87,233 +95,14 @@ def get_service_account_credentials(private_key): ) -def get_application_default_credentials(try_credentials, project_id=None): - """ - This method tries to retrieve the "default application credentials". - This could be useful for running code on Google Cloud Platform. - - Parameters - ---------- - project_id (str, optional): Override the default project ID. - - Returns - ------- - - GoogleCredentials, - If the default application credentials can be retrieved - from the environment. The retrieved credentials should also - have access to the project (project_id) on BigQuery. 
- - OR None, - If default application credentials can not be retrieved - from the environment. Or, the retrieved credentials do not - have access to the project (project_id) on BigQuery. - """ - import google.auth - from google.auth.exceptions import DefaultCredentialsError - - try: - credentials, default_project = google.auth.default(scopes=SCOPES) - except (DefaultCredentialsError, IOError): - return None, None - - # Even though we now have credentials, check that the credentials can be - # used with BigQuery. For example, we could be running on a GCE instance - # that does not allow the BigQuery scopes. - billing_project = project_id or default_project - return try_credentials(billing_project, credentials), billing_project - - -def get_user_account_credentials( - try_credentials, - project_id=None, - reauth=False, - auth_local_webserver=False, - credentials_path=None, -): - """Gets user account credentials. +def get_credentials_cache(reauth,): + import pydata_google_auth.cache - This method authenticates using user credentials, either loading saved - credentials from a file or by going through the OAuth flow. - - Parameters - ---------- - None - - Returns - ------- - GoogleCredentials : credentials - Credentials for the user with BigQuery access. - """ - from google_auth_oauthlib.flow import InstalledAppFlow - from oauthlib.oauth2.rfc6749.errors import OAuth2Error - - # Use the default credentials location under ~/.config and the - # equivalent directory on windows if the user has not specified a - # credentials path. - if not credentials_path: - credentials_path = get_default_credentials_path() - - # Previously, pandas-gbq saved user account credentials in the - # current working directory. If the bigquery_credentials.dat file - # exists in the current working directory, move the credentials to - # the new default location. 
- if os.path.isfile("bigquery_credentials.dat"): - os.rename("bigquery_credentials.dat", credentials_path) - - credentials = None - if not reauth: - credentials = load_user_account_credentials( - try_credentials, - project_id=project_id, - credentials_path=credentials_path, + if reauth: + return pydata_google_auth.cache.WriteOnlyCredentialsCache( + dirname=CREDENTIALS_CACHE_DIRNAME, + filename=CREDENTIALS_CACHE_FILENAME, ) - - client_config = { - "installed": { - "client_id": ( - "495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd" - ".apps.googleusercontent.com" - ), - "client_secret": "kOc9wMptUtxkcIFbtZCcrEAc", - "redirect_uris": ["urn:ietf:wg:oauth:2.0:oob"], - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://accounts.google.com/o/oauth2/token", - } - } - - if credentials is None: - app_flow = InstalledAppFlow.from_client_config( - client_config, scopes=SCOPES - ) - - try: - if auth_local_webserver: - credentials = app_flow.run_local_server() - else: - credentials = app_flow.run_console() - except OAuth2Error as ex: - raise pandas_gbq.exceptions.AccessDenied( - "Unable to get valid credentials: {0}".format(ex) - ) - - save_user_account_credentials(credentials, credentials_path) - - return credentials - - -def load_user_account_credentials( - try_credentials, project_id=None, credentials_path=None -): - """ - Loads user account credentials from a local file. - - .. versionadded 0.2.0 - - Parameters - ---------- - None - - Returns - ------- - - GoogleCredentials, - If the credentials can loaded. The retrieved credentials should - also have access to the project (project_id) on BigQuery. - - OR None, - If credentials can not be loaded from a file. Or, the retrieved - credentials do not have access to the project (project_id) - on BigQuery. 
- """ - import google.auth.transport.requests - from google.oauth2.credentials import Credentials - - try: - with open(credentials_path) as credentials_file: - credentials_json = json.load(credentials_file) - except (IOError, ValueError): - return None - - credentials = Credentials( - token=credentials_json.get("access_token"), - refresh_token=credentials_json.get("refresh_token"), - id_token=credentials_json.get("id_token"), - token_uri=credentials_json.get("token_uri"), - client_id=credentials_json.get("client_id"), - client_secret=credentials_json.get("client_secret"), - scopes=credentials_json.get("scopes"), + return pydata_google_auth.cache.ReadWriteCredentialsCache( + dirname=CREDENTIALS_CACHE_DIRNAME, filename=CREDENTIALS_CACHE_FILENAME ) - - # Refresh the token before trying to use it. - request = google.auth.transport.requests.Request() - credentials.refresh(request) - - return try_credentials(project_id, credentials) - - -def get_default_credentials_path(): - """ - Gets the default path to the BigQuery credentials - - .. versionadded 0.3.0 - - Returns - ------- - Path to the BigQuery credentials - """ - if os.name == "nt": - config_path = os.environ["APPDATA"] - else: - config_path = os.path.join(os.path.expanduser("~"), ".config") - - config_path = os.path.join(config_path, "pandas_gbq") - - # Create a pandas_gbq directory in an application-specific hidden - # user folder on the operating system. - if not os.path.exists(config_path): - os.makedirs(config_path) - - return os.path.join(config_path, "bigquery_credentials.dat") - - -def save_user_account_credentials(credentials, credentials_path): - """ - Saves user account credentials to a local file. - - .. 
versionadded 0.2.0 - """ - try: - with open(credentials_path, "w") as credentials_file: - credentials_json = { - "refresh_token": credentials.refresh_token, - "id_token": credentials.id_token, - "token_uri": credentials.token_uri, - "client_id": credentials.client_id, - "client_secret": credentials.client_secret, - "scopes": credentials.scopes, - } - json.dump(credentials_json, credentials_file) - except IOError: - logger.warning("Unable to save credentials.") - - -def _try_credentials(project_id, credentials): - from google.cloud import bigquery - import google.api_core.exceptions - import google.auth.exceptions - - if not credentials: - return None - if not project_id: - return credentials - - try: - client = bigquery.Client(project=project_id, credentials=credentials) - # Check if the application has rights to the BigQuery project - client.query("SELECT 1").result() - return credentials - except google.api_core.exceptions.GoogleAPIError: - return None - except google.auth.exceptions.RefreshError: - # Sometimes (such as on Travis) google-auth returns GCE credentials, - # but fetching the token for those credentials doesn't actually work. 
- # See: - # https://github.com/googleapis/google-auth-library-python/issues/287 - return None diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index c71b6cc5..d35eba05 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -68,6 +68,13 @@ def _check_google_client_version(): def _test_google_api_imports(): + try: + import pydata_google_auth + except ImportError as ex: + raise ImportError( + "pandas-gbq requires pydata-google-auth: {0}".format(ex) + ) + try: from google_auth_oauthlib.flow import InstalledAppFlow # noqa except ImportError as ex: @@ -297,7 +304,6 @@ def __init__( auth_local_webserver=False, dialect="legacy", location=None, - try_credentials=None, credentials=None, ): global context @@ -313,7 +319,6 @@ def __init__( self.auth_local_webserver = auth_local_webserver self.dialect = dialect self.credentials = credentials - self.credentials_path = _get_credentials_file() default_project = None # Load credentials from cache. @@ -328,7 +333,6 @@ def __init__( project_id=project_id, reauth=reauth, auth_local_webserver=auth_local_webserver, - try_credentials=try_credentials, ) if self.project_id is None: @@ -635,10 +639,6 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema): table.create(table_id, table_schema) -def _get_credentials_file(): - return os.environ.get("PANDAS_GBQ_CREDENTIALS_FILE") - - def _parse_schema(schema_fields): # see: # http://pandas.pydata.org/pandas-docs/dev/missing_data.html @@ -1003,9 +1003,6 @@ def to_gbq( reauth=reauth, auth_local_webserver=auth_local_webserver, location=location, - # Avoid reads when writing tables. 
- # https://github.com/pydata/pandas-gbq/issues/202 - try_credentials=lambda project, creds: creds, credentials=credentials, private_key=private_key, ) diff --git a/setup.py b/setup.py index bd0c0d11..e53d43f5 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ def readme(): INSTALL_REQUIRES = [ "setuptools", "pandas", + "pydata-google-auth", "google-auth", "google-auth-oauthlib", "google-cloud-bigquery>=0.32.0", diff --git a/tests/system/test_auth.py b/tests/system/test_auth.py index c48f4520..d02f153a 100644 --- a/tests/system/test_auth.py +++ b/tests/system/test_auth.py @@ -9,6 +9,35 @@ from pandas_gbq import auth +def mock_default_credentials(scopes=None, request=None): + return (None, None) + + +def _try_credentials(project_id, credentials): + from google.cloud import bigquery + import google.api_core.exceptions + import google.auth.exceptions + + if not credentials: + return None + if not project_id: + return credentials + + try: + client = bigquery.Client(project=project_id, credentials=credentials) + # Check if the application has rights to the BigQuery project + client.query("SELECT 1").result() + return credentials + except google.api_core.exceptions.GoogleAPIError: + return None + except google.auth.exceptions.RefreshError: + # Sometimes (such as on Travis) google-auth returns GCE credentials, + # but fetching the token for those credentials doesn't actually work. + # See: + # https://github.com/googleapis/google-auth-library-python/issues/287 + return None + + def _check_if_can_get_correct_default_credentials(): # Checks if "Application Default Credentials" can be fetched # from the environment the tests are running in. 
@@ -26,7 +55,7 @@ def _check_if_can_get_correct_default_credentials(): except (DefaultCredentialsError, IOError): return False - return auth._try_credentials(project, credentials) is not None + return _try_credentials(project, credentials) is not None def test_should_be_able_to_get_valid_credentials(project_id, private_key_path): @@ -43,7 +72,7 @@ def test_get_service_account_credentials_private_key_path(private_key_path): private_key_path ) assert isinstance(credentials, Credentials) - assert auth._try_credentials(project_id, credentials) is not None + assert _try_credentials(project_id, credentials) is not None def test_get_service_account_credentials_private_key_contents( @@ -55,71 +84,44 @@ def test_get_service_account_credentials_private_key_contents( private_key_contents ) assert isinstance(credentials, Credentials) - assert auth._try_credentials(project_id, credentials) is not None - - -def test_get_application_default_credentials_does_not_throw_error(): - if _check_if_can_get_correct_default_credentials(): - # Can get real credentials, so mock it out to fail. 
- from google.auth.exceptions import DefaultCredentialsError - - with mock.patch( - "google.auth.default", side_effect=DefaultCredentialsError() - ): - credentials, _ = auth.get_application_default_credentials( - try_credentials=auth._try_credentials - ) - else: - credentials, _ = auth.get_application_default_credentials( - try_credentials=auth._try_credentials - ) - assert credentials is None - - -def test_get_application_default_credentials_returns_credentials(): - if not _check_if_can_get_correct_default_credentials(): - pytest.skip("Cannot get default_credentials " "from the environment!") - from google.auth.credentials import Credentials - - credentials, default_project = auth.get_application_default_credentials( - try_credentials=auth._try_credentials - ) - - assert isinstance(credentials, Credentials) - assert default_project is not None + assert _try_credentials(project_id, credentials) is not None @pytest.mark.local_auth -def test_get_user_account_credentials_bad_file_returns_user_credentials(): +def test_get_credentials_bad_file_returns_user_credentials( + project_id, monkeypatch +): + import google.auth from google.auth.credentials import Credentials + monkeypatch.setattr(google.auth, "default", mock_default_credentials) + with mock.patch("__main__.open", side_effect=IOError()): - credentials = auth.get_user_account_credentials( - try_credentials=auth._try_credentials + credentials, _ = auth.get_credentials( + project_id=project_id, auth_local_webserver=True ) assert isinstance(credentials, Credentials) @pytest.mark.local_auth -def test_get_user_account_credentials_returns_credentials(project_id): - from google.auth.credentials import Credentials +def test_get_credentials_user_credentials_with_reauth(project_id, monkeypatch): + import google.auth + + monkeypatch.setattr(google.auth, "default", mock_default_credentials) - credentials = auth.get_user_account_credentials( - project_id=project_id, - auth_local_webserver=True, - 
try_credentials=auth._try_credentials, + credentials, _ = auth.get_credentials( + project_id=project_id, reauth=True, auth_local_webserver=True ) - assert isinstance(credentials, Credentials) + assert credentials.valid @pytest.mark.local_auth -def test_get_user_account_credentials_reauth_returns_credentials(project_id): - from google.auth.credentials import Credentials +def test_get_credentials_user_credentials(project_id, monkeypatch): + import google.auth + + monkeypatch.setattr(google.auth, "default", mock_default_credentials) - credentials = auth.get_user_account_credentials( - project_id=project_id, - auth_local_webserver=True, - reauth=True, - try_credentials=auth._try_credentials, + credentials, _ = auth.get_credentials( + project_id=project_id, auth_local_webserver=True ) - assert isinstance(credentials, Credentials) + assert credentials.valid diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index d8107a40..f9f0dc94 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -86,6 +86,7 @@ def mock_default_credentials(scopes=None, request=None): def test_get_credentials_load_user_no_default(monkeypatch): import google.auth import google.auth.credentials + import pydata_google_auth.cache def mock_default_credentials(scopes=None, request=None): return (None, None) @@ -95,14 +96,12 @@ def mock_default_credentials(scopes=None, request=None): google.auth.credentials.Credentials ) - def mock_load_credentials( - try_credentials, project_id=None, credentials_path=None - ): - return mock_user_credentials - - monkeypatch.setattr( - auth, "load_user_account_credentials", mock_load_credentials + mock_cache = mock.create_autospec( + pydata_google_auth.cache.CredentialsCache ) + mock_cache.load.return_value = mock_user_credentials + + monkeypatch.setattr(auth, "get_credentials_cache", lambda _: mock_cache) credentials, project = auth.get_credentials() assert project is None diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 
00436028..c4d8fe2e 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -29,6 +29,15 @@ def mock_none_credentials(*args, **kwargs): return None, None +def mock_get_credentials_no_project(*args, **kwargs): + import google.auth.credentials + + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials + ) + return mock_credentials, None + + def mock_get_credentials(*args, **kwargs): import google.auth.credentials @@ -50,22 +59,9 @@ def mock_get_user_credentials(*args, **kwargs): @pytest.fixture(autouse=True) def no_auth(monkeypatch): from pandas_gbq import auth + import pydata_google_auth - monkeypatch.setattr( - auth, "get_application_default_credentials", mock_get_credentials - ) - monkeypatch.setattr( - auth, "get_user_account_credentials", mock_get_user_credentials - ) - monkeypatch.setattr( - auth, "_try_credentials", lambda project_id, credentials: credentials - ) - - -def test_should_return_credentials_path_set_by_env_var(): - env = {"PANDAS_GBQ_CREDENTIALS_FILE": "/tmp/dummy.dat"} - with mock.patch.dict("os.environ", env): - assert gbq._get_credentials_file() == "/tmp/dummy.dat" + monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials) @pytest.mark.parametrize( @@ -97,10 +93,10 @@ def test_to_gbq_should_fail_if_invalid_table_name_passed(): def test_to_gbq_with_no_project_id_given_should_fail(monkeypatch): - from pandas_gbq import auth + import pydata_google_auth monkeypatch.setattr( - auth, "get_application_default_credentials", mock_none_credentials + pydata_google_auth, "default", mock_get_credentials_no_project ) with pytest.raises(ValueError) as exception: @@ -252,10 +248,10 @@ def test_to_gbq_doesnt_run_query( def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): - from pandas_gbq import auth + import pydata_google_auth monkeypatch.setattr( - auth, "get_application_default_credentials", mock_none_credentials + pydata_google_auth, "default", mock_get_credentials_no_project ) with 
pytest.raises(ValueError) as exception: