Description
I think we need to wait for a newly created dataset to become available before creating tables and loading data into it. In the failing system test below, `_Table.create` sees that the dataset does not exist, creates it, and then immediately calls `create_table`, which fails with `404 Not found: Dataset precise-truck-742:python_bigquery_pandas_tests_system_20211108164757_29101b`, apparently because the new dataset is not yet visible to the API.
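For context, here is a minimal sketch of the kind of wait-and-retry this suggests, using only public google-cloud-bigquery APIs. All ids and backoff values are illustrative, and this is a workaround idea rather than the library's actual fix: it treats 404s as retryable for a short window right after dataset creation.

```python
# Workaround sketch (illustrative ids/backoff; not pandas-gbq's fix):
# retry table creation on 404 for a short window after creating the
# dataset, since a brand-new dataset may not be visible immediately.
from google.api_core import exceptions, retry
from google.cloud import bigquery

client = bigquery.Client()

dataset = bigquery.Dataset("my-project.my_new_dataset")
dataset.location = "asia-northeast1"
client.create_dataset(dataset, exists_ok=True)

# NotFound is normally terminal; treat it as retryable only here.
retry_on_404 = retry.Retry(
    predicate=retry.if_exception_type(exceptions.NotFound),
    initial=1.0,    # seconds before the first retry
    maximum=10.0,   # cap on sleep between attempts
    deadline=60.0,  # give up after a minute
)

table = bigquery.Table(
    "my-project.my_new_dataset.to_gbq_test",
    schema=[bigquery.SchemaField("bools", "BOOLEAN")],
)
retry_on_404(client.create_table)(table)
```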
_______ TestToGBQIntegration.test_upload_data_tokyo_non_existing_dataset _______
dataframe = bools flts ints strs times
0 True 1.508946 9 5 2021-11-08 09:47:57.17283...se -0.326603 5 3 2021-11-08 09:47:57.172839-07:00
9 True 0.453138 2 9 2021-11-08 09:47:57.172839-07:00
destination_table = 'python_bigquery_pandas_tests_system_20211108164757_29101b.to_gbq_test'
project_id = 'precise-truck-742', chunksize = None, reauth = False
if_exists = 'fail', auth_local_webserver = False
table_schema = {'fields': [{'name': 'bools', 'type': 'BOOLEAN'}, {'name': 'flts', 'type': 'FLOAT'}, {'name': 'ints', 'type': 'INTEGER'}, {'name': 'strs', 'type': 'STRING'}, {'name': 'times', 'type': 'TIMESTAMP'}]}
location = 'asia-northeast1', progress_bar = True
credentials = <google.oauth2.service_account.Credentials object at 0x7feb365b90d0>
api_method = 'load_parquet', verbose = None, private_key = None
def to_gbq(
dataframe,
destination_table,
project_id=None,
chunksize=None,
reauth=False,
if_exists="fail",
auth_local_webserver=False,
table_schema=None,
location=None,
progress_bar=True,
credentials=None,
api_method: str = "default",
verbose=None,
private_key=None,
):
"""Write a DataFrame to a Google BigQuery table.
The main method a user calls to export pandas DataFrame contents to
Google BigQuery table.
This method uses the Google Cloud client library to make requests to
Google BigQuery, documented `here
<https://google-cloud-python.readthedocs.io/en/latest/bigquery/usage.html>`__.
See the :ref:`How to authenticate with Google BigQuery <authentication>`
guide for authentication instructions.
Parameters
----------
dataframe : pandas.DataFrame
DataFrame to be written to a Google BigQuery table.
destination_table : str
Name of table to be written, in the form ``dataset.tablename`` or
``project.dataset.tablename``.
project_id : str, optional
Google BigQuery Account project ID. Optional when available from
the environment.
chunksize : int, optional
Number of rows to be inserted in each chunk from the dataframe.
Set to ``None`` to load the whole dataframe at once.
reauth : bool, default False
Force Google BigQuery to re-authenticate the user. This is useful
if multiple accounts are used.
if_exists : str, default 'fail'
Behavior when the destination table exists. Value can be one of:
``'fail'``
If table exists, do nothing.
``'replace'``
If table exists, drop it, recreate it, and insert data.
``'append'``
If table exists, insert data. Create if does not exist.
auth_local_webserver : bool, default False
Use the `local webserver flow`_ instead of the `console flow`_
when getting user credentials.
.. _local webserver flow:
http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
.. _console flow:
http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
.. versionadded:: 0.2.0
table_schema : list of dicts, optional
List of BigQuery table fields to which according DataFrame
columns conform to, e.g. ``[{'name': 'col1', 'type':
'STRING'},...]``. The ``type`` values must be BigQuery type names.
- If ``table_schema`` is provided, it may contain all or a subset of
DataFrame columns. If a subset is provided, the rest will be
inferred from the DataFrame dtypes. If ``table_schema`` contains
columns not in the DataFrame, they'll be ignored.
- If ``table_schema`` is **not** provided, it will be
generated according to dtypes of DataFrame columns. See
`Inferring the Table Schema
<https://pandas-gbq.readthedocs.io/en/latest/writing.html#writing-schema>`__.
for a description of the schema inference.
See `BigQuery API documentation on valid column names
<https://cloud.google.com/bigquery/docs/schemas#column_names`>__.
.. versionadded:: 0.3.1
location : str, optional
Location where the load job should run. See the `BigQuery locations
documentation
<https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
list of available locations. The location must match that of the
target dataset.
.. versionadded:: 0.5.0
progress_bar : bool, default True
Use the library `tqdm` to show the progress bar for the upload,
chunk by chunk.
.. versionadded:: 0.5.0
credentials : google.auth.credentials.Credentials, optional
Credentials for accessing Google APIs. Use this parameter to override
default credentials, such as to use Compute Engine
:class:`google.auth.compute_engine.Credentials` or Service Account
:class:`google.oauth2.service_account.Credentials` directly.
.. versionadded:: 0.8.0
api_method : str, optional
API method used to upload DataFrame to BigQuery. One of "load_parquet",
"load_csv". Default "load_parquet" if pandas is version 1.1.0+,
otherwise "load_csv".
.. versionadded:: 0.16.0
verbose : bool, deprecated
Deprecated in Pandas-GBQ 0.4.0. Use the `logging module
to adjust verbosity instead
<https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
private_key : str, deprecated
Deprecated in pandas-gbq version 0.8.0. Use the ``credentials``
parameter and
:func:`google.oauth2.service_account.Credentials.from_service_account_info`
or
:func:`google.oauth2.service_account.Credentials.from_service_account_file`
instead.
"""
_test_google_api_imports()
if verbose is not None and FEATURES.pandas_has_deprecated_verbose:
warnings.warn(
"verbose is deprecated and will be removed in "
"a future version. Set logging level in order to vary "
"verbosity",
FutureWarning,
stacklevel=1,
)
if api_method == "default":
# Avoid using parquet if pandas doesn't support lossless conversions to
# parquet timestamp. See: https://stackoverflow.com/a/69758676/101923
if FEATURES.pandas_has_parquet_with_lossless_timestamp:
api_method = "load_parquet"
else:
api_method = "load_csv"
if chunksize is not None:
if api_method == "load_parquet":
warnings.warn(
"chunksize is ignored when using api_method='load_parquet'",
DeprecationWarning,
stacklevel=2,
)
elif api_method == "load_csv":
warnings.warn(
"chunksize will be ignored when using api_method='load_csv' in a future version of pandas-gbq",
PendingDeprecationWarning,
stacklevel=2,
)
if if_exists not in ("fail", "replace", "append"):
raise ValueError("'{0}' is not valid for if_exists".format(if_exists))
if "." not in destination_table:
raise NotFoundException(
"Invalid Table Name. Should be of the form 'datasetId.tableId' or "
"'projectId.datasetId.tableId'"
)
connector = GbqConnector(
project_id,
reauth=reauth,
auth_local_webserver=auth_local_webserver,
location=location,
credentials=credentials,
private_key=private_key,
)
bqclient = connector.client
destination_table_ref = bigquery.table.TableReference.from_string(
destination_table, default_project=connector.project_id
)
project_id_table = destination_table_ref.project
dataset_id = destination_table_ref.dataset_id
table_id = destination_table_ref.table_id
default_schema = _generate_bq_schema(dataframe)
if not table_schema:
table_schema = default_schema
else:
table_schema = pandas_gbq.schema.update_schema(
default_schema, dict(fields=table_schema)
)
# If table exists, check if_exists parameter
try:
> table = bqclient.get_table(destination_table_ref)
pandas_gbq/gbq.py:1055:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery.client.Client object at 0x7feb34297670>
table = TableReference(DatasetReference('precise-truck-742', 'python_bigquery_pandas_tests_system_20211108164757_29101b'), 'to_gbq_test')
retry = <google.api_core.retry.Retry object at 0x7feb368ec8b0>, timeout = None
def get_table(
self,
table: Union[Table, TableReference, TableListItem, str],
retry: retries.Retry = DEFAULT_RETRY,
timeout: float = DEFAULT_TIMEOUT,
) -> Table:
"""Fetch the table referenced by ``table``.
Args:
table (Union[ \
google.cloud.bigquery.table.Table, \
google.cloud.bigquery.table.TableReference, \
google.cloud.bigquery.table.TableListItem, \
str, \
]):
A reference to the table to fetch from the BigQuery API.
If a string is passed in, this method attempts to create a
table reference from a string using
:func:`google.cloud.bigquery.table.TableReference.from_string`.
retry (Optional[google.api_core.retry.Retry]):
How to retry the RPC.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
Returns:
google.cloud.bigquery.table.Table:
A ``Table`` instance.
"""
table_ref = _table_arg_to_table_ref(table, default_project=self.project)
path = table_ref.path
span_attributes = {"path": path}
> api_response = self._call_api(
retry,
span_name="BigQuery.getTable",
span_attributes=span_attributes,
method="GET",
path=path,
timeout=timeout,
)
.nox/system-3-9/lib/python3.9/site-packages/google/cloud/bigquery/client.py:1012:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery.client.Client object at 0x7feb34297670>
retry = <google.api_core.retry.Retry object at 0x7feb368ec8b0>
span_name = 'BigQuery.getTable'
span_attributes = {'path': '/projects/precise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables/to_gbq_test'}
job_ref = None, headers = None
kwargs = {'method': 'GET', 'path': '/projects/precise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables/to_gbq_test', 'timeout': None}
call = <function Retry.__call__.<locals>.retry_wrapped_func at 0x7feb3507c820>
def _call_api(
self,
retry,
span_name=None,
span_attributes=None,
job_ref=None,
headers: Optional[Dict[str, str]] = None,
**kwargs,
):
kwargs = _add_server_timeout_header(headers, kwargs)
call = functools.partial(self._connection.api_request, **kwargs)
if retry:
call = retry(call)
if span_name is not None:
with create_span(
name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
):
> return call()
.nox/system-3-9/lib/python3.9/site-packages/google/cloud/bigquery/client.py:760:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
args = (), kwargs = {}
target = functools.partial(functools.partial(<bound method JSONConnection.api_request of <google.cloud.bigquery._http.Connectio...recise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables/to_gbq_test', timeout=None))
sleep_generator = <generator object exponential_sleep_generator at 0x7feb3429c4a0>
@functools.wraps(func)
def retry_wrapped_func(*args, **kwargs):
"""A wrapper that calls target function with retry."""
target = functools.partial(func, *args, **kwargs)
sleep_generator = exponential_sleep_generator(
self._initial, self._maximum, multiplier=self._multiplier
)
> return retry_target(
target,
self._predicate,
sleep_generator,
self._deadline,
on_error=on_error,
)
.nox/system-3-9/lib/python3.9/site-packages/google/api_core/retry.py:283:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
target = functools.partial(functools.partial(<bound method JSONConnection.api_request of <google.cloud.bigquery._http.Connectio...recise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables/to_gbq_test', timeout=None))
predicate = <function _should_retry at 0x7feb368f3160>
sleep_generator = <generator object exponential_sleep_generator at 0x7feb3429c4a0>
deadline = 600.0, on_error = None
def retry_target(target, predicate, sleep_generator, deadline, on_error=None):
"""Call a function and retry if it fails.
This is the lowest-level retry helper. Generally, you'll use the
higher-level retry helper :class:`Retry`.
Args:
target(Callable): The function to call and retry. This must be a
nullary function - apply arguments with `functools.partial`.
predicate (Callable[Exception]): A callable used to determine if an
exception raised by the target should be considered retryable.
It should return True to retry or False otherwise.
sleep_generator (Iterable[float]): An infinite iterator that determines
how long to sleep between retries.
deadline (float): How long to keep retrying the target. The last sleep
period is shortened as necessary, so that the last retry runs at
``deadline`` (and not considerably beyond it).
on_error (Callable[Exception]): A function to call while processing a
retryable exception. Any error raised by this function will *not*
be caught.
Returns:
Any: the return value of the target function.
Raises:
google.api_core.RetryError: If the deadline is exceeded while retrying.
ValueError: If the sleep generator stops yielding values.
Exception: If the target raises a method that isn't retryable.
"""
if deadline is not None:
deadline_datetime = datetime_helpers.utcnow() + datetime.timedelta(
seconds=deadline
)
else:
deadline_datetime = None
last_exc = None
for sleep in sleep_generator:
try:
> return target()
.nox/system-3-9/lib/python3.9/site-packages/google/api_core/retry.py:190:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery._http.Connection object at 0x7feb342974f0>
method = 'GET'
path = '/projects/precise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables/to_gbq_test'
query_params = None, data = None, content_type = None, headers = None
api_base_url = None, api_version = None, expect_json = True
_target_object = None, timeout = None
def api_request(
self,
method,
path,
query_params=None,
data=None,
content_type=None,
headers=None,
api_base_url=None,
api_version=None,
expect_json=True,
_target_object=None,
timeout=_DEFAULT_TIMEOUT,
):
"""Make a request over the HTTP transport to the API.
You shouldn't need to use this method, but if you plan to
interact with the API using these primitives, this is the
correct one to use.
:type method: str
:param method: The HTTP method name (ie, ``GET``, ``POST``, etc).
Required.
:type path: str
:param path: The path to the resource (ie, ``'/b/bucket-name'``).
Required.
:type query_params: dict or list
:param query_params: A dictionary of keys and values (or list of
key-value pairs) to insert into the query
string of the URL.
:type data: str
:param data: The data to send as the body of the request. Default is
the empty string.
:type content_type: str
:param content_type: The proper MIME type of the data provided. Default
is None.
:type headers: dict
:param headers: extra HTTP headers to be sent with the request.
:type api_base_url: str
:param api_base_url: The base URL for the API endpoint.
Typically you won't have to provide this.
Default is the standard API base URL.
:type api_version: str
:param api_version: The version of the API to call. Typically
you shouldn't provide this and instead use
the default for the library. Default is the
latest API version supported by
google-cloud-python.
:type expect_json: bool
:param expect_json: If True, this method will try to parse the
response as JSON and raise an exception if
that cannot be done. Default is True.
:type _target_object: :class:`object`
:param _target_object:
(Optional) Protected argument to be used by library callers. This
can allow custom behavior, for example, to defer an HTTP request
and complete initialization of the object at a later time.
:type timeout: float or tuple
:param timeout: (optional) The amount of time, in seconds, to wait
for the server response.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
:raises ~google.cloud.exceptions.GoogleCloudError: if the response code
is not 200 OK.
:raises ValueError: if the response content type is not JSON.
:rtype: dict or str
:returns: The API response payload, either as a raw string or
a dictionary if the response is valid JSON.
"""
url = self.build_api_url(
path=path,
query_params=query_params,
api_base_url=api_base_url,
api_version=api_version,
)
# Making the executive decision that any dictionary
# data will be sent properly as JSON.
if data and isinstance(data, dict):
data = json.dumps(data)
content_type = "application/json"
response = self._make_request(
method=method,
url=url,
data=data,
content_type=content_type,
headers=headers,
target_object=_target_object,
timeout=timeout,
)
if not 200 <= response.status_code < 300:
> raise exceptions.from_http_response(response)
E google.api_core.exceptions.NotFound: 404 GET https://bigquery.googleapis.com/bigquery/v2/projects/precise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables/to_gbq_test?prettyPrint=false: Not found: Dataset precise-truck-742:python_bigquery_pandas_tests_system_20211108164757_29101b
.nox/system-3-9/lib/python3.9/site-packages/google/cloud/_http.py:479: NotFound
During handling of the above exception, another exception occurred:
self = <pandas_gbq.gbq._Table object at 0x7feb35053040>
table_id = 'to_gbq_test'
schema = {'fields': [{'name': 'bools', 'type': 'BOOLEAN'}, {'name': 'flts', 'type': 'FLOAT'}, {'name': 'ints', 'type': 'INTEGER'}, {'name': 'strs', 'type': 'STRING'}, {'name': 'times', 'type': 'TIMESTAMP'}]}
def create(self, table_id, schema):
"""Create a table in Google BigQuery given a table and schema
Parameters
----------
table : str
Name of table to be written
schema : str
Use the generate_bq_schema to generate your table schema from a
dataframe.
"""
from google.cloud.bigquery import DatasetReference
from google.cloud.bigquery import Table
from google.cloud.bigquery import TableReference
if self.exists(table_id):
raise TableCreationError("Table {0} already exists".format(table_id))
if not _Dataset(self.project_id, credentials=self.credentials).exists(
self.dataset_id
):
_Dataset(
self.project_id, credentials=self.credentials, location=self.location,
).create(self.dataset_id)
table_ref = TableReference(
DatasetReference(self.project_id, self.dataset_id), table_id
)
table = Table(table_ref)
table.schema = pandas_gbq.schema.to_google_cloud_bigquery(schema)
try:
> self.client.create_table(table)
pandas_gbq/gbq.py:1223:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery.client.Client object at 0x7feb3504d520>
table = Table(TableReference(DatasetReference('precise-truck-742', 'python_bigquery_pandas_tests_system_20211108164757_29101b'), 'to_gbq_test'))
exists_ok = False
retry = <google.api_core.retry.Retry object at 0x7feb368ec8b0>, timeout = None
def create_table(
self,
table: Union[str, Table, TableReference, TableListItem],
exists_ok: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
timeout: float = DEFAULT_TIMEOUT,
) -> Table:
"""API call: create a table via a PUT request
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert
Args:
table (Union[ \
google.cloud.bigquery.table.Table, \
google.cloud.bigquery.table.TableReference, \
google.cloud.bigquery.table.TableListItem, \
str, \
]):
A :class:`~google.cloud.bigquery.table.Table` to create.
If ``table`` is a reference, an empty table is created
with the specified ID. The dataset that the table belongs to
must already exist.
exists_ok (Optional[bool]):
Defaults to ``False``. If ``True``, ignore "already exists"
errors when creating the table.
retry (Optional[google.api_core.retry.Retry]):
How to retry the RPC.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
Returns:
google.cloud.bigquery.table.Table:
A new ``Table`` returned from the service.
Raises:
google.cloud.exceptions.Conflict:
If the table already exists.
"""
table = _table_arg_to_table(table, default_project=self.project)
dataset_id = table.dataset_id
path = "/projects/%s/datasets/%s/tables" % (table.project, dataset_id)
data = table.to_api_repr()
try:
span_attributes = {"path": path, "dataset_id": dataset_id}
> api_response = self._call_api(
retry,
span_name="BigQuery.createTable",
span_attributes=span_attributes,
method="POST",
path=path,
data=data,
timeout=timeout,
)
.nox/system-3-9/lib/python3.9/site-packages/google/cloud/bigquery/client.py:726:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery.client.Client object at 0x7feb3504d520>
retry = <google.api_core.retry.Retry object at 0x7feb368ec8b0>
span_name = 'BigQuery.createTable'
span_attributes = {'dataset_id': 'python_bigquery_pandas_tests_system_20211108164757_29101b', 'path': '/projects/precise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables'}
job_ref = None, headers = None
kwargs = {'data': {'labels': {}, 'schema': {'fields': [{'mode': 'NULLABLE', 'name': 'bools', 'type': 'BOOLEAN'}, {'mode': 'NULL...projects/precise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables', 'timeout': None}
call = <function Retry.__call__.<locals>.retry_wrapped_func at 0x7feb3507cdc0>
def _call_api(
self,
retry,
span_name=None,
span_attributes=None,
job_ref=None,
headers: Optional[Dict[str, str]] = None,
**kwargs,
):
kwargs = _add_server_timeout_header(headers, kwargs)
call = functools.partial(self._connection.api_request, **kwargs)
if retry:
call = retry(call)
if span_name is not None:
with create_span(
name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
):
> return call()
.nox/system-3-9/lib/python3.9/site-packages/google/cloud/bigquery/client.py:760:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
args = (), kwargs = {}
target = functools.partial(functools.partial(<bound method JSONConnection.api_request of <google.cloud.bigquery._http.Connectio...', 'type': 'STRING', 'mode': 'NULLABLE'}, {'name': 'times', 'type': 'TIMESTAMP', 'mode': 'NULLABLE'}]}}, timeout=None))
sleep_generator = <generator object exponential_sleep_generator at 0x7feb097fc9e0>
@functools.wraps(func)
def retry_wrapped_func(*args, **kwargs):
"""A wrapper that calls target function with retry."""
target = functools.partial(func, *args, **kwargs)
sleep_generator = exponential_sleep_generator(
self._initial, self._maximum, multiplier=self._multiplier
)
> return retry_target(
target,
self._predicate,
sleep_generator,
self._deadline,
on_error=on_error,
)
.nox/system-3-9/lib/python3.9/site-packages/google/api_core/retry.py:283:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
target = functools.partial(functools.partial(<bound method JSONConnection.api_request of <google.cloud.bigquery._http.Connectio...', 'type': 'STRING', 'mode': 'NULLABLE'}, {'name': 'times', 'type': 'TIMESTAMP', 'mode': 'NULLABLE'}]}}, timeout=None))
predicate = <function _should_retry at 0x7feb368f3160>
sleep_generator = <generator object exponential_sleep_generator at 0x7feb097fc9e0>
deadline = 600.0, on_error = None
def retry_target(target, predicate, sleep_generator, deadline, on_error=None):
"""Call a function and retry if it fails.
This is the lowest-level retry helper. Generally, you'll use the
higher-level retry helper :class:`Retry`.
Args:
target(Callable): The function to call and retry. This must be a
nullary function - apply arguments with `functools.partial`.
predicate (Callable[Exception]): A callable used to determine if an
exception raised by the target should be considered retryable.
It should return True to retry or False otherwise.
sleep_generator (Iterable[float]): An infinite iterator that determines
how long to sleep between retries.
deadline (float): How long to keep retrying the target. The last sleep
period is shortened as necessary, so that the last retry runs at
``deadline`` (and not considerably beyond it).
on_error (Callable[Exception]): A function to call while processing a
retryable exception. Any error raised by this function will *not*
be caught.
Returns:
Any: the return value of the target function.
Raises:
google.api_core.RetryError: If the deadline is exceeded while retrying.
ValueError: If the sleep generator stops yielding values.
Exception: If the target raises a method that isn't retryable.
"""
if deadline is not None:
deadline_datetime = datetime_helpers.utcnow() + datetime.timedelta(
seconds=deadline
)
else:
deadline_datetime = None
last_exc = None
for sleep in sleep_generator:
try:
> return target()
.nox/system-3-9/lib/python3.9/site-packages/google/api_core/retry.py:190:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery._http.Connection object at 0x7feb3504dd00>
method = 'POST'
path = '/projects/precise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables'
query_params = None
data = '{"tableReference": {"projectId": "precise-truck-742", "datasetId": "python_bigquery_pandas_tests_system_2021110816475... {"name": "strs", "type": "STRING", "mode": "NULLABLE"}, {"name": "times", "type": "TIMESTAMP", "mode": "NULLABLE"}]}}'
content_type = 'application/json', headers = None, api_base_url = None
api_version = None, expect_json = True, _target_object = None, timeout = None
def api_request(
self,
method,
path,
query_params=None,
data=None,
content_type=None,
headers=None,
api_base_url=None,
api_version=None,
expect_json=True,
_target_object=None,
timeout=_DEFAULT_TIMEOUT,
):
"""Make a request over the HTTP transport to the API.
You shouldn't need to use this method, but if you plan to
interact with the API using these primitives, this is the
correct one to use.
:type method: str
:param method: The HTTP method name (ie, ``GET``, ``POST``, etc).
Required.
:type path: str
:param path: The path to the resource (ie, ``'/b/bucket-name'``).
Required.
:type query_params: dict or list
:param query_params: A dictionary of keys and values (or list of
key-value pairs) to insert into the query
string of the URL.
:type data: str
:param data: The data to send as the body of the request. Default is
the empty string.
:type content_type: str
:param content_type: The proper MIME type of the data provided. Default
is None.
:type headers: dict
:param headers: extra HTTP headers to be sent with the request.
:type api_base_url: str
:param api_base_url: The base URL for the API endpoint.
Typically you won't have to provide this.
Default is the standard API base URL.
:type api_version: str
:param api_version: The version of the API to call. Typically
you shouldn't provide this and instead use
the default for the library. Default is the
latest API version supported by
google-cloud-python.
:type expect_json: bool
:param expect_json: If True, this method will try to parse the
response as JSON and raise an exception if
that cannot be done. Default is True.
:type _target_object: :class:`object`
:param _target_object:
(Optional) Protected argument to be used by library callers. This
can allow custom behavior, for example, to defer an HTTP request
and complete initialization of the object at a later time.
:type timeout: float or tuple
:param timeout: (optional) The amount of time, in seconds, to wait
for the server response.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
:raises ~google.cloud.exceptions.GoogleCloudError: if the response code
is not 200 OK.
:raises ValueError: if the response content type is not JSON.
:rtype: dict or str
:returns: The API response payload, either as a raw string or
a dictionary if the response is valid JSON.
"""
url = self.build_api_url(
path=path,
query_params=query_params,
api_base_url=api_base_url,
api_version=api_version,
)
# Making the executive decision that any dictionary
# data will be sent properly as JSON.
if data and isinstance(data, dict):
data = json.dumps(data)
content_type = "application/json"
response = self._make_request(
method=method,
url=url,
data=data,
content_type=content_type,
headers=headers,
target_object=_target_object,
timeout=timeout,
)
if not 200 <= response.status_code < 300:
> raise exceptions.from_http_response(response)
E google.api_core.exceptions.NotFound: 404 POST https://bigquery.googleapis.com/bigquery/v2/projects/precise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables?prettyPrint=false: Not found: Dataset precise-truck-742:python_bigquery_pandas_tests_system_20211108164757_29101b
.nox/system-3-9/lib/python3.9/site-packages/google/cloud/_http.py:479: NotFound
During handling of the above exception, another exception occurred:
self = <system.test_gbq.TestToGBQIntegration object at 0x7feb3504dc40>
project_id = 'precise-truck-742'
random_dataset_id = 'python_bigquery_pandas_tests_system_20211108164757_29101b'
bigquery_client = <google.cloud.bigquery.client.Client object at 0x7feb365b9790>
def test_upload_data_tokyo_non_existing_dataset(
self, project_id, random_dataset_id, bigquery_client
):
from google.cloud import bigquery
test_size = 10
df = make_mixed_dataframe_v2(test_size)
non_existing_tokyo_dataset = random_dataset_id
non_existing_tokyo_destination = "{}.to_gbq_test".format(
non_existing_tokyo_dataset
)
# Initialize table with sample data
> gbq.to_gbq(
df,
non_existing_tokyo_destination,
project_id,
credentials=self.credentials,
location="asia-northeast1",
)
tests/system/test_gbq.py:1431:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas_gbq/gbq.py:1063: in to_gbq
table_connector.create(table_id, table_schema)
pandas_gbq/gbq.py:1225: in create
self.process_http_error(ex)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ex = NotFound('POST https://bigquery.googleapis.com/bigquery/v2/projects/precise-truck-742/datasets/python_bigquery_pandas_...les?prettyPrint=false: Not found: Dataset precise-truck-742:python_bigquery_pandas_tests_system_20211108164757_29101b')
@staticmethod
def process_http_error(ex):
# See `BigQuery Troubleshooting Errors
# <https://cloud.google.com/bigquery/troubleshooting-errors>`__
> raise GenericGBQException("Reason: {0}".format(ex))
E pandas_gbq.exceptions.GenericGBQException: Reason: 404 POST https://bigquery.googleapis.com/bigquery/v2/projects/precise-truck-742/datasets/python_bigquery_pandas_tests_system_20211108164757_29101b/tables?prettyPrint=false: Not found: Dataset precise-truck-742:python_bigquery_pandas_tests_system_20211108164757_29101b
pandas_gbq/gbq.py:375: GenericGBQException
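For reference, the failing scenario condenses to the call below (illustrative project and dataset ids; the actual test uses a random dataset id and service-account credentials): `to_gbq` creates the missing Tokyo dataset itself and then immediately creates the table, which is where the 404 surfaces.

```python
# Condensed reproduction (illustrative ids; assumes default credentials).
import pandas
from pandas_gbq import gbq

df = pandas.DataFrame({"ints": [1, 2, 3]})

# Dataset does not exist yet: to_gbq creates it in asia-northeast1,
# then immediately calls create_table, which can 404 as shown above.
gbq.to_gbq(
    df,
    "my_nonexistent_tokyo_dataset.to_gbq_test",
    project_id="my-project",
    location="asia-northeast1",
)
```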