diff --git a/noxfile.py b/noxfile.py
index 49ec38f6..2b973857 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -256,6 +256,7 @@ def system(session):
 
     install_systemtest_dependencies(session, "-c", constraints_path)
 
+    # Print out package versions.
     session.run("python", "-m", "pip", "freeze")
 
     # Run py.test against the system tests.
@@ -352,12 +353,15 @@ def prerelease(session):
         "--quiet",
         f"--junitxml=prerelease_unit_{session.python}_sponge_log.xml",
         os.path.join("tests", "unit"),
+        *session.posargs,
     )
+
     session.run(
         "py.test",
         "--quiet",
         f"--junitxml=prerelease_system_{session.python}_sponge_log.xml",
         os.path.join("tests", "system"),
+        *session.posargs,
     )
@@ -515,7 +519,9 @@ def prerelease_deps(session):
     session.install(*other_deps)
     session.run("python", "-m", "pip", "freeze")
 
-    # Print out prerelease package versions
+    # Print out package versions.
+    session.run("python", "-m", "pip", "freeze")
+
     session.run(
         "python", "-c", "import google.protobuf; print(google.protobuf.__version__)"
     )
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index f93999a9..60bb8bda 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -411,7 +411,20 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
         timeout_ms = job_config_dict.get("jobTimeoutMs") or job_config_dict[
             "query"
         ].get("timeoutMs")
-        timeout_ms = int(timeout_ms) if timeout_ms else None
+
+        if timeout_ms:
+            timeout_ms = int(timeout_ms)
+            # Having too small a timeout_ms results in individual
+            # API calls timing out before they can finish.
+            # ~300 milliseconds is a rule of thumb for the bare minimum
+            # latency from the BigQuery API.
+            minimum_latency = 400
+            if timeout_ms < minimum_latency:
+                raise QueryTimeout(
+                    f"Query timeout must be at least {minimum_latency} milliseconds: timeout_ms equals {timeout_ms}."
+                )
+        else:
+            timeout_ms = None
 
         self._start_timer()
         job_config = bigquery.QueryJobConfig.from_api_repr(job_config_dict)
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 7afa4ae9..9bbefceb 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -473,13 +473,41 @@ def test_timeout_configuration(self, project_id):
         sql_statement = """
         select count(*) from unnest(generate_array(1,1000000)), unnest(generate_array(1, 10000))
         """
+
+        # This first test confirms that we get a timeout error when the query
+        # exceeds the timeout limit; the query above runs long enough to exceed it.
         configs = [
+            # timeout_ms has a minimum of 400 milliseconds; see the
+            # GbqConnector.run_query docstring in pandas_gbq/gbq.py
+            # for more details.
             # pandas-gbq timeout configuration. Transformed to REST API compatible version.
-            {"query": {"useQueryCache": False, "timeoutMs": 1}},
+            {"query": {"useQueryCache": False, "timeoutMs": 401}},
             # REST API job timeout. See:
             # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.job_timeout_ms
+            {"query": {"useQueryCache": False}, "jobTimeoutMs": 401},
+        ]
+        for config in configs:
+            with pytest.raises(gbq.QueryTimeout):
+                gbq.read_gbq(
+                    sql_statement,
+                    project_id=project_id,
+                    credentials=self.credentials,
+                    configuration=config,
+                )
+
+        # This second test confirms that our validation logic won't allow
+        # a timeout value below the 400 millisecond minimum to be used,
+        # exercising edge cases at and below that boundary to ensure we
+        # catch invalid values.
+        configs = [
+            {"query": {"useQueryCache": False, "timeoutMs": 399}},
+            {"query": {"useQueryCache": False, "timeoutMs": 400}},
+            {"query": {"useQueryCache": False, "timeoutMs": 1}},
+            {"query": {"useQueryCache": False}, "jobTimeoutMs": 399},
+            {"query": {"useQueryCache": False}, "jobTimeoutMs": 400},
             {"query": {"useQueryCache": False}, "jobTimeoutMs": 1},
         ]
+
         for config in configs:
             with pytest.raises(gbq.QueryTimeout):
                 gbq.read_gbq(
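For context on the gbq.py change, here is a minimal standalone sketch of the same extraction-and-validation logic. It does not import anything from pandas_gbq: QueryTimeout is a local stand-in for pandas_gbq.gbq.QueryTimeout, and validate_timeout_ms is a hypothetical helper name used only for illustration, not part of the library's API.

from typing import Optional


class QueryTimeout(Exception):
    """Local stand-in for pandas_gbq.gbq.QueryTimeout."""


# ~300 ms is the rule-of-thumb floor for BigQuery API latency; the patch
# enforces a 400 ms minimum to leave some headroom above that.
MINIMUM_LATENCY_MS = 400


def validate_timeout_ms(job_config_dict: dict) -> Optional[int]:
    """Mirror the timeout handling the patch adds to GbqConnector.run_query.

    jobTimeoutMs takes precedence over query.timeoutMs, as in the patch.
    """
    timeout_ms = job_config_dict.get("jobTimeoutMs") or job_config_dict.get(
        "query", {}
    ).get("timeoutMs")
    if not timeout_ms:
        return None
    timeout_ms = int(timeout_ms)
    if timeout_ms < MINIMUM_LATENCY_MS:
        raise QueryTimeout(
            f"Query timeout must be at least {MINIMUM_LATENCY_MS} milliseconds:"
            f" timeout_ms equals {timeout_ms}."
        )
    return timeout_ms


if __name__ == "__main__":
    # At or above the minimum: passes validation.
    assert validate_timeout_ms({"query": {"timeoutMs": 401}}) == 401
    assert validate_timeout_ms({"jobTimeoutMs": 400}) == 400
    # Below the minimum: rejected, whichever field supplies the value.
    for config in ({"query": {"timeoutMs": 399}}, {"jobTimeoutMs": 1}):
        try:
            validate_timeout_ms(config)
        except QueryTimeout as exc:
            print(exc)

Note the boundary behavior: a value of exactly 400 passes the validation check (the patch rejects only values strictly below minimum_latency), so the timeoutMs/jobTimeoutMs = 400 cases in the system test above raise QueryTimeout because the long-running query genuinely exceeds the limit at runtime, not because of the validation itself.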