Skip to content

Commit 6bfd1ee

Browse files
bug: add missing latency check (#763)
* specify a particular version of bigquery to debug * again tweaking the versions to debug issue. * add some pip freeze commands for debugging * updates minimum latency to correct a flaky bot issue and protect users * Update noxfile.py * Update noxfile.py * Update setup.py * Update noxfile.py * add several test cases to test validation logic * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent aaa7090 commit 6bfd1ee

File tree

3 files changed

+50
-3
lines changed

3 files changed

+50
-3
lines changed

noxfile.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ def system(session):
256256

257257
install_systemtest_dependencies(session, "-c", constraints_path)
258258

259+
# Print out package versions.
259260
session.run("python", "-m", "pip", "freeze")
260261

261262
# Run py.test against the system tests.
@@ -352,12 +353,15 @@ def prerelease(session):
352353
"--quiet",
353354
f"--junitxml=prerelease_unit_{session.python}_sponge_log.xml",
354355
os.path.join("tests", "unit"),
356+
*session.posargs,
355357
)
358+
356359
session.run(
357360
"py.test",
358361
"--quiet",
359362
f"--junitxml=prerelease_system_{session.python}_sponge_log.xml",
360363
os.path.join("tests", "system"),
364+
*session.posargs,
361365
)
362366

363367

@@ -515,7 +519,9 @@ def prerelease_deps(session):
515519
session.install(*other_deps)
516520
session.run("python", "-m", "pip", "freeze")
517521

518-
# Print out prerelease package versions
522+
# Print out package versions.
523+
session.run("python", "-m", "pip", "freeze")
524+
519525
session.run(
520526
"python", "-c", "import google.protobuf; print(google.protobuf.__version__)"
521527
)

pandas_gbq/gbq.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,20 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
411411
timeout_ms = job_config_dict.get("jobTimeoutMs") or job_config_dict[
412412
"query"
413413
].get("timeoutMs")
414-
timeout_ms = int(timeout_ms) if timeout_ms else None
414+
415+
if timeout_ms:
416+
timeout_ms = int(timeout_ms)
417+
# Having too small a timeout_ms results in individual
418+
# API calls timing out before they can finish.
419+
# ~300 milliseconds is rule of thumb for bare minimum
420+
# latency from the BigQuery API.
421+
minimum_latency = 400
422+
if timeout_ms < minimum_latency:
423+
raise QueryTimeout(
424+
f"Query timeout must be at least 400 milliseconds: timeout_ms equals {timeout_ms}."
425+
)
426+
else:
427+
timeout_ms = None
415428

416429
self._start_timer()
417430
job_config = bigquery.QueryJobConfig.from_api_repr(job_config_dict)

tests/system/test_gbq.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,13 +473,41 @@ def test_timeout_configuration(self, project_id):
473473
sql_statement = """
474474
select count(*) from unnest(generate_array(1,1000000)), unnest(generate_array(1, 10000))
475475
"""
476+
477+
# This first test confirms that we get a timeout error if we exceed the timeout limit.
478+
# The above query is expected to take a long time and exceed the limit.
476479
configs = [
480+
# timeout_ms has a minimum allowed value of 400 milliseconds
481+
# see pandas-gbq/gbq.py/GbqConnector/run_query docstring
482+
# for more details.
477483
# pandas-gbq timeout configuration. Transformed to REST API compatible version.
478-
{"query": {"useQueryCache": False, "timeoutMs": 1}},
484+
{"query": {"useQueryCache": False, "timeoutMs": 401}},
479485
# REST API job timeout. See:
480486
# https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.job_timeout_ms
487+
{"query": {"useQueryCache": False}, "jobTimeoutMs": 401},
488+
]
489+
for config in configs:
490+
with pytest.raises(gbq.QueryTimeout):
491+
gbq.read_gbq(
492+
sql_statement,
493+
project_id=project_id,
494+
credentials=self.credentials,
495+
configuration=config,
496+
)
497+
498+
# This second test confirms that our validation logic won't allow a
498+
# value less than or equal to 400 to be used as a timeout value,
500+
# by exercising the system for various edge cases to ensure we catch
501+
# invalid values less than or equal to 400.
502+
configs = [
503+
{"query": {"useQueryCache": False, "timeoutMs": 399}},
504+
{"query": {"useQueryCache": False, "timeoutMs": 400}},
505+
{"query": {"useQueryCache": False, "timeoutMs": 1}},
506+
{"query": {"useQueryCache": False}, "jobTimeoutMs": 399},
507+
{"query": {"useQueryCache": False}, "jobTimeoutMs": 400},
481508
{"query": {"useQueryCache": False}, "jobTimeoutMs": 1},
482509
]
510+
483511
for config in configs:
484512
with pytest.raises(gbq.QueryTimeout):
485513
gbq.read_gbq(

0 commit comments

Comments
 (0)