Commit 9a65383

feat: to_gbq uses Parquet by default, use api_method="load_csv" for old behavior (#413)
* avoid parquet for older pandas

docs: deprecate `chunksize` when used with load jobs

* keep `chunksize` for future use in streaming APIs

deps: explicitly require `pyarrow >= 3.0`

* mention pyarrow as a dependency
* add pyarrow to conda deps

deps: explicitly require `numpy >= 1.16.6`

* update minimum numpy

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 81fa744 commit 9a65383

16 files changed: +286 -151 lines changed
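
In practice, the change means pandas_gbq.to_gbq serializes the DataFrame with pyarrow and runs a Parquet load job by default (when pandas 1.1.0+ is installed), while api_method="load_csv" restores the previous CSV-based behavior. A minimal usage sketch; the project and table names are placeholders, not taken from this commit:

import pandas
import pandas_gbq

df = pandas.DataFrame({"my_string": ["a", "b", "c"], "my_int64": [1, 2, 3]})

# New default: upload via a Parquet load job (requires pyarrow).
pandas_gbq.to_gbq(df, "my_dataset.my_table", project_id="my-project")

# Previous behavior: upload via a CSV load job.
pandas_gbq.to_gbq(
    df,
    "my_dataset.my_table",
    project_id="my-project",
    if_exists="append",
    api_method="load_csv",
)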

CONTRIBUTING.rst

Lines changed: 1 addition & 1 deletion
@@ -148,7 +148,7 @@ Running System Tests

 .. note::

-   System tests are only configured to run under Python 3.8 and 3.9.
+   System tests are only configured to run under Python 3.7, 3.8 and 3.9.
    For expediency, we do not run them in older versions of Python 3.

 This alone will not run the tests. You'll need to change some local

ci/requirements-3.7-0.23.2.conda

Lines changed: 2 additions & 1 deletion
@@ -2,8 +2,9 @@ codecov
 coverage
 fastavro
 flake8
-numpy==1.14.5
+numpy==1.16.6
 google-cloud-bigquery==1.11.1
+pyarrow==3.0.0
 pydata-google-auth
 pytest
 pytest-cov

ci/requirements-3.9-NIGHTLY.conda

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 pydata-google-auth
 google-cloud-bigquery
 google-cloud-bigquery-storage
+pyarrow
 pytest
 pytest-cov
 codecov

docs/install.rst

Lines changed: 2 additions & 1 deletion
@@ -29,7 +29,7 @@ Install from Source

 .. code-block:: shell

-   $ pip install git+https://github.com/pydata/pandas-gbq.git
+   $ pip install git+https://github.com/googleapis/python-bigquery-pandas.git


@@ -38,6 +38,7 @@ Dependencies
 This module requires following additional dependencies:

 - `pydata-google-auth <https://github.com/pydata/pydata-google-auth>`__: Helpers for authentication to Google's API
+- `pyarrow <https://arrow.apache.org/docs/python/>`__: Format for getting data to/from Google BigQuery
 - `google-auth <https://github.com/GoogleCloudPlatform/google-auth-library-python>`__: authentication and authorization for Google's API
 - `google-auth-oauthlib <https://github.com/GoogleCloudPlatform/google-auth-library-python-oauthlib>`__: integration with `oauthlib <https://github.com/idan/oauthlib>`__ for end-user authentication
 - `google-cloud-bigquery <https://googleapis.dev/python/bigquery/latest/index.html>`__: Google Cloud client library for BigQuery

noxfile.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@
 BLACK_PATHS = ["docs", "pandas_gbq", "tests", "noxfile.py", "setup.py"]

 DEFAULT_PYTHON_VERSION = "3.8"
-SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.9"]
+SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
 UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]

 CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()

owlbot.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@
 extras = ["tqdm"]
 templated_files = common.py_library(
     unit_test_python_versions=["3.7", "3.8", "3.9"],
-    system_test_python_versions=["3.8", "3.9"],
+    system_test_python_versions=["3.7", "3.8", "3.9"],
     cov_level=86,
     unit_test_extras=extras,
     system_test_extras=extras,

pandas_gbq/exceptions.py

Lines changed: 12 additions & 3 deletions
@@ -3,21 +3,30 @@
 # license that can be found in the LICENSE file.


+class GenericGBQException(ValueError):
+    """
+    Raised when an unrecognized Google API Error occurs.
+    """
+
+
 class AccessDenied(ValueError):
     """
     Raised when invalid credentials are provided, or tokens have expired.
     """

-    pass
+
+class ConversionError(GenericGBQException):
+    """
+    Raised when there is a problem converting the DataFrame to a format
+    required to upload it to BigQuery.
+    """


 class InvalidPrivateKeyFormat(ValueError):
     """
     Raised when provided private key has invalid format.
     """

-    pass
-

 class PerformanceWarning(RuntimeWarning):
     """

pandas_gbq/features.py

Lines changed: 10 additions & 0 deletions
@@ -10,6 +10,7 @@
 BIGQUERY_BQSTORAGE_VERSION = "1.24.0"
 BIGQUERY_FROM_DATAFRAME_CSV_VERSION = "2.6.0"
 PANDAS_VERBOSITY_DEPRECATION_VERSION = "0.23.0"
+PANDAS_PARQUET_LOSSLESS_TIMESTAMP_VERSION = "1.1.0"


 class Features:
@@ -89,5 +90,14 @@ def pandas_has_deprecated_verbose(self):
         )
         return self.pandas_installed_version >= pandas_verbosity_deprecation

+    @property
+    def pandas_has_parquet_with_lossless_timestamp(self):
+        import pkg_resources
+
+        desired_version = pkg_resources.parse_version(
+            PANDAS_PARQUET_LOSSLESS_TIMESTAMP_VERSION
+        )
+        return self.pandas_installed_version >= desired_version
+

 FEATURES = Features()
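
A small illustrative sketch (not part of the diff) of how the new capability flag is consulted through the module-level FEATURES singleton; to_gbq applies the same check in gbq.py below:

from pandas_gbq.features import FEATURES

# True when the installed pandas is at least 1.1.0, i.e. recent enough to
# write Parquet with lossless timestamp round-trips.
if FEATURES.pandas_has_parquet_with_lossless_timestamp:
    resolved_api_method = "load_parquet"
else:
    resolved_api_method = "load_csv"

print(resolved_api_method)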

pandas_gbq/gbq.py

Lines changed: 38 additions & 11 deletions
@@ -18,8 +18,11 @@
     bigquery = None
     google_exceptions = None

-from pandas_gbq.exceptions import AccessDenied
-from pandas_gbq.exceptions import PerformanceWarning
+from pandas_gbq.exceptions import (
+    AccessDenied,
+    GenericGBQException,
+    PerformanceWarning,
+)
 from pandas_gbq import features
 from pandas_gbq.features import FEATURES
 import pandas_gbq.schema
@@ -69,14 +72,6 @@ class DatasetCreationError(ValueError):
     pass


-class GenericGBQException(ValueError):
-    """
-    Raised when an unrecognized Google API Error occurs.
-    """
-
-    pass
-
-
 class InvalidColumnOrder(ValueError):
     """
     Raised when the provided column order for output
@@ -520,7 +515,7 @@ def _download_results(
             df = rows_iter.to_dataframe(
                 dtypes=conversion_dtypes,
                 progress_bar_type=progress_bar_type,
-                **to_dataframe_kwargs
+                **to_dataframe_kwargs,
             )
         except self.http_error as ex:
             self.process_http_error(ex)
@@ -541,6 +536,7 @@ def load_data(
         chunksize=None,
         schema=None,
         progress_bar=True,
+        api_method: str = "load_parquet",
     ):
         from pandas_gbq import load

@@ -554,6 +550,7 @@ def load_data(
             chunksize=chunksize,
             schema=schema,
             location=self.location,
+            api_method=api_method,
         )
         if progress_bar and tqdm:
             chunks = tqdm.tqdm(chunks)
@@ -876,6 +873,7 @@ def to_gbq(
     location=None,
     progress_bar=True,
     credentials=None,
+    api_method: str = "default",
     verbose=None,
     private_key=None,
 ):
@@ -964,6 +962,12 @@ def to_gbq(
         :class:`google.oauth2.service_account.Credentials` directly.

         .. versionadded:: 0.8.0
+    api_method : str, optional
+        API method used to upload DataFrame to BigQuery. One of "load_parquet",
+        "load_csv". Default "load_parquet" if pandas is version 1.1.0+,
+        otherwise "load_csv".
+
+        .. versionadded:: 0.16.0
     verbose : bool, deprecated
         Deprecated in Pandas-GBQ 0.4.0. Use the `logging module
         to adjust verbosity instead
@@ -988,6 +992,28 @@ def to_gbq(
             stacklevel=1,
         )

+    if api_method == "default":
+        # Avoid using parquet if pandas doesn't support lossless conversions to
+        # parquet timestamp. See: https://stackoverflow.com/a/69758676/101923
+        if FEATURES.pandas_has_parquet_with_lossless_timestamp:
+            api_method = "load_parquet"
+        else:
+            api_method = "load_csv"
+
+    if chunksize is not None:
+        if api_method == "load_parquet":
+            warnings.warn(
+                "chunksize is ignored when using api_method='load_parquet'",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+        elif api_method == "load_csv":
+            warnings.warn(
+                "chunksize will be ignored when using api_method='load_csv' in a future version of pandas-gbq",
+                PendingDeprecationWarning,
+                stacklevel=2,
+            )
+
     if if_exists not in ("fail", "replace", "append"):
         raise ValueError("'{0}' is not valid for if_exists".format(if_exists))
@@ -1071,6 +1097,7 @@ def to_gbq(
         chunksize=chunksize,
         schema=table_schema,
         progress_bar=progress_bar,
+        api_method=api_method,
     )
10761103
