diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index dc7edd8db662e..31e641e4a08bf 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -180,7 +180,9 @@ I/O
 - Bug in :meth:`read_json` where integer overflow was occuring when json contains big number strings. (:issue:`30320`)
 - `read_csv` will now raise a ``ValueError`` when the arguments `header` and `prefix` both are not `None`. (:issue:`27394`)
 - Bug in :meth:`DataFrame.to_json` was raising ``NotFoundError`` when ``path_or_buf`` was an S3 URI (:issue:`28375`)
--
+- Bug in :meth:`DataFrame.to_parquet` overwriting pyarrow's default for
+  ``coerce_timestamps``; following pyarrow's default allows writing nanosecond
+  timestamps with ``version="2.0"`` (:issue:`31652`).
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 98f2eb3929b59..926635062d853 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -85,7 +85,6 @@ def write(
         df: DataFrame,
         path,
         compression="snappy",
-        coerce_timestamps="ms",
         index: Optional[bool] = None,
         partition_cols=None,
         **kwargs,
@@ -103,17 +102,12 @@ def write(
                 table,
                 path,
                 compression=compression,
-                coerce_timestamps=coerce_timestamps,
                 partition_cols=partition_cols,
                 **kwargs,
             )
         else:
             self.api.parquet.write_table(
-                table,
-                path,
-                compression=compression,
-                coerce_timestamps=coerce_timestamps,
-                **kwargs,
+                table, path, compression=compression, **kwargs,
             )
 
     def read(self, path, columns=None, **kwargs):
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index d51c712ed5abd..7ed8d8f22764c 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -564,6 +564,13 @@ def test_additional_extension_types(self, pa):
         )
         check_round_trip(df, pa)
 
+    @td.skip_if_no("pyarrow", min_version="0.14")
+    def test_timestamp_nanoseconds(self, pa):
+        # with version 2.0, pyarrow defaults to writing the nanoseconds, so
+        # this should work without error
+        df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1n", periods=10)})
+        check_round_trip(df, pa, write_kwargs={"version": "2.0"})
+
 
 class TestParquetFastParquet(Base):
     @td.skip_if_no("fastparquet", min_version="0.3.2")