Skip to content

Commit be9ee6d

Browse files
BUG: avoid specifying default coerce_timestamps in to_parquet (#31652)
1 parent 8e47971 commit be9ee6d

File tree

3 files changed

+11
-8
lines changed

3 files changed

+11
-8
lines changed

doc/source/whatsnew/v1.1.0.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,9 @@ I/O
180180
- Bug in :meth:`read_json` where integer overflow was occurring when the JSON contains big number strings. (:issue:`30320`)
181181
- `read_csv` will now raise a ``ValueError`` when the arguments `header` and `prefix` are both not `None`. (:issue:`27394`)
182182
- Bug in :meth:`DataFrame.to_json` was raising ``NotFoundError`` when ``path_or_buf`` was an S3 URI (:issue:`28375`)
183-
-
183+
- Bug in :meth:`DataFrame.to_parquet` overwriting pyarrow's default for
184+
``coerce_timestamps``; following pyarrow's default allows writing nanosecond
185+
timestamps with ``version="2.0"`` (:issue:`31652`).
184186

185187
Plotting
186188
^^^^^^^^

pandas/io/parquet.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ def write(
8585
df: DataFrame,
8686
path,
8787
compression="snappy",
88-
coerce_timestamps="ms",
8988
index: Optional[bool] = None,
9089
partition_cols=None,
9190
**kwargs,
@@ -103,17 +102,12 @@ def write(
103102
table,
104103
path,
105104
compression=compression,
106-
coerce_timestamps=coerce_timestamps,
107105
partition_cols=partition_cols,
108106
**kwargs,
109107
)
110108
else:
111109
self.api.parquet.write_table(
112-
table,
113-
path,
114-
compression=compression,
115-
coerce_timestamps=coerce_timestamps,
116-
**kwargs,
110+
table, path, compression=compression, **kwargs,
117111
)
118112

119113
def read(self, path, columns=None, **kwargs):

pandas/tests/io/test_parquet.py

+7
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,13 @@ def test_additional_extension_types(self, pa):
564564
)
565565
check_round_trip(df, pa)
566566

567+
@td.skip_if_no("pyarrow", min_version="0.14")
568+
def test_timestamp_nanoseconds(self, pa):
569+
# with version 2.0, pyarrow defaults to writing the nanoseconds, so
570+
# this should work without error
571+
df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1n", periods=10)})
572+
check_round_trip(df, pa, write_kwargs={"version": "2.0"})
573+
567574

568575
class TestParquetFastParquet(Base):
569576
@td.skip_if_no("fastparquet", min_version="0.3.2")

0 commit comments

Comments
 (0)