Skip to content

Commit 38308e7

Browse files
Add test_non_nanosecond_timestamps
1 parent 642d244 commit 38308e7

File tree

1 file changed

+25
-0
lines changed

1 file changed

+25
-0
lines changed

pandas/tests/io/test_parquet.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1131,6 +1131,31 @@ def test_infer_string_large_string_type(self, tmp_path, pa):
11311131
# assert result["strings"].dtype == "string"
11321132
# FIXME: don't leave commented-out
11331133

1134+
def test_non_nanosecond_timestamps(self, tmp_path, pa):
    # GH#49236
    #
    # pandas 1.x didn't support non-nanosecond datetimes; the workaround was
    # the timestamp_as_object parameter of pyarrow.Table.to_pandas:
    # https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pandas
    #
    # This test checks that the current version of pandas supports
    # non-nanosecond (microsecond in this case) datetimes, so the code example
    # from GH#49236 no longer fails and timestamp_as_object is not needed.

    # Import under the full module name so the ``pa`` fixture argument is not
    # rebound by a local alias.
    import pyarrow
    import pyarrow.parquet as pq

    path = tmp_path / "non_nanosecond_timestamp.p"

    # Write a single microsecond-resolution timestamp directly with pyarrow.
    # 1600-01-01 lies outside the range representable by datetime64[ns]
    # (roughly 1677-2262), so it can only round-trip at a coarser resolution.
    arr = pyarrow.array(
        [datetime.datetime(1600, 1, 1)], type=pyarrow.timestamp("us")
    )
    table = pyarrow.table([arr], names=["timestamp"])
    pq.write_table(table, path)

    # Reading back must keep the microsecond unit instead of raising or
    # falling back to object dtype.
    result = read_parquet(path)
    expected = pd.DataFrame(
        data={"timestamp": [datetime.datetime(1600, 1, 1)]},
        dtype="datetime64[us]",
    )
    tm.assert_frame_equal(result, expected)
1158+
11341159

11351160
class TestParquetFastParquet(Base):
11361161
@pytest.mark.xfail(reason="datetime_with_nat gets incorrect values")

0 commit comments

Comments
 (0)