Add test_non_nanosecond_timestamps

EduardAkhmetshin · EduardAkhmetshin · commit 38308e7de368 · 2024-08-03T13:43:27.000+01:00
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -1131,6 +1131,31 @@ def test_infer_string_large_string_type(self, tmp_path, pa):
     #     assert result["strings"].dtype == "string"
     # FIXME: don't leave commented-out
 
+    def test_non_nanosecond_timestamps(self, tmp_path, pa):
+        # GH#49236
+        #
+        # pandas 1.x did not support non-nanosecond datetimes; pyarrow.Table.to_pandas
+        # provides a timestamp_as_object param to work around that limitation:
+        # https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pandas
+        #
+        # Verify that the GH#49236 example no longer fails: a microsecond timestamp
+        # written by pyarrow reads back as datetime64[us] without timestamp_as_object.
+        import pyarrow as pa
+        import pyarrow.parquet as pq
+
+        path = tmp_path / "non_nanosecond_timestamp.p"
+
+        arr = pa.array([datetime.datetime(1600, 1, 1)], type=pa.timestamp("us"))
+        table = pa.table([arr], names=["timestamp"])
+        pq.write_table(table, path)
+
+        result = read_parquet(path)
+        expected = pd.DataFrame(
+            data={"timestamp": [datetime.datetime(1600, 1, 1)]},
+            dtype="datetime64[us]",
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 class TestParquetFastParquet(Base):
     @pytest.mark.xfail(reason="datetime_with_nat gets incorrect values")