From 9e89af8c7925f6bec473dc902f27c7bf975848af Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 30 Oct 2024 15:22:02 +0100 Subject: [PATCH 1/2] CI/TST: fix parquet tz test returning pytz fixed offset (pyarrow 18) --- pandas/tests/io/test_parquet.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 4c2ea036f08dc..420be35ba5c6c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -17,7 +17,6 @@ pa_version_under13p0, pa_version_under15p0, pa_version_under17p0, - pa_version_under18p0, ) import pandas as pd @@ -977,18 +976,6 @@ def test_timestamp_nanoseconds(self, pa): def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): pytest.importorskip("pyarrow", "11.0.0") - if ( - timezone_aware_date_list.tzinfo != datetime.timezone.utc - and pa_version_under18p0 - ): - request.applymarker( - pytest.mark.xfail( - reason=( - "pyarrow returns pytz.FixedOffset while pandas " - "constructs datetime.timezone https://github.com/pandas-dev/pandas/issues/37286" - ) - ) - ) idx = 5 * [timezone_aware_date_list] df = pd.DataFrame(index=idx, data={"index_as_col": idx}) @@ -1005,6 +992,15 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): expected = df[:] if pa_version_under11p0: expected.index = expected.index.as_unit("ns") + if timezone_aware_date_list.tzinfo != datetime.timezone.utc: + # pyarrow returns pytz.FixedOffset while pandas constructs datetime.timezone + # https://github.com/pandas-dev/pandas/issues/37286 + import pytz + + offset = df.index.tz.utcoffset(timezone_aware_date_list) + tz = pytz.FixedOffset(offset.total_seconds() / 60) + expected.index = expected.index.tz_convert(tz) + expected["index_as_col"] = expected["index_as_col"].dt.tz_convert(tz) check_round_trip(df, pa, check_dtype=False, expected=expected) def test_filter_row_groups(self, pa): From 9556e90b4a444ace5c5bf5e3ca7cd01711e77742 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 30 Oct 2024 20:49:37 +0100 Subject: [PATCH 2/2] only convert to pytz if installed --- pandas/tests/io/test_parquet.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 420be35ba5c6c..6ef7105cf5ccc 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -973,7 +973,7 @@ def test_timestamp_nanoseconds(self, pa): df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1ns", periods=10)}) check_round_trip(df, pa, write_kwargs={"version": ver}) - def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): + def test_timezone_aware_index(self, pa, timezone_aware_date_list): pytest.importorskip("pyarrow", "11.0.0") idx = 5 * [timezone_aware_date_list] @@ -995,12 +995,15 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): if timezone_aware_date_list.tzinfo != datetime.timezone.utc: # pyarrow returns pytz.FixedOffset while pandas constructs datetime.timezone # https://github.com/pandas-dev/pandas/issues/37286 - import pytz - - offset = df.index.tz.utcoffset(timezone_aware_date_list) - tz = pytz.FixedOffset(offset.total_seconds() / 60) - expected.index = expected.index.tz_convert(tz) - expected["index_as_col"] = expected["index_as_col"].dt.tz_convert(tz) + try: + import pytz + except ImportError: + pass + else: + offset = df.index.tz.utcoffset(timezone_aware_date_list) + tz = pytz.FixedOffset(offset.total_seconds() / 60) + expected.index = expected.index.tz_convert(tz) + expected["index_as_col"] = expected["index_as_col"].dt.tz_convert(tz) check_round_trip(df, pa, check_dtype=False, expected=expected) def test_filter_row_groups(self, pa):