Skip to content

Commit ee67c84

Browse files
jorisvandenbossche authored and ArgusLi committed
apacheGH-15070: [Python][CI] Compatibility with pandas 2.0 (apache#34878)
### What changes are included in this PR?

- The issue with numpy 1.25 in the assert equal helper was fixed in pandas 1.5.3 -> removing the skip (in theory can still run into this error when using an older pandas version with the latest numpy, but that's not something you should do)
- Casting tz-aware strings to datetime64[ns] was not fixed in pandas (pandas-dev/pandas#50140) -> updating our implementation to work around it
- Casting to numpy string dtype (pandas-dev/pandas#50127) is not yet fixed -> updating the skip

### Are there any user-facing changes?

No

* Closes: apache#15070

Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
1 parent bfb2a05 commit ee67c84

File tree

2 files changed

+8
-29
lines changed

2 files changed

+8
-29
lines changed

python/pyarrow/pandas_compat.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -1148,8 +1148,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
11481148
if pandas_dtype == "datetimetz":
11491149
tz = pa.lib.string_to_tzinfo(
11501150
column_indexes[0]['metadata']['timezone'])
1151-
dt = level.astype(numpy_dtype)
1152-
level = dt.tz_localize('utc').tz_convert(tz)
1151+
level = pd.to_datetime(level, utc=True).tz_convert(tz)
11531152
elif level.dtype != dtype:
11541153
level = level.astype(dtype)
11551154
# ARROW-9096: if original DataFrame was upcast we keep that

python/pyarrow/tests/test_pandas.py

+7-27
Original file line number | Diff line number | Diff line change
@@ -187,17 +187,12 @@ def test_column_index_names_are_preserved(self):
187187
_check_pandas_roundtrip(df, preserve_index=True)
188188

189189
def test_column_index_names_with_tz(self):
190-
if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"):
191-
# TODO: regression in pandas, should be fixed before final 2.0.0
192-
# https://github.com/pandas-dev/pandas/issues/50140
193-
pytest.skip("Regression in pandas 2.0.0.dev")
194190
# ARROW-13756
195191
# Bug if index is timezone aware DataTimeIndex
196192

197193
df = pd.DataFrame(
198194
np.random.randn(5, 3),
199-
columns=pd.date_range(
200-
"2021-01-01", "2021-01-3", freq="D", tz="CET")
195+
columns=pd.date_range("2021-01-01", periods=3, freq="50D", tz="CET")
201196
)
202197
_check_pandas_roundtrip(df, preserve_index=True)
203198

@@ -453,11 +448,11 @@ def test_mixed_column_names(self):
453448
preserve_index=True)
454449

455450
def test_binary_column_name(self):
456-
if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"):
457-
# TODO: regression in pandas, should be fixed before final 2.0.0
451+
if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"):
452+
# TODO: regression in pandas, hopefully fixed in next version
458453
# https://issues.apache.org/jira/browse/ARROW-18394
459454
# https://github.com/pandas-dev/pandas/issues/50127
460-
pytest.skip("Regression in pandas 2.0.0.dev")
455+
pytest.skip("Regression in pandas 2.0.0")
461456
column_data = ['い']
462457
key = 'あ'.encode()
463458
data = {key: column_data}
@@ -2064,11 +2059,6 @@ def test_nested_smaller_ints(self):
20642059
assert result3.equals(expected3)
20652060

20662061
def test_infer_lists(self):
2067-
if ((Version(np.__version__) >= Version("1.25.0.dev0")) and
2068-
(Version(pd.__version__) < Version("2.0.0"))):
2069-
# TODO: regression in pandas with numpy 1.25dev
2070-
# https://github.com/pandas-dev/pandas/issues/50360
2071-
pytest.skip("Regression in pandas with numpy 1.25")
20722062
data = OrderedDict([
20732063
('nan_ints', [[None, 1], [2, 3]]),
20742064
('ints', [[0, 1], [2, 3]]),
@@ -2118,11 +2108,6 @@ def test_infer_numpy_array(self):
21182108
_check_pandas_roundtrip(df, expected_schema=expected_schema)
21192109

21202110
def test_to_list_of_structs_pandas(self):
2121-
if ((Version(np.__version__) >= Version("1.25.0.dev0")) and
2122-
(Version(pd.__version__) < Version("2.0.0"))):
2123-
# TODO: regression in pandas with numpy 1.25dev
2124-
# https://github.com/pandas-dev/pandas/issues/50360
2125-
pytest.skip("Regression in pandas with numpy 1.25")
21262111
ints = pa.array([1, 2, 3], pa.int32())
21272112
strings = pa.array([['a', 'b'], ['c', 'd'], ['e', 'f']],
21282113
pa.list_(pa.string()))
@@ -2192,11 +2177,6 @@ def test_array_from_nested_arrays(self):
21922177
assert result.equals(expected)
21932178

21942179
def test_nested_large_list(self):
2195-
if ((Version(np.__version__) >= Version("1.25.0.dev0")) and
2196-
(Version(pd.__version__) < Version("2.0.0"))):
2197-
# TODO: regression in pandas with numpy 1.25dev
2198-
# https://github.com/pandas-dev/pandas/issues/50360
2199-
pytest.skip("Regression in pandas with numpy 1.25")
22002180
s = (pa.array([[[1, 2, 3], [4]], None],
22012181
type=pa.large_list(pa.large_list(pa.int64())))
22022182
.to_pandas())
@@ -2950,11 +2930,11 @@ def _fully_loaded_dataframe_example():
29502930

29512931
@pytest.mark.parametrize('columns', ([b'foo'], ['foo']))
29522932
def test_roundtrip_with_bytes_unicode(columns):
2953-
if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"):
2954-
# TODO: regression in pandas, should be fixed before final 2.0.0
2933+
if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"):
2934+
# TODO: regression in pandas, hopefully fixed in next version
29552935
# https://issues.apache.org/jira/browse/ARROW-18394
29562936
# https://github.com/pandas-dev/pandas/issues/50127
2957-
pytest.skip("Regression in pandas 2.0.0.dev")
2937+
pytest.skip("Regression in pandas 2.0.0")
29582938

29592939
df = pd.DataFrame(columns=columns)
29602940
table1 = pa.Table.from_pandas(df)

0 commit comments

Comments
 (0)