Skip to content

Commit 63caeb6

Browse files
CLN: Refactor code and clean up according to PR feedback
1 parent 67088fa commit 63caeb6

File tree

3 files changed

+23
-30
lines changed

3 files changed

+23
-30
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,7 @@ Categorical
639639
- Bug in :meth:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
640640
- Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
641641
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where an empty :class:`Series` with :class:`CategoricalDtype` raised an error or was converted to ``null[pyarrow]`` (:issue:`59934`)
642+
-
642643

643644
Datetimelike
644645
^^^^^^^^^^^^

pandas/tests/reshape/test_pivot.py

+11-16
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
datetime,
44
timedelta,
55
)
6-
import io
76
from itertools import product
87
import re
98

@@ -16,6 +15,7 @@
1615

1716
import pandas as pd
1817
from pandas import (
18+
ArrowDtype,
1919
Categorical,
2020
DataFrame,
2121
Grouper,
@@ -2853,29 +2853,24 @@ def test_pivot_margins_with_none_index(self):
28532853
)
28542854
tm.assert_frame_equal(result, expected)
28552855

2856-
# Ignore deprecation raised by old versions of pyarrow. Already fixed in
2857-
# newer versions
28582856
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
28592857
def test_pivot_with_pyarrow_categorical(self):
28602858
# GH#53051
28612859

2862-
pytest.importorskip("pyarrow")
2860+
pa = pytest.importorskip("pyarrow")
28632861

28642862
# Create dataframe with categorical column
2865-
df = (
2866-
DataFrame(
2867-
[("A", 1), ("B", 2), ("C", 3)],
2868-
columns=["string_column", "number_column"],
2869-
)
2870-
.astype({"string_column": "string", "number_column": "float32"})
2871-
.astype({"string_column": "category", "number_column": "float32"})
2872-
)
2863+
df = DataFrame(
2864+
{"string_column": ["A", "B", "C"], "number_column": [1, 2, 3]}
2865+
).astype({"string_column": "category", "number_column": "float32"})
28732866

28742867
# Convert dataframe to pyarrow backend
2875-
with io.BytesIO() as buffer:
2876-
df.to_parquet(buffer)
2877-
buffer.seek(0) # Reset buffer position
2878-
df = pd.read_parquet(buffer, dtype_backend="pyarrow")
2868+
df = df.astype(
2869+
{
2870+
"string_column": ArrowDtype(pa.dictionary(pa.int32(), pa.string())),
2871+
"number_column": "float[pyarrow]",
2872+
}
2873+
)
28792874

28802875
# Check that pivot works
28812876
df = df.pivot(columns=["string_column"], values=["number_column"])

pandas/tests/test_multilevel.py

+11-14
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import datetime
2-
import io
32

43
import numpy as np
54
import pytest
65

76
import pandas as pd
87
from pandas import (
8+
ArrowDtype,
99
DataFrame,
1010
MultiIndex,
1111
Series,
@@ -325,23 +325,20 @@ def test_multiindex_dt_with_nan(self):
325325
def test_multiindex_with_pyarrow_categorical(self):
326326
# GH#53051
327327

328-
pytest.importorskip("pyarrow")
328+
pa = pytest.importorskip("pyarrow")
329329

330330
# Create dataframe with categorical column
331-
df = (
332-
DataFrame(
333-
[["A", 1], ["B", 2], ["C", 3]],
334-
columns=["string_column", "number_column"],
335-
)
336-
.astype({"string_column": "string", "number_column": "float32"})
337-
.astype({"string_column": "category", "number_column": "float32"})
338-
)
331+
df = DataFrame(
332+
{"string_column": ["A", "B", "C"], "number_column": [1, 2, 3]}
333+
).astype({"string_column": "category", "number_column": "float32"})
339334

340335
# Convert dataframe to pyarrow backend
341-
with io.BytesIO() as buffer:
342-
df.to_parquet(buffer)
343-
buffer.seek(0) # Reset buffer position
344-
df = pd.read_parquet(buffer, dtype_backend="pyarrow")
336+
df = df.astype(
337+
{
338+
"string_column": ArrowDtype(pa.dictionary(pa.int32(), pa.string())),
339+
"number_column": "float[pyarrow]",
340+
}
341+
)
345342

346343
# Check that index can be set
347344
df.set_index(["string_column", "number_column"])

0 commit comments

Comments
 (0)