Skip to content

Commit e799d33

Browse files
TST: Add tests for faulty behavior relating to pyarrow categoricals
1 parent dc11990 commit e799d33

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

pandas/tests/reshape/test_pivot.py

+40
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
datetime,
44
timedelta,
55
)
6+
import io
67
from itertools import product
78
import re
89

@@ -2851,3 +2852,42 @@ def test_pivot_margins_with_none_index(self):
28512852
),
28522853
)
28532854
tm.assert_frame_equal(result, expected)
2855+
2856+
def test_pivot_with_pyarrow_categorical(self):
    """
    Pivot on a categorical column of a pyarrow-backed frame (GH#53051).

    The frame is round-tripped through an in-memory parquet buffer and read
    back with ``dtype_backend="pyarrow"``. The pivoted values are compared
    against an explicit expected frame so that a wrong result cannot pass
    silently.
    """
    # GH#53051
    import pytest  # NOTE(review): drop if the module already imports pytest

    # Reading parquet with dtype_backend="pyarrow" needs pyarrow, which is an
    # optional dependency: skip rather than error when it is not installed.
    pytest.importorskip("pyarrow")

    # Create dataframe with categorical column
    df = pd.DataFrame(
        [("A", 1), ("B", 2), ("C", 3)],
        columns=["string_column", "number_column"],
    ).astype({"string_column": "string", "number_column": "float32"})
    # number_column is already float32, so only the string column is re-cast.
    df = df.astype({"string_column": "category"})

    # Convert dataframe to pyarrow backend
    with io.BytesIO() as buffer:
        df.to_parquet(buffer)
        buffer.seek(0)  # Reset buffer position
        df = pd.read_parquet(buffer, dtype_backend="pyarrow")

    # Check that pivot works
    result = df.pivot(columns=["string_column"], values=["number_column"])

    # Assert that values of result are correct to prevent silent failure
    expected_columns = pd.MultiIndex.from_arrays(
        [
            ["number_column", "number_column", "number_column"],
            ["A", "B", "C"],
        ],
        names=(None, "string_column"),
    )
    expected = pd.DataFrame(
        [
            [1.0, np.nan, np.nan],
            [np.nan, 2.0, np.nan],
            [np.nan, np.nan, 3.0],
        ],
        columns=expected_columns,
    )
    # Only values and labels are compared; dtypes differ between the
    # pyarrow-backed result and the default-backed expected frame.
    tm.assert_frame_equal(
        result, expected, check_dtype=False, check_column_type=False
    )
2893+

pandas/tests/test_multilevel.py

+21
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import io
23

34
import numpy as np
45
import pytest
@@ -318,6 +319,26 @@ def test_multiindex_dt_with_nan(self):
318319
expected = Series(["a", "b", "c", "d"], name=("sub", np.nan))
319320
tm.assert_series_equal(result, expected)
320321

322+
def test_multiindex_with_pyarrow_categorical(self):
    """
    Build a MultiIndex from a pyarrow-backed categorical column (GH#53051).

    The frame is round-tripped through an in-memory parquet buffer and read
    back with ``dtype_backend="pyarrow"``; both columns are then moved into
    the index and the shape of the result is checked.
    """
    # GH#53051
    # Reading parquet with dtype_backend="pyarrow" needs pyarrow, which is an
    # optional dependency: skip rather than error when it is not installed.
    pytest.importorskip("pyarrow")

    # Create dataframe with categorical column
    df = pd.DataFrame(
        [("A", 1), ("B", 2), ("C", 3)],
        columns=["string_column", "number_column"],
    ).astype({"string_column": "string", "number_column": "float32"})
    # number_column is already float32, so only the string column is re-cast.
    df = df.astype({"string_column": "category"})

    # Convert dataframe to pyarrow backend
    with io.BytesIO() as buffer:
        df.to_parquet(buffer)
        buffer.seek(0)  # Reset buffer position
        df = pd.read_parquet(buffer, dtype_backend="pyarrow")

    # Check that index can be set
    result = df.set_index(["string_column", "number_column"])

    # Both columns were moved into the index, leaving three rows, a
    # two-level index and no remaining columns.
    assert list(result.index.names) == ["string_column", "number_column"]
    assert result.index.nlevels == 2
    assert result.shape == (3, 0)
341+
321342

322343
class TestSorted:
323344
"""everything you wanted to test about sorting"""

0 commit comments

Comments
 (0)