|
3 | 3 | datetime,
|
4 | 4 | timedelta,
|
5 | 5 | )
|
| 6 | +import io |
6 | 7 | from itertools import product
|
7 | 8 | import re
|
8 | 9 |
|
@@ -2851,3 +2852,42 @@ def test_pivot_margins_with_none_index(self):
|
2851 | 2852 | ),
|
2852 | 2853 | )
|
2853 | 2854 | tm.assert_frame_equal(result, expected)
|
| 2855 | + |
| 2856 | + def test_pivot_with_pyarrow_categorical(self): |
| 2857 | + # GH#53051 |
| 2858 | + |
| 2859 | + # Create dataframe with categorical colum |
| 2860 | + df = ( |
| 2861 | + pd.DataFrame([("A", 1), ("B", 2), ("C", 3)], columns=["string_column", "number_column"]) |
| 2862 | + .astype({"string_column": "string", "number_column": "float32"}) |
| 2863 | + .astype({"string_column": "category", "number_column": "float32"}) |
| 2864 | + ) |
| 2865 | + |
| 2866 | + # Convert dataframe to pyarrow backend |
| 2867 | + with io.BytesIO() as buffer: |
| 2868 | + df.to_parquet(buffer) |
| 2869 | + buffer.seek(0) # Reset buffer position |
| 2870 | + df = pd.read_parquet(buffer, dtype_backend="pyarrow") |
| 2871 | + |
| 2872 | + |
| 2873 | + # Check that pivot works |
| 2874 | + df = df.pivot(columns=["string_column"], values=["number_column"]) |
| 2875 | + |
| 2876 | + # Assert that values of result are correct to prevent silent failure |
| 2877 | + multi_index = pd.MultiIndex.from_arrays( |
| 2878 | + [ |
| 2879 | + ["number_column", "number_column", "number_column"], |
| 2880 | + ["A", "B", "C"] |
| 2881 | + ], |
| 2882 | + names=(None, "string_column") |
| 2883 | + ) |
| 2884 | + df_expected = pd.DataFrame( |
| 2885 | + [ |
| 2886 | + [1.0, np.nan, np.nan], |
| 2887 | + [np.nan, 2.0, np.nan], |
| 2888 | + [np.nan, np.nan, 3.0] |
| 2889 | + ], |
| 2890 | + columns=multi_index |
| 2891 | + ) |
| 2892 | + tm.assert_frame_equal(df, df_expected, check_dtype=False, check_column_type=False) |
| 2893 | + |
0 commit comments