Skip to content

Commit 78c55fe

Browse files
phofl authored and pmhatre1 committed
BUG: astype not casting values for dictionary dtype correctly (pandas-dev#58479)
* BUG: astype not casting values for dictionary dtype correctly
* Fixup
1 parent 27e3469 commit 78c55fe

File tree

2 files changed, with 10 additions (+10) and 0 deletions (-0).

pandas/core/arrays/arrow/array.py

+2
Original file line number | Diff line number | Diff line change
@@ -525,6 +525,8 @@ def _box_pa_array(
525 525
if pa_type is not None and pa_array.type != pa_type:
526 526
if pa.types.is_dictionary(pa_type):
527 527
pa_array = pa_array.dictionary_encode()
528+
if pa_array.type != pa_type:
529+
pa_array = pa_array.cast(pa_type)
528 530
else:
529 531
try:
530 532
pa_array = pa_array.cast(pa_type)

pandas/tests/extension/test_arrow.py

+8
Original file line number | Diff line number | Diff line change
@@ -3498,6 +3498,14 @@ def test_to_numpy_timestamp_to_int():
3498 3498
tm.assert_numpy_array_equal(result, expected)
3499 3499

3500 3500

3501+
@pytest.mark.parametrize("arrow_type", [pa.large_string(), pa.string()])
3502+
def test_cast_dictionary_different_value_dtype(arrow_type):
3503+
df = pd.DataFrame({"a": ["x", "y"]}, dtype="string[pyarrow]")
3504+
data_type = ArrowDtype(pa.dictionary(pa.int32(), arrow_type))
3505+
result = df.astype({"a": data_type})
3506+
assert result.dtypes.iloc[0] == data_type
3507+
3508+
3501 3509
def test_map_numeric_na_action():
3502 3510
ser = pd.Series([32, 40, None], dtype="int64[pyarrow]")
3503 3511
result = ser.map(lambda x: 42, na_action="ignore")

0 commit comments

Comments
 (0)