Skip to content

Commit 1b1739e

Browse files
meeseeksmachine and keechongtan
authored and committed
Backport PR #29393: BUG: GH25495 incorrect dtype when using .loc to set Categorical value for column in 1-row DataFrame (#31393)
Co-authored-by: Kee Chong Tan <[email protected]>
1 parent c2ab2bf commit 1b1739e

File tree

3 files changed

+28
-6
lines changed

3 files changed

+28
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1091,6 +1091,7 @@ Indexing
10911091
- Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`)
10921092
- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`)
10931093
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
1094+
- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`)
10941095
- :meth:`MultiIndex.get_loc` can't find missing values when input includes missing values (:issue:`19132`)
10951096
- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`)
10961097
- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years, use e.g. ``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`)

pandas/core/internals/blocks.py

+17-6
Original file line number | Diff line number | Diff line change
@@ -872,7 +872,11 @@ def setitem(self, indexer, value):
872872

873873
# length checking
874874
check_setitem_lengths(indexer, value, values)
875-
875+
exact_match = (
876+
len(arr_value.shape)
877+
and arr_value.shape[0] == values.shape[0]
878+
and arr_value.size == values.size
879+
)
876880
if is_empty_indexer(indexer, arr_value):
877881
# GH#8669 empty indexers
878882
pass
@@ -882,14 +886,21 @@ def setitem(self, indexer, value):
882886
# be e.g. a list; see GH#6043
883887
values[indexer] = value
884888

885-
# if we are an exact match (ex-broadcasting),
886-
# then use the resultant dtype
887889
elif (
888-
len(arr_value.shape)
889-
and arr_value.shape[0] == values.shape[0]
890-
and arr_value.size == values.size
890+
exact_match
891+
and is_categorical_dtype(arr_value.dtype)
892+
and not is_categorical_dtype(values)
891893
):
894+
# GH25495 - If the current dtype is not categorical,
895+
# we need to create a new categorical block
892896
values[indexer] = value
897+
return self.make_block(Categorical(self.values, dtype=arr_value.dtype))
898+
899+
# if we are an exact match (ex-broadcasting),
900+
# then use the resultant dtype
901+
elif exact_match:
902+
values[indexer] = value
903+
893904
try:
894905
values = values.astype(arr_value.dtype)
895906
except ValueError:

pandas/tests/frame/indexing/test_categorical.py

+10
Original file line number | Diff line number | Diff line change
@@ -354,6 +354,16 @@ def test_functions_no_warnings(self):
354354
df.value, range(0, 105, 10), right=False, labels=labels
355355
)
356356

357+
def test_setitem_single_row_categorical(self):
358+
# GH 25495
359+
df = DataFrame({"Alpha": ["a"], "Numeric": [0]})
360+
categories = pd.Categorical(df["Alpha"], categories=["a", "b", "c"])
361+
df.loc[:, "Alpha"] = categories
362+
363+
result = df["Alpha"]
364+
expected = Series(categories, index=df.index, name="Alpha")
365+
tm.assert_series_equal(result, expected)
366+
357367
def test_loc_indexing_preserves_index_category_dtype(self):
358368
# GH 15166
359369
df = DataFrame(

0 commit comments

Comments
 (0)