Skip to content

Commit 0c50950

Browse files
keechongtan and jreback
authored and committed
BUG: GH25495 incorrect dtype when using .loc to set Categorical value for column in 1-row DataFrame (pandas-dev#29393)
1 parent a9b61a9 commit 0c50950

File tree

3 files changed

+28
-6
lines changed

3 files changed

+28
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1086,6 +1086,7 @@ Indexing
10861086
- Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`)
10871087
- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`)
10881088
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
1089+
- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`)
10891090
- :meth:`MultiIndex.get_loc` can't find missing values when input includes missing values (:issue:`19132`)
10901091
- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`)
10911092
- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years, use e.g. ``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`)

pandas/core/internals/blocks.py

+17-6
Original file line number | Diff line number | Diff line change
@@ -876,7 +876,11 @@ def setitem(self, indexer, value):
876876

877877
# length checking
878878
check_setitem_lengths(indexer, value, values)
879-
879+
exact_match = (
880+
len(arr_value.shape)
881+
and arr_value.shape[0] == values.shape[0]
882+
and arr_value.size == values.size
883+
)
880884
if is_empty_indexer(indexer, arr_value):
881885
# GH#8669 empty indexers
882886
pass
@@ -886,14 +890,21 @@ def setitem(self, indexer, value):
886890
# be e.g. a list; see GH#6043
887891
values[indexer] = value
888892

889-
# if we are an exact match (ex-broadcasting),
890-
# then use the resultant dtype
891893
elif (
892-
len(arr_value.shape)
893-
and arr_value.shape[0] == values.shape[0]
894-
and arr_value.size == values.size
894+
exact_match
895+
and is_categorical_dtype(arr_value.dtype)
896+
and not is_categorical_dtype(values)
895897
):
898+
# GH25495 - If the current dtype is not categorical,
899+
# we need to create a new categorical block
896900
values[indexer] = value
901+
return self.make_block(Categorical(self.values, dtype=arr_value.dtype))
902+
903+
# if we are an exact match (ex-broadcasting),
904+
# then use the resultant dtype
905+
elif exact_match:
906+
values[indexer] = value
907+
897908
try:
898909
values = values.astype(arr_value.dtype)
899910
except ValueError:

pandas/tests/frame/indexing/test_categorical.py

+10
Original file line number | Diff line number | Diff line change
@@ -354,6 +354,16 @@ def test_functions_no_warnings(self):
354354
df.value, range(0, 105, 10), right=False, labels=labels
355355
)
356356

357+
def test_setitem_single_row_categorical(self):
358+
# GH 25495
359+
df = DataFrame({"Alpha": ["a"], "Numeric": [0]})
360+
categories = pd.Categorical(df["Alpha"], categories=["a", "b", "c"])
361+
df.loc[:, "Alpha"] = categories
362+
363+
result = df["Alpha"]
364+
expected = Series(categories, index=df.index, name="Alpha")
365+
tm.assert_series_equal(result, expected)
366+
357367
def test_loc_indexing_preserves_index_category_dtype(self):
358368
# GH 15166
359369
df = DataFrame(

0 commit comments

Comments
 (0)