Skip to content

Commit aa9a1b3

Browse files
phofl and CloseChoice authored
REGR: setting a numeric value in a Categorical Series with enlargement raises an internal error (#48106)
* fix regression when loc is used to create a new element on a categorical series
* add whatsnew
* fix enlarging by scalar
* update due to PR discussions
* remove unnecessary comment
* WIP: fix nan for enlarging
* add tests; fix nan in _maybe_promote
* remove unnecessary statement
* use any_numeric_ea_dtype for tests
* Refactor
* Add line back in
* Add NaT
* Remove whatsnew

Co-authored-by: tobias.pitters <[email protected]>
1 parent e5bfbdc commit aa9a1b3

File tree

2 files changed

+55
-0
lines changed

2 files changed

+55
-0
lines changed

pandas/core/dtypes/cast.py

+6
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,12 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
591591
fv = na_value_for_dtype(dtype)
592592
return dtype, fv
593593

594+
elif isinstance(dtype, CategoricalDtype):
595+
if fill_value in dtype.categories or isna(fill_value):
596+
return dtype, fill_value
597+
else:
598+
return object, ensure_object(fill_value)
599+
594600
elif isna(fill_value):
595601
dtype = _dtype_obj
596602
if fill_value is None:

pandas/tests/indexing/test_loc.py

+49
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pandas as pd
1919
from pandas import (
2020
Categorical,
21+
CategoricalDtype,
2122
CategoricalIndex,
2223
DataFrame,
2324
DatetimeIndex,
@@ -1820,6 +1821,54 @@ def test_loc_getitem_sorted_index_level_with_duplicates(self):
18201821
result = df.loc[("foo", "bar")]
18211822
tm.assert_frame_equal(result, expected)
18221823

1824+
def test_additional_element_to_categorical_series_loc(self):
1825+
# GH#47677
# Regression test: enlarging a categorical Series through .loc with a
# value (0) that is not among its categories ("a", "b", "c").
1826+
result = Series(["a", "b", "c"], dtype="category")
1827+
# Label 3 does not exist yet, so this assignment appends a new element.
result.loc[3] = 0
1828+
# The expected outcome is an object-dtype Series rather than a categorical.
expected = Series(["a", "b", "c", 0], dtype="object")
1829+
tm.assert_series_equal(result, expected)
1830+
1831+
def test_additional_categorical_element_loc(self):
1832+
# GH#47677
# Enlarging a categorical Series through .loc with a value ("a") that is
# already one of its categories.
1833+
result = Series(["a", "b", "c"], dtype="category")
1834+
# Label 3 does not exist yet, so this assignment appends a new element.
result.loc[3] = "a"
1835+
# The expected outcome keeps the "category" dtype.
expected = Series(["a", "b", "c", "a"], dtype="category")
1836+
tm.assert_series_equal(result, expected)
1837+
1838+
def test_loc_set_nan_in_categorical_series(self, any_numeric_ea_dtype):
1839+
# GH#47677
# Assigning NaN into a categorical Series whose categories are held in an
# Index of dtype ``any_numeric_ea_dtype``, both by enlargement and by
# overwriting an existing label.
# NOTE(review): any_numeric_ea_dtype is presumably a pytest fixture
# defined elsewhere (e.g. conftest) -- confirm.
1840+
srs = Series(
1841+
[1, 2, 3],
1842+
dtype=CategoricalDtype(Index([1, 2, 3], dtype=any_numeric_ea_dtype)),
1843+
)
1844+
# enlarge
1845+
# Label 3 is new; the expected Series below uses the same CategoricalDtype.
srs.loc[3] = np.nan
1846+
expected = Series(
1847+
[1, 2, 3, np.nan],
1848+
dtype=CategoricalDtype(Index([1, 2, 3], dtype=any_numeric_ea_dtype)),
1849+
)
1850+
tm.assert_series_equal(srs, expected)
1851+
# set into
1852+
# Label 1 already exists; its value is replaced by NaN and the expected
# Series is again built with the same CategoricalDtype.
srs.loc[1] = np.nan
1853+
expected = Series(
1854+
[1, np.nan, 3, np.nan],
1855+
dtype=CategoricalDtype(Index([1, 2, 3], dtype=any_numeric_ea_dtype)),
1856+
)
1857+
tm.assert_series_equal(srs, expected)
1858+
1859+
# Run once for each missing-value sentinel: np.nan, pd.NA, None and pd.NaT.
@pytest.mark.parametrize("na", (np.nan, pd.NA, None, pd.NaT))
1860+
def test_loc_consistency_series_enlarge_set_into(self, na):
1861+
# GH#47677
# Assigning a missing value at label 3 should give the same result whether
# the label is new (enlargement) or already present (set into).
1862+
srs_enlarge = Series(["a", "b", "c"], dtype="category")
1863+
# Label 3 does not exist here, so this enlarges the Series.
srs_enlarge.loc[3] = na
1864+
1865+
srs_setinto = Series(["a", "b", "c", "a"], dtype="category")
1866+
# Label 3 already exists here, so this overwrites the existing element.
srs_setinto.loc[3] = na
1867+
1868+
# Both code paths must produce equal Series ...
tm.assert_series_equal(srs_enlarge, srs_setinto)
1869+
# ... and both must equal a categorical Series constructed directly with
# the missing value in the last position.
expected = Series(["a", "b", "c", na], dtype="category")
1870+
tm.assert_series_equal(srs_enlarge, expected)
1871+
18231872
def test_loc_getitem_preserves_index_level_category_dtype(self):
18241873
# GH#15166
18251874
df = DataFrame(

0 commit comments

Comments
 (0)