Skip to content

Commit b046791

Browse files
committed
Merge remote-tracking branch 'upstream/master' into disown-tz-only-rebased
2 parents af815f8 + 04a0eac commit b046791

File tree

6 files changed

+94
-3
lines changed

6 files changed

+94
-3
lines changed

doc/source/categorical.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,8 @@ dtype in apply
11451145

11461146
Pandas currently does not preserve the dtype in apply functions: If you apply along rows you get
11471147
a `Series` of ``object`` `dtype` (same as getting a row -> getting one element will return a
1148-
basic type) and applying along columns will also convert to object.
1148+
basic type) and applying along columns will also convert to object. ``NaN`` values are unaffected.
1149+
You can use ``fillna`` to handle missing values before applying a function.
11491150

11501151
.. ipython:: python
11511152

doc/source/whatsnew/v0.24.0.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -1013,7 +1013,8 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
10131013

10141014
**Other changes**
10151015

1016-
- ``ExtensionArray`` has gained the abstract methods ``.dropna()`` (:issue:`21185`)
1016+
- :meth:`~pandas.api.extensions.ExtensionArray.dropna` has been added (:issue:`21185`)
1017+
- :meth:`~pandas.api.extensions.ExtensionArray.repeat` has been added (:issue:`24349`)
10171018
- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
10181019
the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
10191020
- An ``ExtensionArray`` with a boolean dtype now works correctly as a boolean indexer. :meth:`pandas.api.types.is_bool_dtype` now properly considers them boolean (:issue:`22326`)
@@ -1310,6 +1311,7 @@ Categorical
13101311
- Bug when resampling :meth:`DataFrame.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`)
13111312
- Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`)
13121313
- Bug in :meth:`Series.where` losing the categorical dtype for categorical data (:issue:`24077`)
1314+
- Bug in :meth:`Categorical.map` where ``NaN`` values could be handled unpredictably. They now remain unchanged (:issue:`24241`)
13131315

13141316
Datetimelike
13151317
^^^^^^^^^^^^

pandas/core/arrays/base.py

+29
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,35 @@ def factorize(self, na_sentinel=-1):
580580
uniques = self._from_factorized(uniques, self)
581581
return labels, uniques
582582

583+
def repeat(self, repeats, axis=None):
    """
    Repeat elements of an array.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    repeats : int or array-like of int
        Number of repetitions. Every count must be non-negative.
        A single int repeats each element that many times (0 returns
        an empty array); an array-like is passed through to
        ``numpy.ndarray.repeat`` to give per-element counts.
    axis : None
        Must be None; any other value raises ``ValueError``.

    Returns
    -------
    repeated_array : ExtensionArray
        Same type as the input, with elements repeated `repeats` times.

    Raises
    ------
    ValueError
        If `axis` is not None, or if any repeat count is negative.

    See Also
    --------
    numpy.repeat : Similar method for :class:`numpy.ndarray`.
    ExtensionArray.take : Take arbitrary positions.
    """
    if axis is not None:
        raise ValueError("'axis' must be None.")
    # Validate element-wise: a bare ``repeats < 0`` raises TypeError for
    # lists and an ambiguous-truth-value error for ndarrays.
    if np.any(np.asarray(repeats) < 0):
        raise ValueError("negative repeats are not allowed.")
    # Repeat the positional indices, then let ``take`` build the result.
    ind = np.arange(len(self)).repeat(repeats)
    return self.take(ind)
611+
583612
# ------------------------------------------------------------------------
584613
# Indexing methods
585614
# ------------------------------------------------------------------------

pandas/core/arrays/categorical.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1166,7 +1166,7 @@ def map(self, mapper):
11661166
Maps the categories to new categories. If the mapping correspondence is
11671167
one-to-one the result is a :class:`~pandas.Categorical` which has the
11681168
same order property as the original, otherwise a :class:`~pandas.Index`
1169-
is returned.
1169+
is returned. NaN values are unaffected.
11701170
11711171
If a `dict` or :class:`~pandas.Series` is used any unmapped category is
11721172
mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
@@ -1234,6 +1234,11 @@ def map(self, mapper):
12341234
categories=new_categories,
12351235
ordered=self.ordered)
12361236
except ValueError:
1237+
# NA values are represented in self._codes with -1
1238+
# np.take causes NA values to take final element in new_categories
1239+
if np.any(self._codes == -1):
1240+
new_categories = new_categories.insert(len(new_categories),
1241+
np.nan)
12371242
return np.take(new_categories, self._codes)
12381243

12391244
__eq__ = _cat_compare_op('__eq__')

pandas/tests/extension/base/methods.py

+31
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,34 @@ def test_where_series(self, data, na_value, as_frame):
264264
if as_frame:
265265
expected = expected.to_frame(name='a')
266266
self.assert_equal(result, expected)
267+
268+
@pytest.mark.parametrize("as_series", [True, False])
@pytest.mark.parametrize("repeats", [0, 1, 2])
def test_repeat(self, data, repeats, as_series):
    """Check ``repeat`` on a three-element array, bare or in a Series."""
    first, second, third = data[:3]
    cls = type(data)
    arr = cls._from_sequence([first, second, third], dtype=data.dtype)
    if as_series:
        arr = pd.Series(arr)

    result = arr.repeat(repeats)

    # Hand-written expectations: 0 -> empty, 1 -> unchanged, otherwise
    # every element doubled.
    by_count = {0: [], 1: [first, second, third]}
    doubled = [first, first, second, second, third, third]
    expected = cls._from_sequence(by_count.get(repeats, doubled),
                                  dtype=data.dtype)
    if as_series:
        # Series.repeat repeats the index labels along with the values.
        index = pd.Series(np.arange(len(arr))).repeat(repeats).index
        expected = pd.Series(expected, index=index)
    self.assert_equal(result, expected)
290+
291+
def test_repeat_raises(self, data):
    """``repeat`` rejects a non-None ``axis`` and negative counts."""
    # Any axis other than None is refused.
    with pytest.raises(ValueError, match="'axis'"):
        data.repeat(2, axis=1)

    # A negative repeat count is refused.
    with pytest.raises(ValueError, match="negative"):
        data.repeat(-1)

pandas/tests/indexes/test_category.py

+23
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,29 @@ def test_map_with_categorical_series(self):
311311
exp = pd.Index(["odd", "even", "odd", np.nan])
312312
tm.assert_index_equal(a.map(c), exp)
313313

314+
@pytest.mark.parametrize(
    'data, f',
    [
        ([1, 1, np.nan], pd.isna),
        ([1, 2, np.nan], pd.isna),
        ([1, 1, np.nan], {1: False}),
        ([1, 2, np.nan], {1: False, 2: False}),
        ([1, 1, np.nan], pd.Series([False, False])),
        ([1, 2, np.nan], pd.Series([False, False, False])),
    ])
def test_map_with_nan(self, data, f):  # GH 24241
    """NaN entries stay NaN when mapping with a callable, dict or Series."""
    result = pd.Categorical(data).map(f)
    mapped = [False, False, np.nan]

    if data[1] != 1:
        # Two distinct categories map onto the same value, so the
        # result is expected to come back as an Index.
        tm.assert_index_equal(result, pd.Index(mapped))
        return

    # A single category maps to a single value: Categorical expected.
    tm.assert_categorical_equal(result, pd.Categorical(mapped))
336+
314337
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
315338
def test_where(self, klass):
316339
i = self.create_index()

0 commit comments

Comments
 (0)