Skip to content

Commit a2d4fd5

Browse files
Merge remote-tracking branch 'upstream/master' into apply-regr-31505
2 parents a4768ff + 79633f9 commit a2d4fd5

File tree

9 files changed

+78
-5
lines changed

9 files changed

+78
-5
lines changed

doc/source/getting_started/basics.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1973,7 +1973,7 @@ Pandas has two ways to store strings.
19731973
1. ``object`` dtype, which can hold any Python object, including strings.
19741974
2. :class:`StringDtype`, which is dedicated to strings.
19751975

1976-
Generally, we recommend using :class:`StringDtype`. See :ref:`text.types` fore more.
1976+
Generally, we recommend using :class:`StringDtype`. See :ref:`text.types` for more.
19771977

19781978
Finally, arbitrary objects may be stored using the ``object`` dtype, but should
19791979
be avoided to the extent possible (for performance and interoperability with

doc/source/whatsnew/v1.0.1.rst

+5
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@ Fixed regressions
2222
- Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`)
2323
- Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`)
2424
- Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`)
25+
- Fixed regression in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`)
2526
- Fixed regression where setting :attr:`pd.options.display.max_colwidth` was not accepting a negative integer. In addition, this behavior has been deprecated in favor of using ``None`` (:issue:`31532`)
2627
- Fixed regression in objTOJSON.c that produced a return-type warning (:issue:`31463`)
2728
- Fixed regression in :meth:`qcut` when passed a nullable integer. (:issue:`31389`)
2829
- Fixed regression in assigning to a :class:`Series` using a nullable integer dtype (:issue:`31446`)
30+
- Fixed performance regression when indexing a ``DataFrame`` or ``Series`` with a :class:`MultiIndex` for the index using a list of labels (:issue:`31648`)
2931

3032
.. ---------------------------------------------------------------------------
3133
@@ -56,6 +58,9 @@ Bug fixes
5658

5759
- Plotting tz-aware timeseries no longer gives UserWarning (:issue:`31205`)
5860

61+
**Interval**
62+
63+
- Bug in :meth:`Series.shift` with ``interval`` dtype raising a ``TypeError`` when shifting an interval array of integers or datetimes (:issue:`31495`)
5964

6065
.. ---------------------------------------------------------------------------
6166

pandas/_libs/hashtable_class_helper.pxi.in

+6-2
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,9 @@ cdef class StringHashTable(HashTable):
670670
val = values[i]
671671

672672
if isinstance(val, str):
673-
v = get_c_string(val)
673+
# GH#31499 if we have a np.str_ get_c_string won't recognize
674+
# it as a str, even though isinstance does.
675+
v = get_c_string(<str>val)
674676
else:
675677
v = get_c_string(self.na_string_sentinel)
676678
vecs[i] = v
@@ -703,7 +705,9 @@ cdef class StringHashTable(HashTable):
703705
val = values[i]
704706

705707
if isinstance(val, str):
706-
v = get_c_string(val)
708+
# GH#31499 if we have a np.str_ get_c_string won't recognize
709+
# it as a str, even though isinstance does.
710+
v = get_c_string(<str>val)
707711
else:
708712
v = get_c_string(self.na_string_sentinel)
709713
vecs[i] = v

pandas/core/arrays/interval.py

+28
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pandas.core.dtypes.dtypes import IntervalDtype
2828
from pandas.core.dtypes.generic import (
2929
ABCDatetimeIndex,
30+
ABCExtensionArray,
3031
ABCIndexClass,
3132
ABCInterval,
3233
ABCIntervalIndex,
@@ -789,6 +790,33 @@ def size(self) -> int:
789790
# Avoid materializing self.values
790791
return self.left.size
791792

793+
def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray:
794+
if not len(self) or periods == 0:
795+
return self.copy()
796+
797+
if isna(fill_value):
798+
fill_value = self.dtype.na_value
799+
800+
# ExtensionArray.shift doesn't work for two reasons
801+
# 1. IntervalArray.dtype.na_value may not be correct for the dtype.
802+
# 2. IntervalArray._from_sequence only accepts NaN for missing values,
803+
# not other values like NaT
804+
805+
empty_len = min(abs(periods), len(self))
806+
if isna(fill_value):
807+
fill_value = self.left._na_value
808+
empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
809+
else:
810+
empty = self._from_sequence([fill_value] * empty_len)
811+
812+
if periods > 0:
813+
a = empty
814+
b = self[:-periods]
815+
else:
816+
a = self[abs(periods) :]
817+
b = empty
818+
return self._concat_same_type([a, b])
819+
792820
def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
793821
"""
794822
Take elements from the IntervalArray.

pandas/core/indexes/multi.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -677,8 +677,11 @@ def __len__(self) -> int:
677677
# --------------------------------------------------------------------
678678
# Levels Methods
679679

680-
@property
680+
@cache_readonly
681681
def levels(self):
682+
# Use cache_readonly to ensure that self.get_locs doesn't repeatedly
683+
# create new IndexEngine
684+
# https://github.com/pandas-dev/pandas/issues/31648
682685
result = [
683686
x._shallow_copy(name=name) for x, name in zip(self._levels, self._names)
684687
]
@@ -1302,6 +1305,9 @@ def _set_names(self, names, level=None, validate=True):
13021305
)
13031306
self._names[lev] = name
13041307

1308+
# If .levels has been accessed, the names in our cache will be stale.
1309+
self._reset_cache()
1310+
13051311
names = property(
13061312
fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n"""
13071313
)

pandas/tests/arrays/categorical/test_constructors.py

+5
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,11 @@ def test_constructor_str_unknown(self):
408408
with pytest.raises(ValueError, match="Unknown dtype"):
409409
Categorical([1, 2], dtype="foo")
410410

411+
def test_constructor_np_strs(self):
412+
# GH#31499 HashTable.map_locations needs to work on np.str_ objects
413+
cat = pd.Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
414+
assert all(isinstance(x, np.str_) for x in cat.categories)
415+
411416
def test_constructor_from_categorical_with_dtype(self):
412417
dtype = CategoricalDtype(["a", "b", "c"], ordered=True)
413418
values = Categorical(["a", "b", "d"])

pandas/tests/arrays/interval/test_interval.py

+18
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,24 @@ def test_where_raises(self, other):
8181
with pytest.raises(ValueError, match=match):
8282
ser.where([True, False, True], other=other)
8383

84+
def test_shift(self):
85+
# https://github.com/pandas-dev/pandas/issues/31495
86+
a = IntervalArray.from_breaks([1, 2, 3])
87+
result = a.shift()
88+
# int -> float
89+
expected = IntervalArray.from_tuples([(np.nan, np.nan), (1.0, 2.0)])
90+
tm.assert_interval_array_equal(result, expected)
91+
92+
def test_shift_datetime(self):
93+
a = IntervalArray.from_breaks(pd.date_range("2000", periods=4))
94+
result = a.shift(2)
95+
expected = a.take([-1, -1, 0], allow_fill=True)
96+
tm.assert_interval_array_equal(result, expected)
97+
98+
result = a.shift(-1)
99+
expected = a.take([1, 2, -1], allow_fill=True)
100+
tm.assert_interval_array_equal(result, expected)
101+
84102

85103
class TestSetitem:
86104
def test_set_na(self, left_right_dtypes):

pandas/tests/extension/base/methods.py

+7
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,13 @@ def test_shift_empty_array(self, data, periods):
280280
expected = empty
281281
self.assert_extension_array_equal(result, expected)
282282

283+
def test_shift_zero_copies(self, data):
284+
result = data.shift(0)
285+
assert result is not data
286+
287+
result = data[:0].shift(2)
288+
assert result is not data
289+
283290
def test_shift_fill_value(self, data):
284291
arr = data[:4]
285292
fill_value = data[0]

pandas/tests/indexes/multi/test_get_set.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def test_set_levels_codes_directly(idx):
159159
minor_codes = [(x + 1) % 1 for x in minor_codes]
160160
new_codes = [major_codes, minor_codes]
161161

162-
msg = "can't set attribute"
162+
msg = "[Cc]an't set attribute"
163163
with pytest.raises(AttributeError, match=msg):
164164
idx.levels = new_levels
165165
with pytest.raises(AttributeError, match=msg):

0 commit comments

Comments
 (0)