Skip to content

Commit 2668932

Browse files
authored
BUG: DataFrame.shift(axis=1) with EADtype (#53832)
* BUG: DataFrame.shift(axis=1) with EADtype * GH refs, final
1 parent 124585b commit 2668932

File tree

4 files changed

+81
-39
lines changed

4 files changed

+81
-39
lines changed

doc/source/whatsnew/v2.1.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -535,11 +535,12 @@ Other
535535
- Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
536536
- Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`)
537537
- Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`)
538+
- Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`)
539+
- Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`)
538540
- Bug in :meth:`Series.align`, :meth:`DataFrame.align`, :meth:`Series.reindex`, :meth:`DataFrame.reindex`, :meth:`Series.interpolate`, :meth:`DataFrame.interpolate`, incorrectly failing to raise with method="asfreq" (:issue:`53620`)
539541
- Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. It now keeps the original dtype (:issue:`52384`)
540542
- Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`)
541543
- Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`)
542-
-
543544

544545
.. ***DO NOT USE THIS SECTION***
545546

pandas/core/frame.py

+35-37
Original file line numberDiff line numberDiff line change
@@ -5501,45 +5501,41 @@ def shift(
55015501
) -> DataFrame:
55025502
axis = self._get_axis_number(axis)
55035503

5504+
if freq is not None and fill_value is not lib.no_default:
5505+
# GH#53832
5506+
raise ValueError(
5507+
"Cannot pass both 'freq' and 'fill_value' to "
5508+
f"{type(self).__name__}.shift"
5509+
)
5510+
55045511
ncols = len(self.columns)
5505-
if (
5506-
axis == 1
5507-
and periods != 0
5508-
and freq is None
5509-
and fill_value is lib.no_default
5510-
and ncols > 0
5511-
):
5512-
# We will infer fill_value to match the closest column
5513-
5514-
# Use a column that we know is valid for our column's dtype GH#38434
5515-
label = self.columns[0]
5516-
5517-
if periods > 0:
5518-
result = self.iloc[:, :-periods]
5519-
for col in range(min(ncols, abs(periods))):
5520-
# TODO(EA2D): doing this in a loop unnecessary with 2D EAs
5521-
# Define filler inside loop so we get a copy
5522-
filler = self.iloc[:, 0].shift(len(self))
5523-
result.insert(0, label, filler, allow_duplicates=True)
5524-
else:
5525-
result = self.iloc[:, -periods:]
5526-
for col in range(min(ncols, abs(periods))):
5527-
# Define filler inside loop so we get a copy
5528-
filler = self.iloc[:, -1].shift(len(self))
5529-
result.insert(
5530-
len(result.columns), label, filler, allow_duplicates=True
5531-
)
5512+
arrays = self._mgr.arrays
5513+
if axis == 1 and periods != 0 and ncols > 0 and freq is None:
5514+
if fill_value is lib.no_default:
5515+
# We will infer fill_value to match the closest column
55325516

5533-
result.columns = self.columns.copy()
5534-
return result
5535-
elif (
5536-
axis == 1
5537-
and periods != 0
5538-
and fill_value is not lib.no_default
5539-
and ncols > 0
5540-
):
5541-
arrays = self._mgr.arrays
5542-
if len(arrays) > 1 or (
5517+
# Use a column that we know is valid for our column's dtype GH#38434
5518+
label = self.columns[0]
5519+
5520+
if periods > 0:
5521+
result = self.iloc[:, :-periods]
5522+
for col in range(min(ncols, abs(periods))):
5523+
# TODO(EA2D): doing this in a loop unnecessary with 2D EAs
5524+
# Define filler inside loop so we get a copy
5525+
filler = self.iloc[:, 0].shift(len(self))
5526+
result.insert(0, label, filler, allow_duplicates=True)
5527+
else:
5528+
result = self.iloc[:, -periods:]
5529+
for col in range(min(ncols, abs(periods))):
5530+
# Define filler inside loop so we get a copy
5531+
filler = self.iloc[:, -1].shift(len(self))
5532+
result.insert(
5533+
len(result.columns), label, filler, allow_duplicates=True
5534+
)
5535+
5536+
result.columns = self.columns.copy()
5537+
return result
5538+
elif len(arrays) > 1 or (
55435539
# If we only have one block and we know that we can't
55445540
# keep the same dtype (i.e. the _can_hold_element check)
55455541
# then we can go through the reindex_indexer path
@@ -5567,6 +5563,8 @@ def shift(
55675563
)
55685564
res_df = self._constructor_from_mgr(mgr, axes=mgr.axes)
55695565
return res_df.__finalize__(self, method="shift")
5566+
else:
5567+
return self.T.shift(periods=periods, fill_value=fill_value).T
55705568

55715569
return super().shift(
55725570
periods=periods, freq=freq, axis=axis, fill_value=fill_value

pandas/core/generic.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -10477,7 +10477,7 @@ def shift(
1047710477
periods: int = 1,
1047810478
freq=None,
1047910479
axis: Axis = 0,
10480-
fill_value: Hashable = None,
10480+
fill_value: Hashable = lib.no_default,
1048110481
) -> Self:
1048210482
"""
1048310483
Shift index by desired number of periods with an optional time `freq`.
@@ -10575,6 +10575,15 @@ def shift(
1057510575
2020-01-07 30 33 37
1057610576
2020-01-08 45 48 52
1057710577
"""
10578+
axis = self._get_axis_number(axis)
10579+
10580+
if freq is not None and fill_value is not lib.no_default:
10581+
# GH#53832
10582+
raise ValueError(
10583+
"Cannot pass both 'freq' and 'fill_value' to "
10584+
f"{type(self).__name__}.shift"
10585+
)
10586+
1057810587
if periods == 0:
1057910588
return self.copy(deep=None)
1058010589

@@ -10588,6 +10597,11 @@ def shift(
1058810597
new_data, axes=new_data.axes
1058910598
).__finalize__(self, method="shift")
1059010599

10600+
return self._shift_with_freq(periods, axis, freq)
10601+
10602+
@final
10603+
def _shift_with_freq(self, periods: int, axis: int, freq) -> Self:
10604+
# see shift.__doc__
1059110605
# when freq is given, index is shifted, data is not
1059210606
index = self._get_axis(axis)
1059310607

pandas/tests/frame/methods/test_shift.py

+29
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,35 @@
1717

1818

1919
class TestDataFrameShift:
20+
def test_shift_axis1_with_valid_fill_value_one_array(self):
21+
# Case with axis=1 that does not go through the "len(arrays)>1" path
22+
# in DataFrame.shift
23+
data = np.random.randn(5, 3)
24+
df = DataFrame(data)
25+
res = df.shift(axis=1, periods=1, fill_value=12345)
26+
expected = df.T.shift(periods=1, fill_value=12345).T
27+
tm.assert_frame_equal(res, expected)
28+
29+
# same but with an 1D ExtensionArray backing it
30+
df2 = df[[0]].astype("Float64")
31+
res2 = df2.shift(axis=1, periods=1, fill_value=12345)
32+
expected2 = DataFrame([12345] * 5, dtype="Float64")
33+
tm.assert_frame_equal(res2, expected2)
34+
35+
def test_shift_disallow_freq_and_fill_value(self, frame_or_series):
36+
# Can't pass both!
37+
obj = frame_or_series(
38+
np.random.randn(5), index=date_range("1/1/2000", periods=5, freq="H")
39+
)
40+
41+
msg = "Cannot pass both 'freq' and 'fill_value' to (Series|DataFrame).shift"
42+
with pytest.raises(ValueError, match=msg):
43+
obj.shift(1, fill_value=1, freq="H")
44+
45+
if frame_or_series is DataFrame:
46+
with pytest.raises(ValueError, match=msg):
47+
obj.shift(1, axis=1, fill_value=1, freq="H")
48+
2049
@pytest.mark.parametrize(
2150
"input_data, output_data",
2251
[(np.empty(shape=(0,)), []), (np.ones(shape=(2,)), [np.nan, 1.0])],

0 commit comments

Comments
 (0)