Skip to content

Commit 9b6155c

Browse files
committed
BUG: Sampling over selected groupbys does not reflect the selection
1 parent 9c0a1eb commit 9b6155c

File tree

3 files changed

+31
-2
lines changed

3 files changed

+31
-2
lines changed

doc/source/whatsnew/v1.3.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ Bug fixes
3434
~~~~~~~~~
3535
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
3636
- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
37+
- Fixed bug in :meth:`DataFrame.shift` raising ``TypeError`` when shifting a DataFrame created by concatenating slices and a ``fill_value`` is passed (:issue:`42401`)
3738
-
3839

3940
.. ---------------------------------------------------------------------------

pandas/core/internals/managers.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -386,10 +386,12 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
386386
# We only get here with fill_value not-lib.no_default
387387
ncols = self.shape[0]
388388
if periods > 0:
389-
indexer = [-1] * periods + list(range(ncols - periods))
389+
indexer = np.concatenate(
390+
[np.repeat(-1, periods), np.arange(ncols - periods)]
391+
)
390392
else:
391393
nper = abs(periods)
392-
indexer = list(range(nper, ncols)) + [-1] * nper
394+
indexer = np.concatenate([np.arange(nper, ncols), np.repeat(-1, nper)])
393395
result = self.reindex_indexer(
394396
self.items,
395397
indexer,

pandas/tests/frame/methods/test_shift.py

+26
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,32 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager):
183183

184184
tm.assert_frame_equal(result, expected)
185185

186+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support
187+
def test_shift_axis1_multiple_blocks_with_int_fill(self):
188+
# GH#42401
189+
df1 = DataFrame(np.random.randint(1000, size=(5, 3)))
190+
df2 = DataFrame(np.random.randint(1000, size=(5, 2)))
191+
df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1)
192+
result = df3.shift(2, axis=1, fill_value=np.int_(0))
193+
assert len(df3._mgr.blocks) == 2
194+
195+
expected = df3.take([-1, -1, 0, 1], axis=1)
196+
expected.iloc[:, :2] = np.int_(0)
197+
expected.columns = df3.columns
198+
199+
tm.assert_frame_equal(result, expected)
200+
201+
# Case with periods < 0
202+
df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1)
203+
result = df3.shift(-2, axis=1, fill_value=np.int_(0))
204+
assert len(df3._mgr.blocks) == 2
205+
206+
expected = df3.take([2, 3, -1, -1], axis=1)
207+
expected.iloc[:, -2:] = np.int_(0)
208+
expected.columns = df3.columns
209+
210+
tm.assert_frame_equal(result, expected)
211+
186212
@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning")
187213
def test_tshift(self, datetime_frame):
188214
# TODO: remove this test when tshift deprecation is enforced

0 commit comments

Comments
 (0)