Skip to content

Commit 902be02

Browse files
authored
ENH: add ignore index to DataFrame / Series.sample (#42168)
1 parent 539fba6 commit 902be02

File tree

3 files changed

+24
-4
lines changed

3 files changed

+24
-4
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ Other enhancements
278278
- Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
279279
- :meth:`Series.between` can now accept ``left`` or ``right`` as arguments to ``inclusive`` to include only the left or right boundary (:issue:`40245`)
280280
- :meth:`DataFrame.explode` now supports exploding multiple columns. Its ``column`` argument now also accepts a list of str or tuples for exploding on multiple columns at the same time (:issue:`39240`)
281+
- :meth:`DataFrame.sample` and :meth:`Series.sample` now accept the ``ignore_index`` argument to reset the index after sampling, similar to :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.sort_values` (:issue:`38581`)
281282

282283
.. ---------------------------------------------------------------------------
283284

pandas/core/generic.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -5144,11 +5144,12 @@ def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries:
51445144
def sample(
51455145
self: FrameOrSeries,
51465146
n=None,
5147-
frac=None,
5148-
replace=False,
5147+
frac: float | None = None,
5148+
replace: bool_t = False,
51495149
weights=None,
51505150
random_state: RandomState | None = None,
5151-
axis=None,
5151+
axis: Axis | None = None,
5152+
ignore_index: bool_t = False,
51525153
) -> FrameOrSeries:
51535154
"""
51545155
Return a random sample of items from an axis of object.
@@ -5189,6 +5190,10 @@ def sample(
51895190
axis : {0 or ‘index’, 1 or ‘columns’, None}, default None
51905191
Axis to sample. Accepts axis number or name. Default is stat axis
51915192
for given data type (0 for Series and DataFrames).
5193+
ignore_index : bool, default False
5194+
If True, the resulting index will be labeled 0, 1, …, n - 1.
5195+
5196+
.. versionadded:: 1.3.0
51925197
51935198
Returns
51945199
-------
@@ -5350,7 +5355,11 @@ def sample(
53505355
)
53515356

53525357
locs = rs.choice(axis_length, size=n, replace=replace, p=weights)
5353-
return self.take(locs, axis=axis)
5358+
result = self.take(locs, axis=axis)
5359+
if ignore_index:
5360+
result.index = ibase.default_index(len(result))
5361+
5362+
return result
53545363

53555364
@final
53565365
@doc(klass=_shared_doc_kwargs["klass"])

pandas/tests/frame/methods/test_sample.py

+10
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from pandas import (
77
DataFrame,
8+
Index,
89
Series,
910
)
1011
import pandas._testing as tm
@@ -326,3 +327,12 @@ def test_sample_is_copy(self):
326327

327328
with tm.assert_produces_warning(None):
328329
df2["d"] = 1
330+
331+
def test_sample_ignore_index(self):
332+
# GH 38581
333+
df = DataFrame(
334+
{"col1": range(10, 20), "col2": range(20, 30), "colString": ["a"] * 10}
335+
)
336+
result = df.sample(3, ignore_index=True)
337+
expected_index = Index([0, 1, 2])
338+
tm.assert_index_equal(result.index, expected_index)

0 commit comments

Comments
 (0)