diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index f66098633b45e..4bc88d05cb28a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -51,6 +51,7 @@ Other enhancements - :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`) - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`) +- :meth:`DataFrame.sample` now accepts the ``ignore_index`` argument to reset the index after sampling, similar to :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.sort_values` (:issue:`38581`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c3db8ef58deb6..54482914e5ba0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -35,6 +35,7 @@ from pandas._libs import lib from pandas._libs.tslibs import Period, Tick, Timestamp, to_offset from pandas._typing import ( + ArrayLike, Axis, CompressionOptions, FilePathOrBuffer, @@ -5143,12 +5144,13 @@ def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: @final def sample( self: FrameOrSeries, - n=None, - frac=None, - replace=False, - weights=None, + n: Optional[int] = None, + frac: Optional[float] = None, + replace: bool_t = False, + weights: Optional[Union[str, ArrayLike]] = None, random_state=None, - axis=None, + axis: Optional[Axis] = None, + ignore_index: Optional[bool_t] = False, ) -> FrameOrSeries: """ Return a random sample of items from an axis of object. @@ -5190,6 +5192,10 @@ def sample( axis : {0 or ‘index’, 1 or ‘columns’, None}, default None Axis to sample. Accepts axis number or name. Default is stat axis for given data type (0 for Series and DataFrames). 
+ ignore_index : bool, default False + If True, the resulting index will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.3.0 Returns ------- @@ -5347,7 +5353,11 @@ def sample( ) locs = rs.choice(axis_length, size=n, replace=replace, p=weights) - return self.take(locs, axis=axis) + result = self.take(locs, axis=axis) + if ignore_index: + result.index = ibase.default_index(len(result)) + + return result @final @doc(klass=_shared_doc_kwargs["klass"]) diff --git a/pandas/tests/generic/methods/test_sample.py b/pandas/tests/generic/methods/test_sample.py index 6c1c352d8286c..424b31ca9ba2d 100644 --- a/pandas/tests/generic/methods/test_sample.py +++ b/pandas/tests/generic/methods/test_sample.py @@ -3,7 +3,7 @@ from pandas.compat.numpy import np_version_under1p17 -from pandas import DataFrame, Series +from pandas import DataFrame, Index, Series import pandas._testing as tm import pandas.core.common as com @@ -323,3 +323,12 @@ def test_sample_is_copy(self): with tm.assert_produces_warning(None): df2["d"] = 1 + + def test_sample_ignore_index(self): + # GH 38581 + df = DataFrame( + {"col1": range(10, 20), "col2": range(20, 30), "colString": ["a"] * 10} + ) + result = df.sample(3, ignore_index=True) + expected_index = Index([0, 1, 2]) + tm.assert_index_equal(result.index, expected_index)