Skip to content

DOC: EX01 ({Categorical, Interval, Multi, Datetime, Timedelta}-Index) #53925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -138,25 +138,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.util.hash_pandas_object \
pandas_object \
pandas.api.interchange.from_dataframe \
pandas.CategoricalIndex.codes \
pandas.CategoricalIndex.categories \
pandas.CategoricalIndex.ordered \
pandas.CategoricalIndex.reorder_categories \
pandas.CategoricalIndex.set_categories \
pandas.CategoricalIndex.as_ordered \
pandas.CategoricalIndex.as_unordered \
pandas.CategoricalIndex.equals \
pandas.IntervalIndex.values \
pandas.IntervalIndex.to_tuples \
pandas.MultiIndex.dtypes \
pandas.MultiIndex.drop \
pandas.DatetimeIndex.snap \
pandas.DatetimeIndex.as_unit \
pandas.DatetimeIndex.to_pydatetime \
pandas.DatetimeIndex.to_series \
pandas.DatetimeIndex.mean \
pandas.DatetimeIndex.std \
pandas.TimedeltaIndex \
pandas.core.window.rolling.Rolling.max \
pandas.core.window.rolling.Rolling.cov \
pandas.core.window.rolling.Rolling.skew \
Expand Down
141 changes: 112 additions & 29 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
Attributes
----------
categories : Index
The categories of this categorical
The categories of this categorical.
codes : ndarray
The codes (integer positions, which point to the categories) of this
categorical, read only.
Expand Down Expand Up @@ -760,23 +760,32 @@ def categories(self) -> Index:

Examples
--------
For :class:`pandas.Series`:

For Series:
Comment on lines +763 to -764
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice!


>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
>>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
>>> ser.cat.categories
Index(['a', 'b', 'c'], dtype='object')

>>> raw_cat = pd.Categorical(["a", "b", "c", "a"], categories=["b", "c", "d"],)
>>> raw_cat = pd.Categorical(['a', 'b', 'c', 'a'], categories=['b', 'c', 'd'])
Comment on lines -770 to +769
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just for the future - in general, let's not bother with stylistic changes like changing double quotes to single quotes, at least until we have an automated tool to enforce this on docstrings

>>> ser = pd.Series(raw_cat)
>>> ser.cat.categories
Index(['b', 'c', 'd'], dtype='object')

For Categorical:
For :class:`pandas.Categorical`:

>>> cat = pd.Categorical(['a', 'b'], ordered=True)
>>> cat.categories
Index(['a', 'b'], dtype='object')

For :class:`pandas.CategoricalIndex`:

>>> ci = pd.CategoricalIndex(['a', 'c', 'b', 'a', 'c', 'b'])
>>> ci.categories
Index(['a', 'b', 'c'], dtype='object')

>>> ci = pd.CategoricalIndex(['a', 'c'], categories=['c', 'b', 'a'])
>>> ci.categories
Index(['c', 'b', 'a'], dtype='object')
"""
return self.dtype.categories

Expand All @@ -787,19 +796,18 @@ def ordered(self) -> Ordered:

Examples
--------
For :class:`pandas.Series`:

For Series:

>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
>>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
>>> ser.cat.ordered
False

>>> raw_cat = pd.Categorical(["a", "b", "c", "a"], ordered=True)
>>> raw_cat = pd.Categorical(['a', 'b', 'c', 'a'], ordered=True)
>>> ser = pd.Series(raw_cat)
>>> ser.cat.ordered
True

For Categorical:
For :class:`pandas.Categorical`:

>>> cat = pd.Categorical(['a', 'b'], ordered=True)
>>> cat.ordered
Expand All @@ -808,13 +816,23 @@ def ordered(self) -> Ordered:
>>> cat = pd.Categorical(['a', 'b'], ordered=False)
>>> cat.ordered
False

For :class:`pandas.CategoricalIndex`:

>>> ci = pd.CategoricalIndex(['a', 'b'], ordered=True)
>>> ci.ordered
True

>>> ci = pd.CategoricalIndex(['a', 'b'], ordered=False)
>>> ci.ordered
False
"""
return self.dtype.ordered

@property
def codes(self) -> np.ndarray:
"""
The category codes of this categorical.
The category codes of this categorical or categorical index.

Codes are an array of integers which are the positions of the actual
values in the categories array.
Expand All @@ -825,13 +843,25 @@ def codes(self) -> np.ndarray:
Returns
-------
ndarray[int]
A non-writable view of the `codes` array.
A non-writable view of the ``codes`` array.

Examples
--------
For :class:`pandas.Categorical`:

>>> cat = pd.Categorical(['a', 'b'], ordered=True)
>>> cat.codes
array([0, 1], dtype=int8)

For :class:`pandas.CategoricalIndex`:

>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'])
>>> ci.codes
array([0, 1, 2, 0, 1, 2], dtype=int8)

>>> ci = pd.CategoricalIndex(['a', 'c'], categories=['c', 'b', 'a'])
>>> ci.codes
array([2, 0], dtype=int8)
"""
v = self._codes.view()
v.flags.writeable = False
Expand Down Expand Up @@ -915,12 +945,23 @@ def as_ordered(self) -> Self:

Examples
--------
>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
For :class:`pandas.Series`:

>>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
>>> ser.cat.ordered
False
>>> ser = ser.cat.as_ordered()
>>> ser.cat.ordered
True

For :class:`pandas.CategoricalIndex`:

>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a'])
>>> ci.ordered
False
>>> ci = ci.as_ordered()
>>> ci.ordered
True
"""
return self.set_ordered(True)

Expand All @@ -935,24 +976,36 @@ def as_unordered(self) -> Self:

Examples
--------
>>> raw_cate = pd.Categorical(["a", "b", "c"],
... categories=["a", "b", "c"], ordered=True)
>>> ser = pd.Series(raw_cate)
For :class:`pandas.Series`:

>>> raw_cat = pd.Categorical(['a', 'b', 'c', 'a'], ordered=True)
>>> ser = pd.Series(raw_cat)
>>> ser.cat.ordered
True
>>> ser = ser.cat.as_unordered()
>>> ser.cat.ordered
False

For :class:`pandas.CategoricalIndex`:

>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a'], ordered=True)
>>> ci.ordered
True
>>> ci = ci.as_unordered()
>>> ci.ordered
False
"""
return self.set_ordered(False)

def set_categories(self, new_categories, ordered=None, rename: bool = False):
"""
Set the categories to the specified new_categories.
Set the categories to the specified new categories.

`new_categories` can include new categories (which will result in
``new_categories`` can include new categories (which will result in
unused categories) or remove old categories (which results in values
set to NaN). If `rename==True`, the categories will simple be renamed
set to ``NaN``). If ``rename=True``, the categories will simply be renamed
(less or more items than in old categories will result in values set to
NaN or in unused categories respectively).
``NaN`` or in unused categories respectively).

This method can be used to perform more than one action of adding,
removing, and reordering simultaneously and is therefore faster than
Expand Down Expand Up @@ -994,23 +1047,41 @@ def set_categories(self, new_categories, ordered=None, rename: bool = False):

Examples
--------
>>> raw_cate = pd.Categorical(["a", "b", "c", "A"],
... categories=["a", "b", "c"], ordered=True)
>>> ser = pd.Series(raw_cate)
For :class:`pandas.Series`:

>>> raw_cat = pd.Categorical(['a', 'b', 'c', 'A'],
... categories=['a', 'b', 'c'], ordered=True)
>>> ser = pd.Series(raw_cat)
>>> ser
0 a
1 b
2 c
3 NaN
dtype: category
Categories (3, object): ['a' < 'b' < 'c']
>>> ser.cat.set_categories(["A", "B", "C"], rename=True)

>>> ser.cat.set_categories(['A', 'B', 'C'], rename=True)
0 A
1 B
2 C
3 NaN
dtype: category
Categories (3, object): ['A' < 'B' < 'C']

For :class:`pandas.CategoricalIndex`:

>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'A'],
... categories=['a', 'b', 'c'], ordered=True)
>>> ci
CategoricalIndex(['a', 'b', 'c', nan], categories=['a', 'b', 'c'],
ordered=True, dtype='category')

>>> ci.set_categories(['A', 'b', 'c'])
CategoricalIndex([nan, 'b', 'c', nan], categories=['A', 'b', 'c'],
ordered=True, dtype='category')
>>> ci.set_categories(['A', 'b', 'c'], rename=True)
CategoricalIndex(['A', 'b', 'c', nan], categories=['A', 'b', 'c'],
ordered=True, dtype='category')
"""

if ordered is None:
Expand Down Expand Up @@ -1108,7 +1179,7 @@ def reorder_categories(self, new_categories, ordered=None) -> Self:
"""
Reorder categories as specified in new_categories.

`new_categories` need to include all old categories and no new category
``new_categories`` needs to include all old categories and no new category
items.

Parameters
Expand Down Expand Up @@ -1140,7 +1211,9 @@ def reorder_categories(self, new_categories, ordered=None) -> Self:

Examples
--------
>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
For :class:`pandas.Series`:

>>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
>>> ser = ser.cat.reorder_categories(['c', 'b', 'a'], ordered=True)
>>> ser
0 a
Expand All @@ -1149,14 +1222,24 @@ def reorder_categories(self, new_categories, ordered=None) -> Self:
3 a
dtype: category
Categories (3, object): ['c' < 'b' < 'a']
>>> ser = ser.sort_values()
>>> ser

>>> ser.sort_values()
2 c
1 b
0 a
3 a
dtype: category
Categories (3, object): ['c' < 'b' < 'a']

For :class:`pandas.CategoricalIndex`:

>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a'])
>>> ci
CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c'],
ordered=False, dtype='category')
>>> ci.reorder_categories(['c', 'b', 'a'], ordered=True)
CategoricalIndex(['a', 'b', 'c', 'a'], categories=['c', 'b', 'a'],
ordered=True, dtype='category')
"""
if (
len(self.categories) != len(new_categories)
Expand Down
11 changes: 11 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1552,6 +1552,17 @@ def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0):

Examples
--------
For :class:`pandas.DatetimeIndex`:

>>> idx = pd.date_range('2001-01-01 00:00', periods=3)
>>> idx
DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
dtype='datetime64[ns]', freq='D')
>>> idx.mean()
Timestamp('2001-01-02 00:00:00')

For :class:`pandas.TimedeltaIndex`:

>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D')
>>> tdelta_idx
TimedeltaIndex(['1 days', '2 days', '3 days'],
Expand Down
43 changes: 34 additions & 9 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,11 +1090,19 @@ def tz_localize(

def to_pydatetime(self) -> npt.NDArray[np.object_]:
"""
Return an ndarray of datetime.datetime objects.
Return an ndarray of ``datetime.datetime`` objects.

Returns
-------
numpy.ndarray

Examples
--------
>>> idx = pd.date_range('2018-02-27', periods=3)
>>> idx.to_pydatetime()
array([datetime.datetime(2018, 2, 27, 0, 0),
datetime.datetime(2018, 2, 28, 0, 0),
datetime.datetime(2018, 3, 1, 0, 0)], dtype=object)
"""
return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso)

Expand Down Expand Up @@ -2097,23 +2105,40 @@ def std(
"""
Return sample standard deviation over requested axis.

Normalized by N-1 by default. This can be changed using the ddof argument
Normalized by `N-1` by default. This can be changed using ``ddof``.

Parameters
----------
axis : int optional, default None
Axis for the function to be applied on.
For `Series` this parameter is unused and defaults to `None`.
axis : int, optional
Axis for the function to be applied on. For :class:`pandas.Series`
this parameter is unused and defaults to ``None``.
ddof : int, default 1
Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.
Degrees of Freedom. The divisor used in calculations is `N - ddof`,
where `N` represents the number of elements.
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result will be
NA.
Exclude NA/null values. If an entire row/column is ``NA``, the result
will be ``NA``.

Returns
-------
Timedelta

See Also
--------
numpy.ndarray.std : Returns the standard deviation of the array elements
along given axis.
Series.std : Return sample standard deviation over requested axis.

Examples
--------
For :class:`pandas.DatetimeIndex`:

>>> idx = pd.date_range('2001-01-01 00:00', periods=3)
>>> idx
DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
dtype='datetime64[ns]', freq='D')
>>> idx.std()
Timedelta('1 days 00:00:00')
"""
# Because std is translation-invariant, we can get self.std
# by calculating (self - Timestamp(0)).std, and we can do it
Expand Down
Loading