diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 732f9c5181b97..067d88a666bb3 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -278,6 +278,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe" RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Doctests indexes' ; echo $MSG + pytest -q --doctest-modules pandas/core/indexes/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Doctests tools' ; echo $MSG pytest -q --doctest-modules pandas/core/tools/ RET=$(($RET + $?)) ; echo $MSG "DONE" @@ -286,10 +290,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pytest -q --doctest-modules pandas/core/reshape/ RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests interval classes' ; echo $MSG - pytest -q --doctest-modules pandas/core/indexes/interval.py - RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests arrays'; echo $MSG pytest -q --doctest-modules pandas/core/arrays/ RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index feb9881ffdb81..2908d468bcae0 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -129,9 +129,41 @@ class DatetimeProperties(Properties): Examples -------- - >>> s.dt.hour - >>> s.dt.second - >>> s.dt.quarter + >>> seconds_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="s")) + >>> seconds_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: datetime64[ns] + >>> seconds_series.dt.second + 0 0 + 1 1 + 2 2 + dtype: int64 + + >>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h")) + >>> hours_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 01:00:00 + 2 2000-01-01 02:00:00 + dtype: datetime64[ns] + >>> hours_series.dt.hour + 0 0 + 1 1 + 2 2 + dtype: int64 + + >>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="q")) + >>> quarters_series + 0 2000-03-31 + 1 2000-06-30 + 2 2000-09-30 + dtype: datetime64[ns] + >>> quarters_series.dt.quarter + 0 1 + 1 2 + 2 3 + dtype: int64 Returns a Series indexed like the original Series. Raises TypeError if the Series does not contain datetimelike values. @@ -200,13 +232,24 @@ class TimedeltaProperties(Properties): """ Accessor object for datetimelike properties of the Series values. - Examples - -------- - >>> s.dt.hours - >>> s.dt.seconds - Returns a Series indexed like the original Series. Raises TypeError if the Series does not contain datetimelike values. + + Examples + -------- + >>> seconds_series = pd.Series( + ... pd.timedelta_range(start="1 second", periods=3, freq="S") + ... ) + >>> seconds_series + 0 00:00:01 + 1 00:00:02 + 2 00:00:03 + dtype: timedelta64[ns] + >>> seconds_series.dt.seconds + 0 1 + 1 2 + 2 3 + dtype: int64 """ def to_pytimedelta(self) -> np.ndarray: @@ -229,7 +272,7 @@ def to_pytimedelta(self) -> np.ndarray: Examples -------- - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='d')) + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="d")) >>> s 0 0 days 1 1 days @@ -239,9 +282,9 @@ def to_pytimedelta(self) -> np.ndarray: dtype: timedelta64[ns] >>> s.dt.to_pytimedelta() - array([datetime.timedelta(0), datetime.timedelta(1), - datetime.timedelta(2), datetime.timedelta(3), - datetime.timedelta(4)], dtype=object) + array([datetime.timedelta(0), datetime.timedelta(days=1), + datetime.timedelta(days=2), datetime.timedelta(days=3), + datetime.timedelta(days=4)], dtype=object) """ return self._get_values().to_pytimedelta() @@ -289,14 +332,60 @@ class PeriodProperties(Properties): """ Accessor object for datetimelike properties of the Series values. - Examples - -------- - >>> s.dt.hour - >>> s.dt.second - >>> s.dt.quarter - Returns a Series indexed like the original Series. Raises TypeError if the Series does not contain datetimelike values. + + Examples + -------- + >>> seconds_series = pd.Series( + ... pd.period_range( + ... start="2000-01-01 00:00:00", end="2000-01-01 00:00:03", freq="s" + ... ) + ... ) + >>> seconds_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + 3 2000-01-01 00:00:03 + dtype: period[S] + >>> seconds_series.dt.second + 0 0 + 1 1 + 2 2 + 3 3 + dtype: int64 + + >>> hours_series = pd.Series( + ... pd.period_range(start="2000-01-01 00:00", end="2000-01-01 03:00", freq="h") + ... ) + >>> hours_series + 0 2000-01-01 00:00 + 1 2000-01-01 01:00 + 2 2000-01-01 02:00 + 3 2000-01-01 03:00 + dtype: period[H] + >>> hours_series.dt.hour + 0 0 + 1 1 + 2 2 + 3 3 + dtype: int64 + + >>> quarters_series = pd.Series( + ... pd.period_range(start="2000-01-01", end="2000-12-31", freq="Q-DEC") + ... ) + >>> quarters_series + 0 2000Q1 + 1 2000Q2 + 2 2000Q3 + 3 2000Q4 + dtype: period[Q-DEC] + >>> quarters_series.dt.quarter + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5db860a02865b..5fec68d257167 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1841,7 +1841,7 @@ def is_object(self) -> bool: >>> idx = pd.Index(["Watermelon", "Orange", "Apple", ... "Watermelon"]).astype("category") - >>> idx.object() + >>> idx.is_object() False >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) @@ -2053,7 +2053,7 @@ def isna(self): >>> idx Float64Index([5.2, 6.0, nan], dtype='float64') >>> idx.isna() - array([False, False, True], dtype=bool) + array([False, False, True]) Empty strings are not considered NA values. None is considered an NA value. @@ -2062,7 +2062,7 @@ def isna(self): >>> idx Index(['black', '', 'red', None], dtype='object') >>> idx.isna() - array([False, False, False, True], dtype=bool) + array([False, False, False, True]) For datetimes, `NaT` (Not a Time) is considered as an NA value. @@ -2072,7 +2072,7 @@ def isna(self): DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'], dtype='datetime64[ns]', freq=None) >>> idx.isna() - array([False, True, True, True], dtype=bool) + array([False, True, True, True]) """ return self._isnan @@ -4790,8 +4790,9 @@ def isin(self, values, level=None): ... ['red', 'blue', 'green']], ... names=('number', 'color')) >>> midx - MultiIndex(levels=[[1, 2, 3], ['blue', 'green', 'red']], - codes=[[0, 1, 2], [2, 0, 1]], + MultiIndex([(1, 'red'), + (2, 'blue'), + (3, 'green')], names=['number', 'color']) Check whether the strings in the 'color' level of the MultiIndex @@ -4859,11 +4860,11 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): >>> idx = pd.Index(list('abcd')) >>> idx.slice_indexer(start='b', end='c') - slice(1, 3) + slice(1, 3, None) >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')]) >>> idx.slice_indexer(start='b', end=('c', 'g')) - slice(1, 3) + slice(1, 3, None) """ start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) @@ -5434,11 +5435,10 @@ def ensure_index_from_sequences(sequences, names=None): Examples -------- - >>> ensure_index_from_sequences([[1, 2, 3]], names=['name']) + >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"]) Int64Index([1, 2, 3], dtype='int64', name='name') - >>> ensure_index_from_sequences([['a', 'a'], ['a', 'b']], - names=['L1', 'L2']) + >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) MultiIndex([('a', 'a'), ('a', 'b')], names=['L1', 'L2']) @@ -5471,6 +5471,10 @@ def ensure_index(index_like, copy=False): ------- index : Index or MultiIndex + See Also + -------- + ensure_index_from_sequences + Examples -------- >>> ensure_index(['a', 'b']) @@ -5481,13 +5485,8 @@ def ensure_index(index_like, copy=False): >>> ensure_index([['a', 'a'], ['b', 'c']]) MultiIndex([('a', 'b'), - ('a', 'c')], - dtype='object') - ) - - See Also - -------- - ensure_index_from_sequences + ('a', 'c')], + ) """ if isinstance(index_like, Index): if copy: diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f4814f2efb910..073e1967678ec 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -138,21 +138,25 @@ class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate): Examples -------- - >>> pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c']) - CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category') # noqa + >>> pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['a', 'b', 'c'], ordered=False, dtype='category') ``CategoricalIndex`` can also be instantiated from a ``Categorical``: - >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) + >>> c = pd.Categorical(["a", "b", "c", "a", "b", "c"]) >>> pd.CategoricalIndex(c) - CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category') # noqa + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['a', 'b', 'c'], ordered=False, dtype='category') Ordered ``CategoricalIndex`` can have a min and max value. - >>> ci = pd.CategoricalIndex(['a','b','c','a','b','c'], ordered=True, - ... categories=['c', 'b', 'a']) + >>> ci = pd.CategoricalIndex( + ... ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"] + ... ) >>> ci - CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['c', 'b', 'a'], ordered=True, dtype='category') # noqa + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['c', 'b', 'a'], ordered=True, dtype='category') >>> ci.min() 'c' """ @@ -652,7 +656,7 @@ def map(self, mapper): >>> idx = pd.CategoricalIndex(['a', 'b', 'c']) >>> idx CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], - ordered=False, dtype='category') + ordered=False, dtype='category') >>> idx.map(lambda x: x.upper()) CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'], ordered=False, dtype='category') diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 10d62b522a255..4e2d07ddf9225 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -762,10 +762,26 @@ def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): Examples -------- - >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), - (2, 'one'), (2, 'two'), - (3, 'one'), (3, 'two')], - names=['foo', 'bar']) + >>> idx = pd.MultiIndex.from_tuples( + ... [ + ... (1, "one"), + ... (1, "two"), + ... (2, "one"), + ... (2, "two"), + ... (3, "one"), + ... (3, "two") + ... ], + ... names=["foo", "bar"] + ... ) + >>> idx + MultiIndex([(1, 'one'), + (1, 'two'), + (2, 'one'), + (2, 'two'), + (3, 'one'), + (3, 'two')], + names=['foo', 'bar']) + >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) MultiIndex([('a', 1), ('a', 2), @@ -798,10 +814,12 @@ def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]) MultiIndex([('a', 1), - ('a', 2), - ('b', 1), - ('b', 2)], - names=['foo', 'bar']) + ('a', 2), + ('b', 1), + ('b', 2), + ('c', 1), + ('c', 2)], + names=['foo', 'bar']) >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) """ @@ -907,11 +925,16 @@ def set_codes(self, codes, level=None, inplace=False, verify_integrity=True): Examples -------- - >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), - (1, 'two'), - (2, 'one'), - (2, 'two')], - names=['foo', 'bar']) + >>> idx = pd.MultiIndex.from_tuples( + ... [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"] + ... ) + >>> idx + MultiIndex([(1, 'one'), + (1, 'two'), + (2, 'one'), + (2, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) MultiIndex([(2, 'one'), (1, 'one'), @@ -2751,8 +2774,7 @@ def get_loc_level(self, key, level=0, drop_level: bool = True): (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B')) >>> mi.get_loc_level('e', level='B') - (array([False, True, False], dtype=bool), - Index(['b'], dtype='object', name='A')) + (array([False, True, False]), Index(['b'], dtype='object', name='A')) >>> mi.get_loc_level(['b', 'e']) (1, None) @@ -3275,7 +3297,46 @@ def union(self, other, sort=None): ------- Index - >>> index.union(index2) + Examples + -------- + >>> idx1 = pd.MultiIndex.from_arrays( + ... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] + ... ) + >>> idx1 + MultiIndex([(1, 'Red'), + (1, 'Blue'), + (2, 'Red'), + (2, 'Blue')], + ) + >>> idx2 = pd.MultiIndex.from_arrays( + ... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]] + ... ) + >>> idx2 + MultiIndex([(3, 'Red'), + (3, 'Green'), + (2, 'Red'), + (2, 'Green')], + ) + + >>> idx1.union(idx2) + MultiIndex([(1, 'Blue'), + (1, 'Red'), + (2, 'Blue'), + (2, 'Green'), + (2, 'Red'), + (3, 'Green'), + (3, 'Red')], + ) + + >>> idx1.union(idx2, sort=False) + MultiIndex([(1, 'Red'), + (1, 'Blue'), + (2, 'Red'), + (2, 'Blue'), + (3, 'Red'), + (3, 'Green'), + (2, 'Green')], + ) """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8aaf828787179..1f565828ec7a5 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -138,7 +138,9 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index): Examples -------- - >>> idx = pd.PeriodIndex(year=year_arr, quarter=q_arr) + >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3]) + >>> idx + PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]', freq='Q-DEC') """ _typ = "periodindex" @@ -775,10 +777,10 @@ def period_range( Examples -------- >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') - PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', - '2017-06', '2017-06', '2017-07', '2017-08', '2017-09', - '2017-10', '2017-11', '2017-12', '2018-01'], - dtype='period[M]', freq='M') + PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', + '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', + '2018-01'], + dtype='period[M]', freq='M') If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor endpoints for a ``PeriodIndex`` with frequency matching that of the diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 62f063b4eed02..765b948f13e96 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -351,7 +351,7 @@ def timedelta_range( >>> pd.timedelta_range(start='1 day', end='5 days', periods=4) TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00', '5 days 00:00:00'], - dtype='timedelta64[ns]', freq=None) + dtype='timedelta64[ns]', freq='32H') """ if freq is None and com.any_none(periods, start, end): freq = "D"