From 616b55928f12f1f048f965e592a2a265be115a82 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 18 Jul 2021 16:33:04 -0600 Subject: [PATCH 1/2] TST: Fixing broken doctests in pandas/core --- ci/code_checks.sh | 32 ++++----------------------- pandas/core/flags.py | 4 ++-- pandas/core/internals/construction.py | 7 +++--- pandas/core/resample.py | 30 +++++++++++++++++-------- 4 files changed, 31 insertions(+), 42 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 59548ecd3c710..9f59958b4e827 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -107,44 +107,20 @@ fi ### DOCTESTS ### if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then - MSG='Doctests for individual files' ; echo $MSG - pytest -q --doctest-modules \ - pandas/core/accessor.py \ - pandas/core/aggregation.py \ - pandas/core/algorithms.py \ - pandas/core/base.py \ - pandas/core/construction.py \ - pandas/core/frame.py \ - pandas/core/generic.py \ - pandas/core/indexers.py \ - pandas/core/nanops.py \ - pandas/core/series.py \ - pandas/io/sql.py - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Doctests for directories' ; echo $MSG - pytest -q --doctest-modules \ + MSG='Doctests' ; echo $MSG + python -m pytest --doctest-modules \ pandas/_libs/ \ pandas/api/ \ pandas/arrays/ \ pandas/compat/ \ - pandas/core/array_algos/ \ - pandas/core/arrays/ \ - pandas/core/computation/ \ - pandas/core/dtypes/ \ - pandas/core/groupby/ \ - pandas/core/indexes/ \ - pandas/core/ops/ \ - pandas/core/reshape/ \ - pandas/core/strings/ \ - pandas/core/tools/ \ - pandas/core/window/ \ + pandas/core \ pandas/errors/ \ pandas/io/clipboard/ \ pandas/io/json/ \ pandas/io/excel/ \ pandas/io/parsers/ \ pandas/io/sas/ \ + pandas/io/sql.py \ pandas/tseries/ RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/core/flags.py b/pandas/core/flags.py index 6a09bfa3bd082..54be212c5633c 100644 --- a/pandas/core/flags.py +++ b/pandas/core/flags.py @@ -68,9 +68,9 @@ def allows_duplicate_labels(self) -> bool: Examples -------- >>> df = pd.DataFrame({"A": [1, 2]}, index=['a', 'a']) - >>> df.allows_duplicate_labels + >>> df.flags.allows_duplicate_labels True - >>> df.allows_duplicate_labels = False + >>> df.flags.allows_duplicate_labels = False Traceback (most recent call last): ... pandas.errors.DuplicateLabelError: Index has duplicates. diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 22cce5c614d5a..e35997c2a7e48 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -712,13 +712,14 @@ def dataclasses_to_dicts(data): Examples -------- + >>> from dataclasses import dataclass >>> @dataclass - >>> class Point: + ... class Point: ... x: int ... y: int - >>> dataclasses_to_dicts([Point(1,2), Point(2,3)]) - [{"x":1,"y":2},{"x":2,"y":3}] + >>> dataclasses_to_dicts([Point(1, 2), Point(2, 3)]) + [{'x': 1, 'y': 2}, {'x': 2, 'y': 3}] """ from dataclasses import asdict diff --git a/pandas/core/resample.py b/pandas/core/resample.py index b9a75a6917140..27793f1aaaddf 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -286,8 +286,9 @@ def pipe( """ Examples -------- - >>> s = pd.Series([1,2,3,4,5], - index=pd.date_range('20130101', periods=5,freq='s')) + >>> s = pd.Series([1, 2, 3, 4, 5], + ... index=pd.date_range('20130101', periods=5,freq='s')) + >>> s 2013-01-01 00:00:00 1 2013-01-01 00:00:01 2 2013-01-01 00:00:02 3 @@ -296,8 +297,6 @@ def pipe( Freq: S, dtype: int64 >>> r = s.resample('2s') - DatetimeIndexResampler [freq=<2 * Seconds>, axis=0, closed=left, - label=left, convention=start] >>> r.agg(np.sum) 2013-01-01 00:00:00 3 @@ -312,11 +311,11 @@ def pipe( 2013-01-01 00:00:04 5 5.0 5 >>> r.agg({'result' : lambda x: x.mean() / x.std(), - 'total' : np.sum}) - total result - 2013-01-01 00:00:00 3 2.121320 - 2013-01-01 00:00:02 7 4.949747 - 2013-01-01 00:00:04 5 NaN + ... 'total' : np.sum}) + result total + 2013-01-01 00:00:00 2.121320 3 + 2013-01-01 00:00:02 4.949747 7 + 2013-01-01 00:00:04 NaN 5 """ ) @@ -357,7 +356,20 @@ def transform(self, arg, *args, **kwargs): Examples -------- + >>> s = pd.Series([1, 2], + ... index=pd.date_range('20180101', + ... periods=2, + ... freq='1h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + Freq: H, dtype: int64 + + >>> resampled = s.resample('15min') >>> resampled.transform(lambda x: (x - x.mean()) / x.std()) + 2018-01-01 00:00:00 NaN + 2018-01-01 01:00:00 NaN + Freq: H, dtype: float64 """ return self._selected_obj.groupby(self.groupby).transform(arg, *args, **kwargs) From 90898a23e8882aae2bf7d069fe40b8c07a690394 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 18 Jul 2021 16:36:42 -0600 Subject: [PATCH 2/2] Fixing few PEP-8 violations in now passing doctest --- pandas/core/resample.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 27793f1aaaddf..87fae365d7ce7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -287,7 +287,7 @@ def pipe( Examples -------- >>> s = pd.Series([1, 2, 3, 4, 5], - ... index=pd.date_range('20130101', periods=5,freq='s')) + ... index=pd.date_range('20130101', periods=5, freq='s')) >>> s 2013-01-01 00:00:00 1 2013-01-01 00:00:01 2 @@ -304,14 +304,14 @@ def pipe( 2013-01-01 00:00:04 5 Freq: 2S, dtype: int64 - >>> r.agg(['sum','mean','max']) + >>> r.agg(['sum', 'mean', 'max']) sum mean max 2013-01-01 00:00:00 3 1.5 2 2013-01-01 00:00:02 7 3.5 4 2013-01-01 00:00:04 5 5.0 5 - >>> r.agg({'result' : lambda x: x.mean() / x.std(), - ... 'total' : np.sum}) + >>> r.agg({'result': lambda x: x.mean() / x.std(), + ... 'total': np.sum}) result total 2013-01-01 00:00:00 2.121320 3 2013-01-01 00:00:02 4.949747 7