From 979dcca2d56ea903a3ba0f7c8745194f08f04314 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 15 Feb 2020 22:20:01 +0000 Subject: [PATCH 1/8] CI: add pydocstyle to code_checks --- ci/code_checks.sh | 5 +++++ environment.yml | 1 + pandas/core/arrays/datetimes.py | 5 +---- pandas/core/tools/datetimes.py | 2 +- pandas/io/excel/_openpyxl.py | 1 + requirements-dev.txt | 1 + 6 files changed, 10 insertions(+), 5 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index bb7d8a388e6e2..601340c4a7128 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -316,6 +316,11 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then + + MSG='Validate docstrings (D201, D202, D204, D207, D208, D209, D213, D300, D409, D411, D412, D414)' ; echo $MSG + pydocstyle pandas --select=D201,D202,D204,D207,D208,D209,D213,D300,D409,D411,D412,D414 + RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA02, SA03, SA05)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA02,SA03,SA05 RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/environment.yml b/environment.yml index 5f1184e921119..de3027f367451 100644 --- a/environment.yml +++ b/environment.yml @@ -23,6 +23,7 @@ dependencies: - isort # check that imports are in the right order - mypy=0.730 - pycodestyle # used by flake8 + - pydocstyle # documentation - gitpython # obtain contributors from git for whatsnew diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a75536e46e60d..3ec765f933643 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -559,7 +559,6 @@ def __iter__(self): ------ tstamp : Timestamp """ - # convert in chunks of 10k for efficiency data = self.asi8 length = len(self) @@ -820,7 +819,7 @@ def tz_convert(self, tz): dtype = tz_to_dtype(tz) return self._simple_new(self.asi8, dtype=dtype, freq=self.freq) - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): # noqa """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. @@ -1640,7 +1639,6 @@ def to_julian_date(self): 0 Julian date is noon January 1, 4713 BC. https://en.wikipedia.org/wiki/Julian_day """ - # http://mysite.verizon.net/aesir_research/date/jdalg2.htm year = np.asarray(self.year) month = np.asarray(self.month) @@ -1705,7 +1703,6 @@ def sequence_to_dt64ns( ------ TypeError : PeriodDType data is passed """ - inferred_freq = None dtype = _validate_dt64_dtype(dtype) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6d45ddd29d783..27ff1fe3022ef 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -538,7 +538,7 @@ def to_datetime( infer_datetime_format=False, origin="unix", cache=True, -): +): # noqa: D207 """ Convert argument to datetime. diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index a96c0f814e2d8..716f2b0fe2341 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -393,6 +393,7 @@ def _convert_to_protection(cls, protection_dict): Returns ------- + openpyxl.styles.Protection """ from openpyxl.styles import Protection diff --git a/requirements-dev.txt b/requirements-dev.txt index 08cbef2c7fc6b..d8e2c265c74b7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -14,6 +14,7 @@ flake8-rst>=0.6.0,<=0.7.0 isort mypy==0.730 pycodestyle +pydocstyle gitpython sphinx nbconvert>=5.4.1 From b6bd7a5410885fbbdecef29afd2c4cd33e66c04d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 15 Feb 2020 23:22:14 +0000 Subject: [PATCH 2/8] check fail --- pandas/io/excel/_openpyxl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 716f2b0fe2341..c6644e43c6319 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -392,9 +392,9 @@ def _convert_to_protection(cls, protection_dict): 'hidden' Returns - ------- - openpyxl.styles.Protection + ------ """ + from openpyxl.styles import Protection return Protection(**protection_dict) From 719e602b61e39e1b8c199cefc0822991700f1ea1 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 15 Feb 2020 23:47:19 +0000 Subject: [PATCH 3/8] Revert "check fail" This reverts commit b6bd7a5410885fbbdecef29afd2c4cd33e66c04d. --- pandas/io/excel/_openpyxl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index c6644e43c6319..716f2b0fe2341 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -392,9 +392,9 @@ def _convert_to_protection(cls, protection_dict): 'hidden' Returns - ------ + ------- + openpyxl.styles.Protection """ - from openpyxl.styles import Protection return Protection(**protection_dict) From 39242d248c43749896baf04806b94a74d2c82b22 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 16 Feb 2020 20:50:18 +0000 Subject: [PATCH 4/8] add TODO for #noqa --- pandas/core/arrays/datetimes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 3ec765f933643..8599065830b57 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -819,6 +819,8 @@ def tz_convert(self, tz): dtype = tz_to_dtype(tz) return self._simple_new(self.asi8, dtype=dtype, freq=self.freq) + # TODO: remove # noqa once https://github.com/PyCQA/pydocstyle/pull/441 + # is merged def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): # noqa """ Localize tz-naive Datetime Array/Index to tz-aware From 89baa1cf67004c6cad317ff8966fee07edfbfe77 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 17 Feb 2020 09:16:47 +0000 Subject: [PATCH 5/8] add pydocstyle to setup.cfg --- setup.cfg | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/setup.cfg b/setup.cfg index 4a900e581c353..4c70122c329ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,6 +49,20 @@ ignore = E402, # module level import not at top of file exclude = doc/source/development/contributing_docstring.rst +[pydocstyle] +select = D201, # No blank lines allowed before function docstring + D202, # No blank lines allowed after function docstring + D204, # 1 blank line required after class docstring + D207, # Docstring is under-indented + D208, # Docstring is over-indented + D209, # Multi-line docstring closing quotes should be on a separate line + D213, # Multi-line docstring summary should start at the second line + D300, # Use triple double quotes + D409, # Section underline should match the length of its name + D411, # Missing blank line before section + D412, # No blank lines allowed between a section header and its content + D414, # Section has no content + [tool:pytest] # sync minversion with setup.cfg & install.rst minversion = 4.0.2 From ed4b76496b417ed0e7afb488ceb01ac3bfd736fa Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 17 Feb 2020 09:20:34 +0000 Subject: [PATCH 6/8] update code-checks.sh --- ci/code_checks.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 601340c4a7128..71aad6347085c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -316,9 +316,11 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then + echo "pydocstyle --version" + pydocstyle --version - MSG='Validate docstrings (D201, D202, D204, D207, D208, D209, D213, D300, D409, D411, D412, D414)' ; echo $MSG - pydocstyle pandas --select=D201,D202,D204,D207,D208,D209,D213,D300,D409,D411,D412,D414 + MSG='Validate docstrings using pydocstyle (see setup.cfg for selected error codes)' ; echo $MSG + pydocstyle pandas RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA02, SA03, SA05)' ; echo $MSG From fe25c6b57222e05c359372d030b35f1fbec9ee39 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 17 Feb 2020 09:21:06 +0000 Subject: [PATCH 7/8] check failures --- pandas/core/arrays/base.py | 81 +++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 45 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b5da6d4c11616..bc67eb101d5e2 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -33,11 +33,10 @@ def try_cast_to_ea(cls_or_instance, obj, dtype=None): - """ - Call to `_from_sequence` that returns the object unchanged on Exception. + """Call to `_from_sequence` that returns the object unchanged on Exception. Parameters - ---------- + --------- cls_or_instance : ExtensionArray subclass or instance obj : arraylike Values to pass to cls._from_sequence @@ -45,7 +44,6 @@ def try_cast_to_ea(cls_or_instance, obj, dtype=None): Returns ------- - ExtensionArray or obj """ try: result = cls_or_instance._from_sequence(obj, dtype=dtype) @@ -181,7 +179,6 @@ class ExtensionArray: By default, ExtensionArrays are not hashable. Immutable subclasses may override this behavior. """ - # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this. _typ = "extension" @@ -238,6 +235,7 @@ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): @classmethod def _from_factorized(cls, values, original): + """ Reconstruct an ExtensionArray after factorization. @@ -253,6 +251,7 @@ def _from_factorized(cls, values, original): factorize ExtensionArray.factorize """ + raise AbstractMethodError(cls) # ------------------------------------------------------------------------ @@ -287,13 +286,13 @@ def __getitem__(self, item): if the slice is length 0 or 1. For a boolean mask, return an instance of ``ExtensionArray``, filtered - to the values where ``item`` is True. + to the values where ``item`` is True. """ raise AbstractMethodError(self) def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None: """ - Set one or more values inplace. + Set one or more values inplace. This method is not required to satisfy the pandas extension array interface. @@ -348,8 +347,7 @@ def __len__(self) -> int: def __iter__(self): """ - Iterate over elements of the array. - """ + Iterate over elements of the array.""" # This needs to be implemented so that pandas recognizes extension # arrays as list-like. The default implementation makes successive # calls to ``__getitem__``, which may be slower than necessary. @@ -395,9 +393,7 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default): @property def dtype(self) -> ExtensionDtype: - """ - An instance of 'ExtensionDtype'. - """ + "An instance of 'ExtensionDtype'." raise AbstractMethodError(self) @property @@ -430,9 +426,9 @@ def nbytes(self) -> int: def astype(self, dtype, copy=True): """ Cast to a NumPy array with 'dtype'. - Parameters ---------- + dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True @@ -475,9 +471,6 @@ def _values_for_argsort(self) -> np.ndarray: Returns ------- - ndarray - The transformed values should maintain the ordering between values - within the array. See Also -------- @@ -510,8 +503,7 @@ def argsort( See Also -------- - numpy.argsort : Sorting implementation used internally. - """ + numpy.argsort : Sorting implementation used internally.""" # Implementor note: You have two places to override the behavior of # argsort. # 1. _values_for_argsort : construct the values passed to np.argsort @@ -585,39 +577,39 @@ def dropna(self): def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray: """ - Shift values by desired number. + Shift values by desired number. - Newly introduced missing values are filled with - ``self.dtype.na_value``. + Newly introduced missing values are filled with + ``self.dtype.na_value``. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0 - Parameters - ---------- - periods : int, default 1 - The number of periods to shift. Negative values are allowed - for shifting backwards. + Parameters + ---------- + periods : int, default 1 + The number of periods to shift. Negative values are allowed + for shifting backwards. - fill_value : object, optional - The scalar value to use for newly introduced missing values. - The default is ``self.dtype.na_value``. + fill_value : object, optional + The scalar value to use for newly introduced missing values. + The default is ``self.dtype.na_value``. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0 - Returns - ------- - ExtensionArray - Shifted. + Returns + ------- + ExtensionArray + Shifted. - Notes - ----- - If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is - returned. + Notes + ----- + If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is + returned. - If ``periods > len(self)``, then an array of size - len(self) is returned, with all values filled with - ``self.dtype.na_value``. - """ + If ``periods > len(self)``, then an array of size + len(self) is returned, with all values filled with + ``self.dtype.na_value``. + """ # Note: this implementation assumes that `self.dtype.na_value` can be # stored in an instance of your ExtensionArray with `self.dtype`. if not len(self) or periods == 0: @@ -638,8 +630,7 @@ def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArra return self._concat_same_type([a, b]) def unique(self): - """ - Compute the ExtensionArray of unique values. + """Compute the ExtensionArray of unique values. Returns ------- From e3ca7f7018b8b9c5db3535b1759aff817655d2da Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 17 Feb 2020 09:56:22 +0000 Subject: [PATCH 8/8] Revert "check failures" This reverts commit fe25c6b57222e05c359372d030b35f1fbec9ee39. --- pandas/core/arrays/base.py | 81 +++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index bc67eb101d5e2..b5da6d4c11616 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -33,10 +33,11 @@ def try_cast_to_ea(cls_or_instance, obj, dtype=None): - """Call to `_from_sequence` that returns the object unchanged on Exception. + """ + Call to `_from_sequence` that returns the object unchanged on Exception. Parameters - --------- + ---------- cls_or_instance : ExtensionArray subclass or instance obj : arraylike Values to pass to cls._from_sequence @@ -44,6 +45,7 @@ def try_cast_to_ea(cls_or_instance, obj, dtype=None): Returns ------- + ExtensionArray or obj """ try: result = cls_or_instance._from_sequence(obj, dtype=dtype) @@ -179,6 +181,7 @@ class ExtensionArray: By default, ExtensionArrays are not hashable. Immutable subclasses may override this behavior. """ + # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this. _typ = "extension" @@ -235,7 +238,6 @@ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): @classmethod def _from_factorized(cls, values, original): - """ Reconstruct an ExtensionArray after factorization. @@ -251,7 +253,6 @@ def _from_factorized(cls, values, original): factorize ExtensionArray.factorize """ - raise AbstractMethodError(cls) # ------------------------------------------------------------------------ @@ -286,13 +287,13 @@ def __getitem__(self, item): if the slice is length 0 or 1. For a boolean mask, return an instance of ``ExtensionArray``, filtered - to the values where ``item`` is True. + to the values where ``item`` is True. """ raise AbstractMethodError(self) def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None: """ - Set one or more values inplace. + Set one or more values inplace. This method is not required to satisfy the pandas extension array interface. @@ -347,7 +348,8 @@ def __len__(self) -> int: def __iter__(self): """ - Iterate over elements of the array.""" + Iterate over elements of the array. + """ # This needs to be implemented so that pandas recognizes extension # arrays as list-like. The default implementation makes successive # calls to ``__getitem__``, which may be slower than necessary. @@ -393,7 +395,9 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default): @property def dtype(self) -> ExtensionDtype: - "An instance of 'ExtensionDtype'." + """ + An instance of 'ExtensionDtype'. + """ raise AbstractMethodError(self) @property @@ -426,9 +430,9 @@ def nbytes(self) -> int: def astype(self, dtype, copy=True): """ Cast to a NumPy array with 'dtype'. + Parameters ---------- - dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True @@ -471,6 +475,9 @@ def _values_for_argsort(self) -> np.ndarray: Returns ------- + ndarray + The transformed values should maintain the ordering between values + within the array. See Also -------- @@ -503,7 +510,8 @@ def argsort( See Also -------- - numpy.argsort : Sorting implementation used internally.""" + numpy.argsort : Sorting implementation used internally. + """ # Implementor note: You have two places to override the behavior of # argsort. # 1. _values_for_argsort : construct the values passed to np.argsort @@ -577,39 +585,39 @@ def dropna(self): def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray: """ - Shift values by desired number. + Shift values by desired number. - Newly introduced missing values are filled with - ``self.dtype.na_value``. + Newly introduced missing values are filled with + ``self.dtype.na_value``. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0 - Parameters - ---------- - periods : int, default 1 - The number of periods to shift. Negative values are allowed - for shifting backwards. + Parameters + ---------- + periods : int, default 1 + The number of periods to shift. Negative values are allowed + for shifting backwards. - fill_value : object, optional - The scalar value to use for newly introduced missing values. - The default is ``self.dtype.na_value``. + fill_value : object, optional + The scalar value to use for newly introduced missing values. + The default is ``self.dtype.na_value``. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0 - Returns - ------- - ExtensionArray - Shifted. + Returns + ------- + ExtensionArray + Shifted. - Notes - ----- - If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is - returned. + Notes + ----- + If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is + returned. - If ``periods > len(self)``, then an array of size - len(self) is returned, with all values filled with - ``self.dtype.na_value``. - """ + If ``periods > len(self)``, then an array of size + len(self) is returned, with all values filled with + ``self.dtype.na_value``. + """ # Note: this implementation assumes that `self.dtype.na_value` can be # stored in an instance of your ExtensionArray with `self.dtype`. if not len(self) or periods == 0: @@ -630,7 +638,8 @@ def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArra return self._concat_same_type([a, b]) def unique(self): - """Compute the ExtensionArray of unique values. + """ + Compute the ExtensionArray of unique values. Returns -------