From b7fcb545a1a298817cd7d9f8940f19992d1202d2 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Thu, 9 Jan 2020 18:11:30 -0800
Subject: [PATCH 01/37] CLN: remove unnecessary overriding in subclasses (#30875)

---
 pandas/core/indexes/category.py | 13 -------------
 pandas/core/indexes/interval.py | 18 ++----------------
 2 files changed, 2 insertions(+), 29 deletions(-)

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 41072d4ce6a93..a247a986fcb55 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -428,19 +428,6 @@ def _engine(self):
         codes = self.codes
         return self._engine_type(lambda: codes, len(self))
 
-    # introspection
-    @cache_readonly
-    def is_unique(self) -> bool:
-        return self._engine.is_unique
-
-    @property
-    def is_monotonic_increasing(self):
-        return self._engine.is_monotonic_increasing
-
-    @property
-    def is_monotonic_decreasing(self) -> bool:
-        return self._engine.is_monotonic_decreasing
-
     @Appender(_index_shared_docs["index_unique"] % _index_doc_kwargs)
     def unique(self, level=None):
         if level is not None:
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index d33ba52cc7524..1c86235f9eaa1 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -437,22 +437,8 @@ def memory_usage(self, deep: bool = False) -> int:
         # so return the bytes here
         return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep)
 
-    @cache_readonly
-    def is_monotonic(self) -> bool:
-        """
-        Return True if the IntervalIndex is monotonic increasing (only equal or
-        increasing values), else False
-        """
-        return self.is_monotonic_increasing
-
-    @cache_readonly
-    def is_monotonic_increasing(self) -> bool:
-        """
-        Return True if the IntervalIndex is monotonic increasing (only equal or
-        increasing values), else False
-        """
-        return self._engine.is_monotonic_increasing
-
+    # IntervalTree doesn't have an is_monotonic_decreasing, so have to override
+    # the Index implementation
     @cache_readonly
     def is_monotonic_decreasing(self) -> bool:
         """
From 6f2c509984de999b09d44efd1e96dff92038afcf Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 10 Jan 2020 15:01:10 +0100
Subject: [PATCH 02/37] DEPR: fix missing stacklevel in pandas.core.index deprecation (#30878)

---
 pandas/core/index.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/index.py b/pandas/core/index.py
index a9c8e6731a17e..8cff53d7a8b74 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -27,4 +27,5 @@
     "pandas.core.index is deprecated and will be removed in a future version. "
     "The public classes are available in the top-level namespace.",
     FutureWarning,
+    stacklevel=2,
 )
From d1b9598d69af350f718128c567a856848cff595d Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Fri, 10 Jan 2020 11:22:37 -0800
Subject: [PATCH 03/37] DOC: Encourage use of pre-commit in the docs (#30864)

Previously, we stated it as merely optional.

xref:
https://github.com/pandas-dev/pandas/pull/30773
https://github.com/pandas-dev/pandas/pull/30814
---
 doc/source/development/contributing.rst | 51 +++++++++++++++++--------
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst
index 93c65ba7358c9..2dc5ed07544d1 100644
--- a/doc/source/development/contributing.rst
+++ b/doc/source/development/contributing.rst
@@ -635,6 +635,8 @@ many errors as possible, but it may not correct *all* of them.
 Thus, it is recommended that you run ``cpplint`` to double check and make
 any other style fixes manually.
 
+.. _contributing.code-formatting:
+
 Python (PEP8 / black)
 ~~~~~~~~~~~~~~~~~~~~~
 
@@ -656,19 +658,8 @@ apply ``black`` as you edit files.
 
 You should use a ``black`` version >= 19.10b0 as previous versions are not compatible
 with the pandas codebase.
 
-Optionally, you may wish to setup `pre-commit hooks <https://pre-commit.com/>`_
-to automatically run ``black`` and ``flake8`` when you make a git commit. This
-can be done by installing ``pre-commit``::
-
-    pip install pre-commit
-
-and then running::
-
-    pre-commit install
-
-from the root of the pandas repository. Now ``black`` and ``flake8`` will be run
-each time you commit changes. You can skip these checks with
-``git commit --no-verify``.
+If you wish to run these checks automatically, we encourage you to use
+:ref:`pre-commits <contributing.pre-commit>` instead.
 
 One caveat about ``git diff upstream/master -u -- "*.py" | flake8 --diff``: this
 command will catch any stylistic errors in your changes specifically, but
 beware it may not catch all of them. For example, if you delete the only
 usage of an imported function, it is stylistically incorrect to import an
 unused function. However, style-checking the diff will not catch this because
 the actual import is not part of the diff. Thus, for completeness, you should
-run this command, though it will take longer::
+run this command, though it may take longer::
 
     git diff upstream/master --name-only -- "*.py" | xargs -r flake8
 
@@ -694,6 +685,8 @@ behaviour as follows::
 
 This will get all the files being changed by the PR (and ending with ``.py``),
 and run ``flake8`` on them, one after the other.
 
+Note that these commands can be run analogously with ``black``.
+
 .. _contributing.import-formatting:
 
 Import formatting
 ~~~~~~~~~~~~~~~~~
 
@@ -716,7 +709,6 @@ A summary of our current import sections ( in order ):
 
 Imports are alphabetically sorted within these sections.
 
-
 As part of :ref:`Continuous Integration <contributing.ci>` checks we run::
 
     isort --recursive --check-only pandas
 
@@ -740,8 +732,37 @@ to automatically format imports correctly. This will modify your local copy of t
 
 The `--recursive` flag can be passed to sort all files in a directory.
 
+Alternatively, you can run a command similar to what was suggested for ``black`` and ``flake8`` :ref:`right above <contributing.code-formatting>`::
+
+    git diff upstream/master --name-only -- "*.py" | xargs -r isort
+
+Similar caveats apply if you are on OSX or Windows.
+
 You can then verify the changes look ok, then git :ref:`commit <contributing.commit-code>` and :ref:`push <contributing.push-code>`.
 
+.. _contributing.pre-commit:
+
+Pre-Commit
+~~~~~~~~~~
+
+You can run many of these styling checks manually as we have described above. However,
+we encourage you to use `pre-commit hooks <https://pre-commit.com/>`_ instead
+to automatically run ``black``, ``flake8``, and ``isort`` when you make a git commit. This
+can be done by installing ``pre-commit``::
+
+    pip install pre-commit
+
+and then running::
+
+    pre-commit install
+
+from the root of the pandas repository. Now all of the styling checks will be
+run each time you commit changes without your needing to run each one manually.
+In addition, using this pre-commit hook will allow you to more easily
+remain up-to-date with our code checks as they change.
+
+Note that if needed, you can skip these checks with ``git commit --no-verify``.
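+
+If you want to check your work before committing, the installed hooks can also
+be invoked on demand. The commands below are plain upstream ``pre-commit``
+usage rather than anything pandas-specific, and the ``black`` hook id assumes
+the hooks defined in the repository's ``.pre-commit-config.yaml``::
+
+    # run every configured hook against all tracked files
+    pre-commit run --all-files
+
+    # run a single hook (here black) against the currently staged files
+    pre-commit run black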
+
 Backwards compatibility
 ~~~~~~~~~~~~~~~~~~~~~~~
 
From 447a3b008c695095c0db009965285080a72c402c Mon Sep 17 00:00:00 2001
From: Marc Garcia
Date: Fri, 10 Jan 2020 19:26:20 +0000
Subject: [PATCH 04/37] WEB: Removing Discourse links (#30890)

We are not using them for now.
---
 web/pandas/_templates/layout.html | 5 -----
 web/pandas/config.yml             | 2 --
 2 files changed, 7 deletions(-)

diff --git a/web/pandas/_templates/layout.html b/web/pandas/_templates/layout.html
index 120058afd1190..92126a7b5a2f2 100644
--- a/web/pandas/_templates/layout.html
+++ b/web/pandas/_templates/layout.html
@@ -84,11 +84,6 @@
-          <li class="list-inline-item">
-            <a href="https://pandas.discourse.group/">
-              <i class="fab fa-discourse"></i>
-            </a>
-          </li>
  • pandas is a fiscally sponsored project of NumFOCUS diff --git a/web/pandas/config.yml b/web/pandas/config.yml index e2a95a5039884..d1fb7ba0f7b86 100644 --- a/web/pandas/config.yml +++ b/web/pandas/config.yml @@ -50,8 +50,6 @@ navbar: target: /community/blog.html - name: "Ask a question (StackOverflow)" target: https://stackoverflow.com/questions/tagged/pandas - - name: "Discuss" - target: https://pandas.discourse.group - name: "Code of conduct" target: /community/coc.html - name: "Ecosystem" From 03cdcb62089b79d548543279978effcd2c670a63 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 10 Jan 2020 23:21:20 +0000 Subject: [PATCH 05/37] WEB: Remove from roadmap moving the docstring script (#30893) --- doc/source/development/roadmap.rst | 14 -------------- web/pandas/about/roadmap.md | 13 ------------- 2 files changed, 27 deletions(-) diff --git a/doc/source/development/roadmap.rst b/doc/source/development/roadmap.rst index 00598830e2fe9..fafe63d80249c 100644 --- a/doc/source/development/roadmap.rst +++ b/doc/source/development/roadmap.rst @@ -129,20 +129,6 @@ Some specific goals include * Improve the overall organization of the documentation and specific subsections of the documentation to make navigation and finding content easier. -Package docstring validation ----------------------------- - -To improve the quality and consistency of pandas docstrings, we've developed -tooling to check docstrings in a variety of ways. -https://github.com/pandas-dev/pandas/blob/master/scripts/validate_docstrings.py -contains the checks. - -Like many other projects, pandas uses the -`numpydoc `__ style for writing -docstrings. With the collaboration of the numpydoc maintainers, we'd like to -move the checks to a package other than pandas so that other projects can easily -use them as well. - Performance monitoring ---------------------- diff --git a/web/pandas/about/roadmap.md b/web/pandas/about/roadmap.md index 8a5c2735b3d93..35a6b3361f32e 100644 --- a/web/pandas/about/roadmap.md +++ b/web/pandas/about/roadmap.md @@ -134,19 +134,6 @@ pandas documentation. Some specific goals include subsections of the documentation to make navigation and finding content easier. -## Package docstring validation - -To improve the quality and consistency of pandas docstrings, we've -developed tooling to check docstrings in a variety of ways. - -contains the checks. - -Like many other projects, pandas uses the -[numpydoc](https://numpydoc.readthedocs.io/en/latest/) style for writing -docstrings. With the collaboration of the numpydoc maintainers, we'd -like to move the checks to a package other than pandas so that other -projects can easily use them as well. - ## Performance monitoring Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) From 0b4bac700f5a0809213e7ad9a8e78f5cb1244c62 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 11 Jan 2020 01:48:03 +0200 Subject: [PATCH 06/37] TYP: typing annotations (#30901) --- pandas/_config/display.py | 3 ++- pandas/_config/localization.py | 6 +++--- pandas/compat/numpy/function.py | 34 ++++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/pandas/_config/display.py b/pandas/_config/display.py index 067b7c503baab..ef319f4447565 100644 --- a/pandas/_config/display.py +++ b/pandas/_config/display.py @@ -1,6 +1,7 @@ """ Unopinionated display configuration. 
""" + import locale import sys @@ -11,7 +12,7 @@ _initial_defencoding = None -def detect_console_encoding(): +def detect_console_encoding() -> str: """ Try to find the most capable encoding supported by the console. slightly modified from the way IPython handles the same issue. diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index dd1d4948aa6e3..0d68e78372d8a 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -12,7 +12,7 @@ @contextmanager -def set_locale(new_locale, lc_var=locale.LC_ALL): +def set_locale(new_locale, lc_var: int = locale.LC_ALL): """ Context manager for temporarily setting a locale. @@ -44,7 +44,7 @@ def set_locale(new_locale, lc_var=locale.LC_ALL): locale.setlocale(lc_var, current_locale) -def can_set_locale(lc, lc_var=locale.LC_ALL): +def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool: """ Check to see if we can set a locale, and subsequently get the locale, without raising an Exception. @@ -58,7 +58,7 @@ def can_set_locale(lc, lc_var=locale.LC_ALL): Returns ------- - is_valid : bool + bool Whether the passed locale can be set """ diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 7158f251ad805..50f234cbf9419 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -33,13 +33,26 @@ class CompatValidator: - def __init__(self, defaults, fname=None, method=None, max_fname_arg_count=None): + def __init__( + self, + defaults, + fname=None, + method: Optional[str] = None, + max_fname_arg_count=None, + ): self.fname = fname self.method = method self.defaults = defaults self.max_fname_arg_count = max_fname_arg_count - def __call__(self, args, kwargs, fname=None, max_fname_arg_count=None, method=None): + def __call__( + self, + args, + kwargs, + fname=None, + max_fname_arg_count=None, + method: Optional[str] = None, + ) -> None: if args or kwargs: fname = self.fname if fname is None else fname max_fname_arg_count = ( @@ -300,7 +313,7 @@ def validate_take_with_convert(convert, args, kwargs): ) -def validate_window_func(name, args, kwargs): +def validate_window_func(name, args, kwargs) -> None: numpy_args = ("axis", "dtype", "out") msg = ( f"numpy operations are not valid with window objects. " @@ -315,7 +328,7 @@ def validate_window_func(name, args, kwargs): raise UnsupportedFunctionCall(msg) -def validate_rolling_func(name, args, kwargs): +def validate_rolling_func(name, args, kwargs) -> None: numpy_args = ("axis", "dtype", "out") msg = ( f"numpy operations are not valid with window objects. " @@ -330,7 +343,7 @@ def validate_rolling_func(name, args, kwargs): raise UnsupportedFunctionCall(msg) -def validate_expanding_func(name, args, kwargs): +def validate_expanding_func(name, args, kwargs) -> None: numpy_args = ("axis", "dtype", "out") msg = ( f"numpy operations are not valid with window objects. " @@ -345,7 +358,7 @@ def validate_expanding_func(name, args, kwargs): raise UnsupportedFunctionCall(msg) -def validate_groupby_func(name, args, kwargs, allowed=None): +def validate_groupby_func(name, args, kwargs, allowed=None) -> None: """ 'args' and 'kwargs' should be empty, except for allowed kwargs because all of @@ -359,16 +372,15 @@ def validate_groupby_func(name, args, kwargs, allowed=None): if len(args) + len(kwargs) > 0: raise UnsupportedFunctionCall( - f"numpy operations are not valid with " - f"groupby. Use .groupby(...).{name}() " - f"instead" + "numpy operations are not valid with groupby. 
" + f"Use .groupby(...).{name}() instead" ) RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var") -def validate_resampler_func(method, args, kwargs): +def validate_resampler_func(method: str, args, kwargs) -> None: """ 'args' and 'kwargs' should be empty because all of their necessary parameters are explicitly listed in @@ -385,7 +397,7 @@ def validate_resampler_func(method, args, kwargs): raise TypeError("too many arguments passed in") -def validate_minmax_axis(axis): +def validate_minmax_axis(axis: Optional[int]) -> None: """ Ensure that the axis argument passed to min, max, argmin, or argmax is zero or None, as otherwise it will be incorrectly ignored. From f887eb09ba19311408717c0bed1f36732ab8f71a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Jan 2020 15:57:33 -0800 Subject: [PATCH 07/37] TYP: offsets (#30897) --- pandas/tseries/offsets.py | 90 +++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 8bb98a271bce8..d31c23c7ccf1d 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -365,7 +365,7 @@ def apply_index(self, i): "applied vectorized" ) - def is_anchored(self): + def is_anchored(self) -> bool: # TODO: Does this make sense for the general case? It would help # if there were a canonical docstring for what is_anchored means. return self.n == 1 @@ -378,7 +378,7 @@ def onOffset(self, dt): ) return self.is_on_offset(dt) - def isAnchored(self): + def isAnchored(self) -> bool: warnings.warn( "isAnchored is a deprecated, use is_anchored instead", FutureWarning, @@ -389,7 +389,7 @@ def isAnchored(self): # TODO: Combine this with BusinessMixin version by defining a whitelisted # set of attributes on each object rather than the existing behavior of # iterating over internal ``__dict__`` - def _repr_attrs(self): + def _repr_attrs(self) -> str: exclude = {"n", "inc", "normalize"} attrs = [] for attr in sorted(self.__dict__): @@ -405,7 +405,7 @@ def _repr_attrs(self): return out @property - def name(self): + def name(self) -> str: return self.rule_code def rollback(self, dt): @@ -452,15 +452,15 @@ def is_on_offset(self, dt): # way to get around weirdness with rule_code @property - def _prefix(self): + def _prefix(self) -> str: raise NotImplementedError("Prefix not defined") @property - def rule_code(self): + def rule_code(self) -> str: return self._prefix @cache_readonly - def freqstr(self): + def freqstr(self) -> str: try: code = self.rule_code except NotImplementedError: @@ -480,7 +480,7 @@ def freqstr(self): return fstr - def _offset_str(self): + def _offset_str(self) -> str: return "" @property @@ -529,11 +529,11 @@ def offset(self): # Alias for backward compat return self._offset - def _repr_attrs(self): + def _repr_attrs(self) -> str: if self.offset: attrs = [f"offset={repr(self.offset)}"] else: - attrs = None + attrs = [] out = "" if attrs: out += ": " + ", ".join(attrs) @@ -553,7 +553,7 @@ def __init__(self, n=1, normalize=False, offset=timedelta(0)): BaseOffset.__init__(self, n, normalize) object.__setattr__(self, "_offset", offset) - def _offset_str(self): + def _offset_str(self) -> str: def get_str(td): off_str = "" if td.days > 0: @@ -649,7 +649,7 @@ def apply_index(self, i): result = shifted.to_timestamp() + time return result - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False return dt.weekday() < 5 @@ -1087,7 +1087,7 @@ def apply(self, 
other): def apply_index(self, i): raise NotImplementedError - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False day64 = _to_dt64(dt, "datetime64[D]") @@ -1134,14 +1134,14 @@ class MonthOffset(SingleConstructorOffset): __init__ = BaseOffset.__init__ @property - def name(self): + def name(self) -> str: if self.is_anchored: return self.rule_code else: month = ccalendar.MONTH_ALIASES[self.n] return f"{self.code_rule}-{month}" - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False return dt.day == self._get_offset_day(dt) @@ -1333,7 +1333,7 @@ def _from_name(cls, suffix=None): return cls(day_of_month=suffix) @property - def rule_code(self): + def rule_code(self) -> str: suffix = f"-{self.day_of_month}" return self._prefix + suffix @@ -1429,7 +1429,7 @@ class SemiMonthEnd(SemiMonthOffset): _prefix = "SM" _min_day_of_month = 1 - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False days_in_month = ccalendar.get_days_in_month(dt.year, dt.month) @@ -1487,7 +1487,7 @@ class SemiMonthBegin(SemiMonthOffset): _prefix = "SMS" - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False return dt.day in (1, self.day_of_month) @@ -1556,7 +1556,7 @@ def __init__(self, n=1, normalize=False, weekday=None): if self.weekday < 0 or self.weekday > 6: raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") - def is_anchored(self): + def is_anchored(self) -> bool: return self.n == 1 and self.weekday is not None @apply_wraps @@ -1632,7 +1632,7 @@ def _end_apply_index(self, dtindex): return base + off + Timedelta(1, "ns") - Timedelta(1, "D") - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False elif self.weekday is None: @@ -1640,7 +1640,7 @@ def is_on_offset(self, dt): return dt.weekday() == self.weekday @property - def rule_code(self): + def rule_code(self) -> str: suffix = "" if self.weekday is not None: weekday = ccalendar.int_to_weekday[self.weekday] @@ -1717,7 +1717,7 @@ def __init__(self, n=1, normalize=False, week=0, weekday=0): if self.week < 0 or self.week > 3: raise ValueError(f"Week must be 0<=week<=3, got {self.week}") - def _get_offset_day(self, other): + def _get_offset_day(self, other: datetime) -> int: """ Find the day in the same month as other that has the same weekday as self.weekday and is the self.week'th such day in the month. @@ -1736,7 +1736,7 @@ def _get_offset_day(self, other): return 1 + shift_days + self.week * 7 @property - def rule_code(self): + def rule_code(self) -> str: weekday = ccalendar.int_to_weekday.get(self.weekday, "") return f"{self._prefix}-{self.week + 1}{weekday}" @@ -1785,7 +1785,7 @@ def __init__(self, n=1, normalize=False, weekday=0): if self.weekday < 0 or self.weekday > 6: raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") - def _get_offset_day(self, other): + def _get_offset_day(self, other: datetime) -> int: """ Find the day in the same month as other that has the same weekday as self.weekday and is the last such day in the month. 
@@ -1805,7 +1805,7 @@ def _get_offset_day(self, other): return dim - shift_days @property - def rule_code(self): + def rule_code(self) -> str: weekday = ccalendar.int_to_weekday.get(self.weekday, "") return f"{self._prefix}-{weekday}" @@ -1842,7 +1842,7 @@ def __init__(self, n=1, normalize=False, startingMonth=None): startingMonth = self._default_startingMonth object.__setattr__(self, "startingMonth", startingMonth) - def is_anchored(self): + def is_anchored(self) -> bool: return self.n == 1 and self.startingMonth is not None @classmethod @@ -1856,7 +1856,7 @@ def _from_name(cls, suffix=None): return cls(**kwargs) @property - def rule_code(self): + def rule_code(self) -> str: month = ccalendar.MONTH_ALIASES[self.startingMonth] return f"{self._prefix}-{month}" @@ -1874,7 +1874,7 @@ def apply(self, other): months = qtrs * 3 - months_since return shift_month(other, months, self._day_opt) - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False mod_month = (dt.month - self.startingMonth) % 3 @@ -1953,7 +1953,7 @@ class YearOffset(DateOffset): _adjust_dst = True _attributes = frozenset(["n", "normalize", "month"]) - def _get_offset_day(self, other): + def _get_offset_day(self, other: datetime) -> int: # override BaseOffset method to use self.month instead of other.month # TODO: there may be a more performant way to do this return liboffsets.get_day_of_month( @@ -1977,7 +1977,7 @@ def apply_index(self, dtindex): shifted, freq=dtindex.freq, dtype=dtindex.dtype ) - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False return dt.month == self.month and dt.day == self._get_offset_day(dt) @@ -1999,7 +1999,7 @@ def _from_name(cls, suffix=None): return cls(**kwargs) @property - def rule_code(self): + def rule_code(self) -> str: month = ccalendar.MONTH_ALIASES[self.month] return f"{self._prefix}-{month}" @@ -2117,12 +2117,12 @@ def __init__( if self.variation not in ["nearest", "last"]: raise ValueError(f"{self.variation} is not a valid variation") - def is_anchored(self): + def is_anchored(self) -> bool: return ( self.n == 1 and self.startingMonth is not None and self.weekday is not None ) - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False dt = datetime(dt.year, dt.month, dt.day) @@ -2217,18 +2217,18 @@ def get_year_end(self, dt): return target_date + timedelta(days_forward - 7) @property - def rule_code(self): + def rule_code(self) -> str: prefix = self._prefix suffix = self.get_rule_code_suffix() return f"{prefix}-{suffix}" - def _get_suffix_prefix(self): + def _get_suffix_prefix(self) -> str: if self.variation == "nearest": return "N" else: return "L" - def get_rule_code_suffix(self): + def get_rule_code_suffix(self) -> str: prefix = self._get_suffix_prefix() month = ccalendar.MONTH_ALIASES[self.startingMonth] weekday = ccalendar.int_to_weekday[self.weekday] @@ -2346,7 +2346,7 @@ def _offset(self): variation=self.variation, ) - def is_anchored(self): + def is_anchored(self) -> bool: return self.n == 1 and self._offset.is_anchored() def _rollback_to_year(self, other): @@ -2434,7 +2434,7 @@ def get_weeks(self, dt): return ret - def year_has_extra_week(self, dt): + def year_has_extra_week(self, dt: datetime) -> bool: # Avoid round-down errors --> normalize to get # e.g. 
'370D' instead of '360D23H' norm = Timestamp(dt).normalize().tz_localize(None) @@ -2445,7 +2445,7 @@ def year_has_extra_week(self, dt): assert weeks_in_year in [52, 53], weeks_in_year return weeks_in_year == 53 - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False if self._offset.is_on_offset(dt): @@ -2463,7 +2463,7 @@ def is_on_offset(self, dt): return False @property - def rule_code(self): + def rule_code(self) -> str: suffix = self._offset.get_rule_code_suffix() qtr = self.qtr_with_extra_week return f"{self._prefix}-{suffix}-{qtr}" @@ -2516,7 +2516,7 @@ def apply(self, other): ) return new - def is_on_offset(self, dt): + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False return date(dt.year, dt.month, dt.day) == easter(dt.year) @@ -2596,7 +2596,7 @@ def __eq__(self, other: Any) -> bool: # This is identical to DateOffset.__hash__, but has to be redefined here # for Python 3, because we've redefined __eq__. - def __hash__(self): + def __hash__(self) -> int: return hash(self._params) def __ne__(self, other): @@ -2617,7 +2617,7 @@ def __ne__(self, other): return True @property - def delta(self): + def delta(self) -> Timedelta: return self.n * self._inc @property @@ -2648,11 +2648,11 @@ def apply(self, other): raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") - def is_anchored(self): + def is_anchored(self) -> bool: return False -def _delta_to_tick(delta): +def _delta_to_tick(delta: timedelta) -> Tick: if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0: # nanoseconds only for pd.Timedelta if delta.seconds == 0: From 6e9651e40db39e738ad3f5db09591f877159ecbb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 11 Jan 2020 01:53:41 -0800 Subject: [PATCH 08/37] BUG: pickle files left behind by tm.round_trip_pickle (#30906) --- pandas/_testing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 0b81fb0f7a8d5..1fdc5d478aaf6 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -122,9 +122,9 @@ def round_trip_pickle( _path = path if _path is None: _path = f"__{rands(10)}__.pickle" - with ensure_clean(_path) as path: - pd.to_pickle(obj, _path) - return pd.read_pickle(_path) + with ensure_clean(_path) as temp_path: + pd.to_pickle(obj, temp_path) + return pd.read_pickle(temp_path) def round_trip_pathlib(writer, reader, path: Optional[str] = None): From 7f2948cad169c7b95e7a509145b66c1e599da2ba Mon Sep 17 00:00:00 2001 From: HH-MWB <50187675+HH-MWB@users.noreply.github.com> Date: Sat, 11 Jan 2020 18:11:34 -0500 Subject: [PATCH 09/37] replace syntax with f-string (#30919) --- pandas/core/arrays/period.py | 4 ++-- pandas/core/dtypes/common.py | 3 +-- pandas/core/dtypes/dtypes.py | 3 +-- pandas/core/frame.py | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8b49c2186dde0..697d759206ff9 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -298,11 +298,11 @@ def __arrow_array__(self, type=None): if self.freqstr != type.freq: raise TypeError( "Not supported to convert PeriodArray to array with different" - " 'freq' ({0} vs {1})".format(self.freqstr, type.freq) + f" 'freq' ({self.freqstr} vs {type.freq})" ) else: raise TypeError( - "Not supported to convert PeriodArray to '{0}' type".format(type) + f"Not supported to convert PeriodArray to '{type}' type" ) 
period_type = ArrowPeriodType(self.freqstr) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5a007f28d63cb..f62f03be9b732 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -194,12 +194,11 @@ def ensure_python_int(value: Union[int, np.integer]) -> int: """ if not is_scalar(value): raise TypeError(f"Value needs to be a scalar value, was type {type(value)}") - msg = "Wrong type {} for value {}" try: new_value = int(value) assert new_value == value except (TypeError, ValueError, AssertionError): - raise TypeError(msg.format(type(value), value)) + raise TypeError(f"Wrong type {type(value)} for value {value}") return new_value diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 466ed815e8e5a..93522abc3a48f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -435,12 +435,11 @@ def __eq__(self, other: Any) -> bool: return hash(self) == hash(other) def __repr__(self) -> str_type: - tpl = "CategoricalDtype(categories={data}ordered={ordered})" if self.categories is None: data = "None, " else: data = self.categories._format_data(name=type(self).__name__) - return tpl.format(data=data, ordered=self.ordered) + return f"CategoricalDtype(categories={data}ordered={self.ordered})" @staticmethod def _hash_categories(categories, ordered: Ordered = True) -> int: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5ad133f9e21a4..676b78573399c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2431,7 +2431,7 @@ def _verbose_repr(): dtype = self.dtypes.iloc[i] col = pprint_thing(col) - line_no = _put_str(" {num}".format(num=i), space_num) + line_no = _put_str(f" {i}", space_num) count = "" if show_counts: count = counts.iloc[i] From 939e7ddc6e75519f19ae98e30bafe9b9e3c21e46 Mon Sep 17 00:00:00 2001 From: Dina Date: Sun, 12 Jan 2020 01:12:33 +0200 Subject: [PATCH 10/37] CLN: F-strings (#30916) --- pandas/core/arrays/timedeltas.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index c34d14f15075c..516a271042c9b 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -43,8 +43,6 @@ from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import Tick -_BAD_DTYPE = "dtype {dtype} cannot be converted to timedelta64[ns]" - def _is_convertible_to_td(key): return isinstance(key, (Tick, timedelta, np.timedelta64, str)) @@ -1064,7 +1062,7 @@ def _validate_td64_dtype(dtype): raise ValueError(msg) if not is_dtype_equal(dtype, _TD_DTYPE): - raise ValueError(_BAD_DTYPE.format(dtype=dtype)) + raise ValueError(f"dtype {dtype} cannot be converted to timedelta64[ns]") return dtype From b47a454b2022218b78fc68cedb3eb009c58c8bfe Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 11 Jan 2020 15:16:40 -0800 Subject: [PATCH 11/37] DOC: Fixture docs in pandas/conftest.py (#30917) --- pandas/conftest.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 3eab2186ccb94..0c964452df5da 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -65,25 +65,28 @@ def pytest_runtest_setup(item): pytest.skip("skipping high memory test since --run-high-memory was not set") -# Configurations for all tests and all test modules - - @pytest.fixture(autouse=True) def configure_tests(): + """ + Configure settings for all tests and test modules. 
+ """ pd.set_option("chained_assignment", "raise") -# For running doctests: make np and pd names available - - @pytest.fixture(autouse=True) def add_imports(doctest_namespace): + """ + Make `np` and `pd` names available for doctests. + """ doctest_namespace["np"] = np doctest_namespace["pd"] = pd @pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) def spmatrix(request): + """ + Yields scipy sparse matrix classes. + """ from scipy import sparse return getattr(sparse, request.param + "_matrix") @@ -92,8 +95,8 @@ def spmatrix(request): @pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis {repr(x)}") def axis(request): """ - Fixture for returning the axis numbers of a DataFrame. - """ + Fixture for returning the axis numbers of a DataFrame. + """ return request.param @@ -237,6 +240,10 @@ def all_boolean_reductions(request): @pytest.fixture(params=list(_cython_table)) def cython_table_items(request): + """ + Yields a tuple of a function and its corresponding name. Correspond to + the list of aggregator "Cython functions" used on selected table items. + """ return request.param @@ -337,6 +344,9 @@ def writable(request): @pytest.fixture(scope="module") def datetime_tz_utc(): + """ + Yields the UTC timezone object from the datetime module. + """ return timezone.utc @@ -358,6 +368,9 @@ def join_type(request): @pytest.fixture def strict_data_files(pytestconfig): + """ + Returns the configuration for the test setting `--strict-data-files`. + """ return pytestconfig.getoption("--strict-data-files") From 75ecfa448e2272aedb2352a7ee7d8bb7a8123b3e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 11 Jan 2020 15:53:35 -0800 Subject: [PATCH 12/37] CLN: remove unnecesary _date_check_type (#30932) The check doesn't do anything, and we still a raise KeyError anyways --- pandas/_libs/index.pyx | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index ac8172146d351..28d269a9a809e 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -447,7 +447,6 @@ cdef class DatetimeEngine(Int64Engine): conv = maybe_datetimelike_to_i8(val) loc = values.searchsorted(conv, side='left') except TypeError: - self._date_check_type(val) raise KeyError(val) if loc == len(values) or values[loc] != conv: @@ -470,12 +469,6 @@ cdef class DatetimeEngine(Int64Engine): val = maybe_datetimelike_to_i8(val) return self.mapping.get_item(val) except (TypeError, ValueError): - self._date_check_type(val) - raise KeyError(val) - - cdef inline _date_check_type(self, object val): - hash(val) - if not util.is_integer_object(val): raise KeyError(val) def get_indexer(self, values): From 044559a7a157645934ab78391aa5de811af5be59 Mon Sep 17 00:00:00 2001 From: Galuh Sahid Date: Sun, 12 Jan 2020 17:07:21 +0700 Subject: [PATCH 13/37] DOC: Fix SS03 docstring error (#30939) xref: https://github.com/pandas-dev/pandas/issues/27977 https://github.com/pandas-dev/pandas/issues/30733 --- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/window/indexers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d7cabbabddf95..d7c508c890a46 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -920,7 +920,7 @@ def freq(self, value): @property def freqstr(self): """ - Return the frequency object as a string if its set, otherwise None + Return the frequency object as a string if its set, otherwise None. 
""" if self.freq is None: return None diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index 0fa24a0ba1b5a..921cdb3c2523f 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -32,7 +32,7 @@ class BaseIndexer: - """Base class for window bounds calculations""" + """Base class for window bounds calculations.""" def __init__( self, index_array: Optional[np.ndarray] = None, window_size: int = 0, **kwargs, From 28e909c63daa451e0f70c6cc15c7ad644adc1979 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 12 Jan 2020 14:32:15 +0000 Subject: [PATCH 14/37] TYP: type up parts of series.py (#30761) --- pandas/core/generic.py | 1 - pandas/core/series.py | 117 +++++++++++++++++++++++------------------ 2 files changed, 67 insertions(+), 51 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0116207675889..03e86758b64ed 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4121,7 +4121,6 @@ def add_suffix(self: FrameOrSeries, suffix: str) -> FrameOrSeries: def sort_values( self, - by=None, axis=0, ascending=True, inplace: bool_t = False, diff --git a/pandas/core/series.py b/pandas/core/series.py index 3e1f011fde51a..ed338700f1011 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4,7 +4,18 @@ from io import StringIO from shutil import get_terminal_size from textwrap import dedent -from typing import IO, Any, Callable, Hashable, List, Optional +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Hashable, + Iterable, + List, + Optional, + Tuple, + Type, +) import warnings import numpy as np @@ -12,6 +23,7 @@ from pandas._config import get_option from pandas._libs import index as libindex, lib, reshape, tslibs +from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution from pandas.util._validators import validate_bool_kwarg, validate_percentile @@ -80,6 +92,9 @@ import pandas.io.formats.format as fmt import pandas.plotting +if TYPE_CHECKING: + from pandas.core.frame import DataFrame + __all__ = ["Series"] _shared_doc_kwargs = dict( @@ -356,11 +371,11 @@ def _init_dict(self, data, index=None, dtype=None): # ---------------------------------------------------------------------- @property - def _constructor(self): + def _constructor(self) -> Type["Series"]: return Series @property - def _constructor_expanddim(self): + def _constructor_expanddim(self) -> Type["DataFrame"]: from pandas.core.frame import DataFrame return DataFrame @@ -372,7 +387,7 @@ def _can_hold_na(self): _index = None - def _set_axis(self, axis, labels, fastpath=False): + def _set_axis(self, axis, labels, fastpath=False) -> None: """ Override generic, we want to set the _typ here. """ @@ -517,7 +532,7 @@ def __len__(self) -> int: """ return len(self._data) - def view(self, dtype=None): + def view(self, dtype=None) -> "Series": """ Create a new view of the Series. @@ -729,7 +744,7 @@ def __array__(self, dtype=None) -> np.ndarray: # ---------------------------------------------------------------------- - def _unpickle_series_compat(self, state): + def _unpickle_series_compat(self, state) -> None: if isinstance(state, dict): self._data = state["_data"] self.name = state["name"] @@ -760,7 +775,7 @@ def _unpickle_series_compat(self, state): # indexers @property - def axes(self): + def axes(self) -> List[Index]: """ Return a list of the row axis labels. 
""" @@ -770,7 +785,7 @@ def axes(self): # Indexing Methods @Appender(generic.NDFrame.take.__doc__) - def take(self, indices, axis=0, is_copy=False, **kwargs): + def take(self, indices, axis=0, is_copy=False, **kwargs) -> "Series": nv.validate_take(tuple(), kwargs) indices = ensure_platform_int(indices) @@ -816,7 +831,7 @@ def _ixs(self, i: int, axis: int = 0): else: return values[i] - def _slice(self, slobj: slice, axis: int = 0, kind=None): + def _slice(self, slobj: slice, axis: int = 0, kind=None) -> "Series": slobj = self.index._convert_slice_indexer(slobj, kind=kind or "getitem") return self._get_values(slobj) @@ -1100,7 +1115,7 @@ def _set_value(self, label, value, takeable: bool = False): def _is_mixed_type(self): return False - def repeat(self, repeats, axis=None): + def repeat(self, repeats, axis=None) -> "Series": """ Repeat elements of a Series. @@ -1425,7 +1440,7 @@ def to_markdown( # ---------------------------------------------------------------------- - def items(self): + def items(self) -> Iterable[Tuple[Label, Any]]: """ Lazily iterate over (index, value) tuples. @@ -1455,13 +1470,13 @@ def items(self): return zip(iter(self.index), iter(self)) @Appender(items.__doc__) - def iteritems(self): + def iteritems(self) -> Iterable[Tuple[Label, Any]]: return self.items() # ---------------------------------------------------------------------- # Misc public methods - def keys(self): + def keys(self) -> Index: """ Return alias for index. @@ -1507,7 +1522,7 @@ def to_dict(self, into=dict): into_c = com.standardize_mapping(into) return into_c(self.items()) - def to_frame(self, name=None): + def to_frame(self, name=None) -> "DataFrame": """ Convert Series to DataFrame. @@ -1539,7 +1554,7 @@ def to_frame(self, name=None): return df - def _set_name(self, name, inplace=False): + def _set_name(self, name, inplace=False) -> "Series": """ Set the Series name. @@ -1681,7 +1696,7 @@ def count(self, level=None): out = np.bincount(obs, minlength=len(lev) or None) return self._constructor(out, index=lev, dtype="int64").__finalize__(self) - def mode(self, dropna=True): + def mode(self, dropna=True) -> "Series": """ Return the mode(s) of the dataset. @@ -1766,7 +1781,7 @@ def unique(self): result = super().unique() return result - def drop_duplicates(self, keep="first", inplace=False): + def drop_duplicates(self, keep="first", inplace=False) -> "Series": """ Return Series with duplicate values removed. @@ -1843,7 +1858,7 @@ def drop_duplicates(self, keep="first", inplace=False): """ return super().drop_duplicates(keep=keep, inplace=inplace) - def duplicated(self, keep="first"): + def duplicated(self, keep="first") -> "Series": """ Indicate duplicate Series values. @@ -2062,7 +2077,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs): return np.nan return self.index[i] - def round(self, decimals=0, *args, **kwargs): + def round(self, decimals=0, *args, **kwargs) -> "Series": """ Round each value in a Series to the given number of decimals. @@ -2157,7 +2172,7 @@ def quantile(self, q=0.5, interpolation="linear"): # scalar return result.iloc[0] - def corr(self, other, method="pearson", min_periods=None): + def corr(self, other, method="pearson", min_periods=None) -> float: """ Compute correlation with `other` Series, excluding missing values. 
@@ -2210,7 +2225,7 @@ def corr(self, other, method="pearson", min_periods=None): f"'{method}' was supplied" ) - def cov(self, other, min_periods=None): + def cov(self, other, min_periods=None) -> float: """ Compute covariance with Series, excluding missing values. @@ -2239,7 +2254,7 @@ def cov(self, other, min_periods=None): return np.nan return nanops.nancov(this.values, other.values, min_periods=min_periods) - def diff(self, periods=1): + def diff(self, periods=1) -> "Series": """ First discrete difference of element. @@ -2303,7 +2318,7 @@ def diff(self, periods=1): result = algorithms.diff(com.values_from_object(self), periods) return self._constructor(result, index=self.index).__finalize__(self) - def autocorr(self, lag=1): + def autocorr(self, lag=1) -> float: """ Compute the lag-N autocorrelation. @@ -2446,7 +2461,7 @@ def searchsorted(self, value, side="left", sorter=None): # ------------------------------------------------------------------- # Combination - def append(self, to_append, ignore_index=False, verify_integrity=False): + def append(self, to_append, ignore_index=False, verify_integrity=False) -> "Series": """ Concatenate two or more Series. @@ -2523,8 +2538,10 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): to_concat.extend(to_append) else: to_concat = [self, to_append] - return concat( - to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity + return self._ensure_type( + concat( + to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity + ) ) def _binop(self, other, func, level=None, fill_value=None): @@ -2566,7 +2583,7 @@ def _binop(self, other, func, level=None, fill_value=None): ret = ops._construct_result(self, result, new_index, name) return ret - def combine(self, other, func, fill_value=None): + def combine(self, other, func, fill_value=None) -> "Series": """ Combine the Series with a Series or scalar according to `func`. @@ -2663,7 +2680,7 @@ def combine(self, other, func, fill_value=None): new_values = try_cast_to_ea(self._values, new_values) return self._constructor(new_values, index=new_index, name=new_name) - def combine_first(self, other): + def combine_first(self, other) -> "Series": """ Combine Series values, choosing the calling Series's values first. @@ -2703,7 +2720,7 @@ def combine_first(self, other): return this.where(notna(this), other) - def update(self, other): + def update(self, other) -> None: """ Modify Series in place using non-NA values from passed Series. Aligns on index. @@ -2762,10 +2779,10 @@ def sort_values( self, axis=0, ascending=True, - inplace=False, - kind="quicksort", - na_position="last", - ignore_index=False, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool = False, ): """ Sort by the values. @@ -3117,7 +3134,7 @@ def sort_index( else: return result.__finalize__(self) - def argsort(self, axis=0, kind="quicksort", order=None): + def argsort(self, axis=0, kind="quicksort", order=None) -> "Series": """ Override ndarray.argsort. Argsorts the value, omitting NA/null values, and places the result in the same locations as the non-NA values. @@ -3155,7 +3172,7 @@ def argsort(self, axis=0, kind="quicksort", order=None): np.argsort(values, kind=kind), index=self.index, dtype="int64" ).__finalize__(self) - def nlargest(self, n=5, keep="first"): + def nlargest(self, n=5, keep="first") -> "Series": """ Return the largest `n` elements. 
@@ -3253,7 +3270,7 @@ def nlargest(self, n=5, keep="first"): """ return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() - def nsmallest(self, n=5, keep="first"): + def nsmallest(self, n=5, keep="first") -> "Series": """ Return the smallest `n` elements. @@ -3350,7 +3367,7 @@ def nsmallest(self, n=5, keep="first"): """ return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() - def swaplevel(self, i=-2, j=-1, copy=True): + def swaplevel(self, i=-2, j=-1, copy=True) -> "Series": """ Swap levels i and j in a :class:`MultiIndex`. @@ -3373,7 +3390,7 @@ def swaplevel(self, i=-2, j=-1, copy=True): self ) - def reorder_levels(self, order): + def reorder_levels(self, order) -> "Series": """ Rearrange index levels using input order. @@ -3497,7 +3514,7 @@ def unstack(self, level=-1, fill_value=None): # ---------------------------------------------------------------------- # function application - def map(self, arg, na_action=None): + def map(self, arg, na_action=None) -> "Series": """ Map values of Series according to input correspondence. @@ -3575,7 +3592,7 @@ def map(self, arg, na_action=None): new_values = super()._map_values(arg, na_action=na_action) return self._constructor(new_values, index=self.index).__finalize__(self) - def _gotitem(self, key, ndim, subset=None): + def _gotitem(self, key, ndim, subset=None) -> "Series": """ Sub-classes to define. Return a sliced object. @@ -3983,7 +4000,7 @@ def drop( level=None, inplace=False, errors="raise", - ): + ) -> "Series": """ Return Series with specified index labels removed. @@ -4124,7 +4141,7 @@ def replace( ) @Appender(generic._shared_docs["shift"] % _shared_doc_kwargs) - def shift(self, periods=1, freq=None, axis=0, fill_value=None): + def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "Series": return super().shift( periods=periods, freq=freq, axis=axis, fill_value=fill_value ) @@ -4183,7 +4200,7 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v - def isin(self, values): + def isin(self, values) -> "Series": """ Check whether `values` are contained in Series. @@ -4239,7 +4256,7 @@ def isin(self, values): result = algorithms.isin(self, values) return self._constructor(result, index=self.index).__finalize__(self) - def between(self, left, right, inclusive=True): + def between(self, left, right, inclusive=True) -> "Series": """ Return boolean Series equivalent to left <= series <= right. @@ -4315,19 +4332,19 @@ def between(self, left, right, inclusive=True): return lmask & rmask @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs) - def isna(self): + def isna(self) -> "Series": return super().isna() @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs) - def isnull(self): + def isnull(self) -> "Series": return super().isnull() @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs) - def notna(self): + def notna(self) -> "Series": return super().notna() @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs) - def notnull(self): + def notnull(self) -> "Series": return super().notnull() def dropna(self, axis=0, inplace=False, how=None): @@ -4421,7 +4438,7 @@ def dropna(self, axis=0, inplace=False, how=None): # ---------------------------------------------------------------------- # Time series-oriented methods - def to_timestamp(self, freq=None, how="start", copy=True): + def to_timestamp(self, freq=None, how="start", copy=True) -> "Series": """ Cast to DatetimeIndex of Timestamps, at *beginning* of period. 
@@ -4446,7 +4463,7 @@ def to_timestamp(self, freq=None, how="start", copy=True): new_index = self.index.to_timestamp(freq=freq, how=how) return self._constructor(new_values, index=new_index).__finalize__(self) - def to_period(self, freq=None, copy=True): + def to_period(self, freq=None, copy=True) -> "Series": """ Convert Series from DatetimeIndex to PeriodIndex with desired frequency (inferred from index if not passed). From 4e2546d89260fda592332a3988573c26edc7152c Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sun, 12 Jan 2020 19:04:18 +0200 Subject: [PATCH 15/37] STY: wrong placed space in strings (#30940) --- pandas/tests/frame/test_missing.py | 4 ++-- pandas/tests/frame/test_repr_info.py | 6 +++--- pandas/tests/frame/test_reshape.py | 4 ++-- pandas/tests/groupby/test_grouping.py | 5 +---- pandas/tests/plotting/test_datetimelike.py | 4 ++-- pandas/tests/plotting/test_misc.py | 8 ++++---- pandas/tests/reshape/merge/test_join.py | 4 ++-- pandas/tests/reshape/merge/test_merge.py | 15 +++++++-------- pandas/tests/reshape/test_concat.py | 8 ++++---- pandas/tests/test_strings.py | 8 ++++---- pandas/util/_validators.py | 2 +- 11 files changed, 32 insertions(+), 36 deletions(-) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 2e6759cb1a238..ae0516dd29a1f 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -670,8 +670,8 @@ def test_fillna_invalid_value(self, float_frame): float_frame.fillna((1, 2)) # frame with series msg = ( - '"value" parameter must be a scalar, dict or Series, but you' - ' passed a "DataFrame"' + '"value" parameter must be a scalar, dict or Series, but you ' + 'passed a "DataFrame"' ) with pytest.raises(TypeError, match=msg): float_frame.iloc[:, 0].fillna(float_frame) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 05bdec4a3a4d2..49e6fe4940e18 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -164,13 +164,13 @@ def test_repr_column_name_unicode_truncation_bug(self): "Id": [7117434], "StringCol": ( "Is it possible to modify drop plot code" - " so that the output graph is displayed " + "so that the output graph is displayed " "in iphone simulator, Is it possible to " "modify drop plot code so that the " "output graph is \xe2\x80\xa8displayed " "in iphone simulator.Now we are adding " - "the CSV file externally. I want to Call" - " the File through the code.." + "the CSV file externally. I want to Call " + "the File through the code.." 
), } ) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 56a0c8cf4f5bd..60b7611c8b9be 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -424,8 +424,8 @@ def test_stack_mixed_levels(self): # When mixed types are passed and the ints are not level # names, raise msg = ( - "level should contain all level names or all level numbers, not" - " a mixture of the two" + "level should contain all level names or all level numbers, not " + "a mixture of the two" ) with pytest.raises(ValueError, match=msg): df2.stack(level=["animal", 0]) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 70ba21d89d22f..e424913804c33 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -725,10 +725,7 @@ def test_get_group(self): g.get_group("foo") with pytest.raises(ValueError, match=msg): g.get_group(("foo")) - msg = ( - "must supply a same-length tuple to get_group with multiple" - " grouping keys" - ) + msg = "must supply a same-length tuple to get_group with multiple grouping keys" with pytest.raises(ValueError, match=msg): g.get_group(("foo", "bar", "baz")) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 8f855fd0c6cff..fb86b600d3d3c 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -121,8 +121,8 @@ def test_both_style_and_color(self): ts = tm.makeTimeSeries() msg = ( "Cannot pass 'style' string with a color symbol and 'color' " - "keyword argument. Please use one or the other or pass 'style'" - " without a color symbol" + "keyword argument. Please use one or the other or pass 'style' " + "without a color symbol" ) with pytest.raises(ValueError, match=msg): ts.plot(style="b-", color="#000099") diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index c8aa1f23ccf1f..228c84528e882 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -319,8 +319,8 @@ def test_subplot_titles(self, iris): # Case len(title) > len(df) msg = ( - "The length of `title` must equal the number of columns if" - " using `title` of type `list` and `subplots=True`" + "The length of `title` must equal the number of columns if " + "using `title` of type `list` and `subplots=True`" ) with pytest.raises(ValueError, match=msg): df.plot(subplots=True, title=title + ["kittens > puppies"]) @@ -331,8 +331,8 @@ def test_subplot_titles(self, iris): # Case subplots=False and title is of type list msg = ( - "Using `title` of type `list` is not supported unless" - " `subplots=True` is passed" + "Using `title` of type `list` is not supported unless " + "`subplots=True` is passed" ) with pytest.raises(ValueError, match=msg): df.plot(subplots=False, title=title) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index a660acb143433..7020d373caf82 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -212,8 +212,8 @@ def test_join_on(self): source_copy = source.copy() source_copy["A"] = 0 msg = ( - "You are trying to merge on float64 and object columns. If" - " you wish to proceed you should use pd.concat" + "You are trying to merge on float64 and object columns. 
If " + "you wish to proceed you should use pd.concat" ) with pytest.raises(ValueError, match=msg): target.join(source_copy, on="A") diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 8e0c4766056d3..30c440035d48e 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -201,8 +201,8 @@ def test_merge_misspecified(self): merge(self.left, self.right, right_index=True) msg = ( - 'Can only pass argument "on" OR "left_on" and "right_on", not' - " a combination of both" + 'Can only pass argument "on" OR "left_on" and "right_on", not ' + "a combination of both" ) with pytest.raises(pd.errors.MergeError, match=msg): merge(self.left, self.left, left_on="key", on="key") @@ -1013,10 +1013,9 @@ def test_indicator(self): df_badcolumn = DataFrame({"col1": [1, 2], i: [2, 2]}) msg = ( - "Cannot use `indicator=True` option when data contains a" - " column named {}|" - "Cannot use name of an existing column for indicator" - " column" + "Cannot use `indicator=True` option when data contains a " + "column named {}|" + "Cannot use name of an existing column for indicator column" ).format(i) with pytest.raises(ValueError, match=msg): merge(df1, df_badcolumn, on="col1", how="outer", indicator=True) @@ -1235,8 +1234,8 @@ def test_validation(self): ) msg = ( - "Merge keys are not unique in either left or right dataset;" - " not a one-to-one merge" + "Merge keys are not unique in either left or right dataset; " + "not a one-to-one merge" ) with pytest.raises(MergeError, match=msg): merge(left, right, on="a", validate="1:1") diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 990669f1ae13a..b3b2c5a05c6ad 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -198,8 +198,8 @@ def test_concatlike_same_dtypes(self): # cannot append non-index msg = ( - r"cannot concatenate object of type '.+';" - " only Series and DataFrame objs are valid" + r"cannot concatenate object of type '.+'; " + "only Series and DataFrame objs are valid" ) with pytest.raises(TypeError, match=msg): pd.Series(vals1).append(vals2) @@ -1866,8 +1866,8 @@ def test_concat_invalid(self): # trying to concat a ndframe with a non-ndframe df1 = tm.makeCustomDataframe(10, 2) msg = ( - "cannot concatenate object of type '{}';" - " only Series and DataFrame objs are valid" + "cannot concatenate object of type '{}'; " + "only Series and DataFrame objs are valid" ) for obj in [1, dict(), [1, 2], (1, 2)]: with pytest.raises(TypeError, match=msg.format(type(obj))): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 7f3375070d7d9..a92f917820bd0 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3392,8 +3392,8 @@ def test_encode_decode_errors(self): encodeBase = Series(["a", "b", "a\x9d"]) msg = ( - r"'charmap' codec can't encode character '\\x9d' in position 1:" - " character maps to " + r"'charmap' codec can't encode character '\\x9d' in position 1: " + "character maps to " ) with pytest.raises(UnicodeEncodeError, match=msg): encodeBase.str.encode("cp1252") @@ -3406,8 +3406,8 @@ def test_encode_decode_errors(self): decodeBase = Series([b"a", b"b", b"a\x9d"]) msg = ( - "'charmap' codec can't decode byte 0x9d in position 1:" - " character maps to " + "'charmap' codec can't decode byte 0x9d in position 1: " + "character maps to " ) with pytest.raises(UnicodeDecodeError, match=msg): decodeBase.str.decode("cp1252") diff --git 
a/pandas/util/_validators.py b/pandas/util/_validators.py index b69c974661f89..a715094e65e98 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -297,7 +297,7 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): "\n\t'.{method_name}(index=a, columns=b)'.\nUse named " "arguments to remove any ambiguity. In the future, using " "positional arguments for 'index' or 'columns' will raise " - " a 'TypeError'." + "a 'TypeError'." ) warnings.warn(msg.format(method_name=method_name), FutureWarning, stacklevel=4) out[data._AXIS_NAMES[0]] = args[0] From 45580a213c9e5e2c69fa39840c3583f6f5160bed Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Mon, 13 Jan 2020 01:31:16 -0700 Subject: [PATCH 16/37] DOC: Fix whatsnew contributors section (#30926) --- doc/source/whatsnew/v0.25.3.rst | 2 +- doc/source/whatsnew/v1.0.0.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.3.rst b/doc/source/whatsnew/v0.25.3.rst index f73a3f956f42e..f7f54198a0f82 100644 --- a/doc/source/whatsnew/v0.25.3.rst +++ b/doc/source/whatsnew/v0.25.3.rst @@ -19,4 +19,4 @@ Groupby/resample/rolling Contributors ~~~~~~~~~~~~ -.. contributors:: v0.25.2..HEAD +.. contributors:: v0.25.2..v0.25.3 diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 5f79accc5c679..afbc113e98957 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1177,3 +1177,5 @@ Other Contributors ~~~~~~~~~~~~ + +.. contributors:: v0.25.3..v1.0.0rc0 From 439d6298f9af1a6ddb207a6920d47d6e0eb1abe4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 13 Jan 2020 01:48:37 -0800 Subject: [PATCH 17/37] CI: numpydev changed double to single quote (#30952) --- pandas/tests/dtypes/test_common.py | 3 ++- pandas/tests/frame/methods/test_to_records.py | 2 +- pandas/tests/indexes/interval/test_astype.py | 2 +- pandas/tests/indexes/interval/test_constructors.py | 2 +- pandas/tests/io/parser/test_dtypes.py | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index ce925891f62c0..097e83d93ee71 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -668,7 +668,8 @@ def test__get_dtype(input_param, result): (None, "Cannot deduce dtype from null object"), (1, "data type not understood"), (1.2, "data type not understood"), - ("random string", 'data type "random string" not understood'), + # numpy dev changed from double-quotes to single quotes + ("random string", "data type [\"']random string[\"'] not understood"), (pd.DataFrame([1, 2]), "data type not understood"), ], ) diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index 54a3affdc3024..d0181f0309af1 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -235,7 +235,7 @@ def test_to_records_with_categorical(self): # Check that bad types raise ( dict(index=False, column_dtypes={"A": "int32", "B": "foo"}), - (TypeError, 'data type "foo" not understood'), + (TypeError, "data type [\"']foo[\"'] not understood"), ), ], ) diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index 2b1742d58b77e..c94af6c0d533e 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -67,7 +67,7 @@ def test_astype_cannot_cast(self, index, dtype): index.astype(dtype) def 
test_astype_invalid_dtype(self, index): - msg = 'data type "fake_dtype" not understood' + msg = "data type [\"']fake_dtype[\"'] not understood" with pytest.raises(TypeError, match=msg): index.astype("fake_dtype") diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 13a45df743cf5..837c124db2bed 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -164,7 +164,7 @@ def test_generic_errors(self, constructor): constructor(dtype="int64", **filler) # invalid dtype - msg = 'data type "invalid" not understood' + msg = "data type [\"']invalid[\"'] not understood" with pytest.raises(TypeError, match=msg): constructor(dtype="invalid", **filler) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index 2133f8116a95e..d08c86bf2ae75 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -79,7 +79,7 @@ def test_invalid_dtype_per_column(all_parsers): 3,4.5 4,5.5""" - with pytest.raises(TypeError, match='data type "foo" not understood'): + with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"): parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) From 46c2864c34eee0cd94c8842353331e293b0f2004 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 13 Jan 2020 03:00:11 -0800 Subject: [PATCH 18/37] CLN: leftover ix checks (#30951) --- pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 29 ++++++++++++++--------------- pandas/core/indexes/datetimelike.py | 6 +++--- pandas/core/indexes/datetimes.py | 4 ++-- pandas/core/indexes/numeric.py | 8 ++++---- pandas/core/indexes/period.py | 4 ++-- pandas/core/indexes/timedeltas.py | 4 ++-- 7 files changed, 28 insertions(+), 29 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 03e86758b64ed..04ce424edbee4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -177,7 +177,7 @@ class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): ] _internal_names_set: Set[str] = set(_internal_names) _accessors: Set[str] = set() - _deprecations: FrozenSet[str] = frozenset(["get_values", "ix"]) + _deprecations: FrozenSet[str] = frozenset(["get_values"]) _metadata: List[str] = [] _is_copy = None _data: BlockManager diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ca929b188dc33..62e3fd28f6684 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2829,12 +2829,12 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance): Parameters ---------- key : label of the slice bound - kind : {'ix', 'loc', 'getitem', 'iloc'} or None + kind : {'loc', 'getitem', 'iloc'} or None """ @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["loc", "getitem", "iloc", None] if kind == "iloc": return self._validate_indexer("positional", key, kind) @@ -2842,11 +2842,11 @@ def _convert_scalar_indexer(self, key, kind=None): if len(self) and not isinstance(self, ABCMultiIndex): # we can raise here if we are definitive that this - # is positional indexing (eg. .ix on with a float) + # is positional indexing (eg. 
.loc on with a float) # or label indexing if we are using a type able # to be represented in the index - if kind in ["getitem", "ix"] and is_float(key): + if kind in ["getitem"] and is_float(key): if not self.is_floating(): return self._invalid_indexer("label", key) @@ -2882,12 +2882,12 @@ def _convert_scalar_indexer(self, key, kind=None): Parameters ---------- key : label of the slice bound - kind : {'ix', 'loc', 'getitem', 'iloc'} or None + kind : {'loc', 'getitem', 'iloc'} or None """ @Appender(_index_shared_docs["_convert_slice_indexer"]) def _convert_slice_indexer(self, key: slice, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["loc", "getitem", "iloc", None] # validate iloc if kind == "iloc": @@ -3026,7 +3026,7 @@ def _convert_index_indexer(self, keyarr): @Appender(_index_shared_docs["_convert_list_indexer"]) def _convert_list_indexer(self, keyarr, kind=None): if ( - kind in [None, "iloc", "ix"] + kind in [None, "iloc"] and is_integer_dtype(keyarr) and not self.is_floating() and not isinstance(keyarr, ABCPeriodIndex) @@ -4704,7 +4704,7 @@ def _validate_indexer(self, form, key, kind): If we are positional indexer, validate that we have appropriate typed bounds must be an integer. """ - assert kind in ["ix", "loc", "getitem", "iloc"] + assert kind in ["loc", "getitem", "iloc"] if key is None: pass @@ -4725,7 +4725,7 @@ def _validate_indexer(self, form, key, kind): ---------- label : object side : {'left', 'right'} - kind : {'ix', 'loc', 'getitem'} + kind : {'loc', 'getitem'} or None Returns ------- @@ -4738,15 +4738,14 @@ def _validate_indexer(self, form, key, kind): @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind): - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["loc", "getitem", None] # We are a plain index here (sub-class override this method if they # wish to have special treatment for floats/ints, e.g. Float64Index and # datetimelike Indexes # reject them if is_float(label): - if not (kind in ["ix"] and (self.holds_integer() or self.is_floating())): - self._invalid_indexer("slice", label) + self._invalid_indexer("slice", label) # we are trying to find integer bounds on a non-integer based index # this is rejected (generally .loc gets you here) @@ -4780,14 +4779,14 @@ def get_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : {'ix', 'loc', 'getitem'} + kind : {'loc', 'getitem'} or None Returns ------- int Index of label. """ - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["loc", "getitem", None] if side not in ("left", "right"): raise ValueError( @@ -4847,7 +4846,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): If None, defaults to the end. step : int, defaults None If None, defaults to 1. 
- kind : {'ix', 'loc', 'getitem'} or None + kind : {'loc', 'getitem'} or None Returns ------- diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c4dac9d1c4a11..9eb5ed7cb0911 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -388,10 +388,10 @@ def _convert_scalar_indexer(self, key, kind=None): Parameters ---------- key : label of the slice bound - kind : {'ix', 'loc', 'getitem', 'iloc'} or None + kind : {'loc', 'getitem', 'iloc'} or None """ - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["loc", "getitem", "iloc", None] # we don't allow integer/float indexing for loc # we don't allow float indexing for ix/getitem @@ -400,7 +400,7 @@ def _convert_scalar_indexer(self, key, kind=None): is_flt = is_float(key) if kind in ["loc"] and (is_int or is_flt): self._invalid_indexer("index", key) - elif kind in ["ix", "getitem"] and is_flt: + elif kind in ["getitem"] and is_flt: self._invalid_indexer("index", key) return super()._convert_scalar_indexer(key, kind=kind) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2241921e94694..75515949d1855 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -742,7 +742,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : {'ix', 'loc', 'getitem'} + kind : {'loc', 'getitem'} or None Returns ------- @@ -752,7 +752,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): ----- Value of `side` parameter should be validated in caller. """ - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["loc", "getitem", None] if is_float(label) or isinstance(label, time) or is_integer(label): self._invalid_indexer("slice", label) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index b9b44284edaa9..9a3a021bd801a 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -99,7 +99,7 @@ def _validate_dtype(cls, dtype: Dtype) -> None: @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind): - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["loc", "getitem", None] # we will try to coerce to integers return self._maybe_cast_indexer(label) @@ -260,7 +260,7 @@ def asi8(self) -> np.ndarray: @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["loc", "getitem", "iloc", None] # don't coerce ilocs to integers if kind != "iloc": @@ -317,7 +317,7 @@ def asi8(self) -> np.ndarray: @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["loc", "getitem", "iloc", None] # don't coerce ilocs to integers if kind != "iloc": @@ -404,7 +404,7 @@ def astype(self, dtype, copy=True): @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["loc", "getitem", "iloc", None] if kind == "iloc": return self._validate_indexer("positional", key, kind) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 6ab2e66e05d6e..4e3689078d535 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -625,7 +625,7 @@ def 
_maybe_cast_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : {'ix', 'loc', 'getitem'} + kind : {'loc', 'getitem'} Returns ------- @@ -636,7 +636,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): Value of `side` parameter should be validated in caller. """ - assert kind in ["ix", "loc", "getitem"] + assert kind in ["loc", "getitem"] if isinstance(label, datetime): return Period(label, freq=self.freq) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 1f3182bc83e1d..582c257b50ad0 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -310,13 +310,13 @@ def _maybe_cast_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : {'ix', 'loc', 'getitem'} + kind : {'loc', 'getitem'} or None Returns ------- label : object """ - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["loc", "getitem", None] if isinstance(label, str): parsed = Timedelta(label) From bbccf2d8389eb661aad7655eba5c0c7413cc55cf Mon Sep 17 00:00:00 2001 From: Souvik Mandal Date: Mon, 13 Jan 2020 18:35:17 +0530 Subject: [PATCH 19/37] DOC: Move import conventions from wiki to docs #30808 (#30888) --- doc/source/development/code_style.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst index 2fc2f1fb6ee8d..a295038b5a0bd 100644 --- a/doc/source/development/code_style.rst +++ b/doc/source/development/code_style.rst @@ -127,3 +127,29 @@ For example: value = str f"Unknown recived type, got: '{type(value).__name__}'" + + +Imports (aim for absolute) +========================== + +In Python 3, absolute imports are recommended. With an absolute import, a statement +like ``import string`` imports the standard-library ``string`` module rather than +a local ``string.py`` in the same directory. As much as possible, you should try +to write out absolute imports that show the whole import chain from the top-level pandas package. + +Explicit relative imports are also supported in Python 3, but their use is not +recommended. Implicit relative imports should never be used; they were removed +in Python 3. + +For example: + +:: + + # preferred + import pandas.core.common as com + + # not preferred + from .common import test_base + + # wrong + from common import test_base From 2c76d064835b3f616f858d94a45fae152ea2b510 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Mon, 13 Jan 2020 13:08:37 +0000 Subject: [PATCH 20/37] DOC: Move a couple of deprecation notes in whatsnew to the correct section (#30961) --- doc/source/whatsnew/v1.0.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index afbc113e98957..0879189a822f8 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -218,7 +218,6 @@ Other enhancements now preserve those data types with pyarrow >= 0.16.0 (:issue:`20612`, :issue:`28371`). - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`) -- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. 
(:issue:`30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) @@ -226,7 +225,6 @@ Other enhancements - :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`) - Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`) - :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`) -- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30296`) - Added an experimental :attr:`~DataFrame.attrs` for storing global metadata about a dataset (:issue:`29062`) - :meth:`Timestamp.fromisocalendar` is now compatible with python 3.8 and above (:issue:`28115`) - :meth:`DataFrame.to_pickle` and :func:`read_pickle` now accept URL (:issue:`30163`) @@ -707,6 +705,8 @@ Deprecations - ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`) - The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`) - Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`) +- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`) +- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`) **Selecting Columns from a Grouped DataFrame** From 62d16abd3051cacecfe6307be074acaa00587560 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Mon, 13 Jan 2020 16:49:32 +0200 Subject: [PATCH 21/37] STY: concat strings that should not be seperated (#30942) --- pandas/_libs/algos.pyx | 9 ++-- pandas/_libs/groupby.pyx | 3 +- pandas/_libs/hashing.pyx | 11 +++-- pandas/_libs/indexing.pyx | 5 +- pandas/_libs/sparse.pyx | 6 +-- pandas/_libs/testing.pyx | 6 +-- pandas/_libs/tslibs/timestamps.pyx | 74 ++++++++++++++++++---------- pandas/_libs/window/aggregations.pyx | 3 +- 8 files changed, 68 insertions(+), 49 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7a2fc9dc7845a..dd1f38ce3a842 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -914,8 +914,7 @@ def rank_1d(rank_t[:] in_arr, ties_method='average', ranks[argsorted[j]] = i + 1 elif tiebreak == TIEBREAK_FIRST: if rank_t is object: - raise ValueError('first not supported for ' - 'non-numeric data') + raise ValueError('first not supported for non-numeric data') else: for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = j + 1 @@ -971,8 +970,7 @@ def rank_1d(rank_t[:] in_arr, ties_method='average', ranks[argsorted[j]] = i + 1 elif tiebreak == TIEBREAK_FIRST: if rank_t is object: - raise ValueError('first not supported for ' - 'non-numeric data') + raise ValueError('first not supported for non-numeric data') else: for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = j + 1 @@ -1137,8 +1135,7 @@ def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average', ranks[i, argsorted[i, z]] = j + 1 elif tiebreak == TIEBREAK_FIRST: if rank_t is object: - raise ValueError('first not supported ' 
- 'for non-numeric data') + raise ValueError('first not supported for non-numeric data') else: for z in range(j - dups + 1, j + 1): ranks[i, argsorted[i, z]] = z + 1 diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index abb8a6d388d26..93ea94f7b18fc 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -686,8 +686,7 @@ def _group_ohlc(floating[:, :] out, raise ValueError('Output array must have 4 columns') if K > 1: - raise NotImplementedError("Argument 'values' must have only " - "one dimension") + raise NotImplementedError("Argument 'values' must have only one dimension") out[:] = np.nan with nogil: diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 5298d8c5ed34e..878da670b2f68 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -51,8 +51,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): k = key.encode(encoding) kb = k if len(k) != 16: - raise ValueError("key should be a 16-byte string encoded, " - f"got {k} (len {len(k)})") + raise ValueError( + f"key should be a 16-byte string encoded, got {k} (len {len(k)})" + ) n = len(arr) @@ -77,8 +78,10 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): hash(val) data = str(val).encode(encoding) else: - raise TypeError(f"{val} of type {type(val)} is not a valid type " - "for hashing, must be string or null") + raise TypeError( + f"{val} of type {type(val)} is not a valid type for hashing, " + "must be string or null" + ) l = len(data) lens[i] = l diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx index 01f4fb060d982..cdccdb504571c 100644 --- a/pandas/_libs/indexing.pyx +++ b/pandas/_libs/indexing.pyx @@ -18,6 +18,7 @@ cdef class _NDFrameIndexerBase: if ndim is None: ndim = self._ndim = self.obj.ndim if ndim > 2: - raise ValueError("NDFrameIndexer does not support " - "NDFrame objects with ndim > 2") + raise ValueError( + "NDFrameIndexer does not support NDFrame objects with ndim > 2" + ) return ndim diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index ee83901040b36..3a6dd506b2428 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -72,9 +72,9 @@ cdef class IntIndex(SparseIndex): """ if self.npoints > self.length: - msg = (f"Too many indices. Expected " - f"{self.length} but found {self.npoints}") - raise ValueError(msg) + raise ValueError( + f"Too many indices. Expected {self.length} but found {self.npoints}" + ) # Indices are vacuously ordered and non-negative # if the sequence of indices is empty. 
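A note on the pattern these STY patches enforce: Python concatenates adjacent string literals at compile time with no separator, so when a long message is split across literals the space belongs at the end of the first fragment, where it is visible, rather than at the start of the second, where it is easy to miss in review. A minimal sketch, reusing a message quoted earlier in this series (both spellings produce the identical runtime string; only readability changes)::

    # Adjacent string literals are joined at compile time.
    before = (
        "level should contain all level names or all level numbers, not"
        " a mixture of the two"  # leading space hides at the line break
    )
    after = (
        "level should contain all level names or all level numbers, not "
        "a mixture of the two"  # trailing space is visible at the split
    )
    assert before == after  # same value; the patches change style only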
diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 5a30b71a6fea1..0e57b563d4d25 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -127,9 +127,9 @@ cpdef assert_almost_equal(a, b, # classes can't be the same, to raise error assert_class_equal(a, b, obj=obj) - assert has_length(a) and has_length(b), ("Can't compare objects without " - "length, one or both is invalid: " - f"({a}, {b})") + assert has_length(a) and has_length(b), ( + f"Can't compare objects without length, one or both is invalid: ({a}, {b})" + ) if a_is_ndarray and b_is_ndarray: na, nb = a.size, b.size diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index abe7f9e5b4105..36566b55e74ad 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -161,8 +161,7 @@ def round_nsint64(values, mode, freq): # if/elif above should catch all rounding modes defined in enum 'RoundTo': # if flow of control arrives here, it is a bug - raise ValueError("round_nsint64 called with an unrecognized " - "rounding mode") + raise ValueError("round_nsint64 called with an unrecognized rounding mode") # ---------------------------------------------------------------------- @@ -324,8 +323,10 @@ class Timestamp(_Timestamp): Function is not implemented. Use pd.to_datetime(). """ - raise NotImplementedError("Timestamp.strptime() is not implemented." - "Use to_datetime() to parse date strings.") + raise NotImplementedError( + "Timestamp.strptime() is not implemented. " + "Use to_datetime() to parse date strings." + ) @classmethod def combine(cls, date, time): @@ -381,8 +382,9 @@ class Timestamp(_Timestamp): if tzinfo is not None: if not PyTZInfo_Check(tzinfo): # tzinfo must be a datetime.tzinfo object, GH#17690 - raise TypeError(f'tzinfo must be a datetime.tzinfo object, ' - f'not {type(tzinfo)}') + raise TypeError( + f"tzinfo must be a datetime.tzinfo object, not {type(tzinfo)}" + ) elif tz is not None: raise ValueError('Can provide at most one of tz, tzinfo') @@ -393,8 +395,10 @@ class Timestamp(_Timestamp): # User passed a date string to parse. # Check that the user didn't also pass a date attribute kwarg. if any(arg is not None for arg in _date_attributes): - raise ValueError('Cannot pass a date attribute keyword ' - 'argument when passing a date string') + raise ValueError( + "Cannot pass a date attribute keyword " + "argument when passing a date string" + ) elif ts_input is _no_input: # User passed keyword arguments. @@ -578,8 +582,10 @@ timedelta}, default 'raise' @tz.setter def tz(self, value): # GH 3746: Prevent localizing or converting the index by setting tz - raise AttributeError("Cannot directly set timezone. Use tz_localize() " - "or tz_convert() as appropriate") + raise AttributeError( + "Cannot directly set timezone. 
" + "Use tz_localize() or tz_convert() as appropriate" + ) def __setstate__(self, state): self.value = state[0] @@ -598,9 +604,10 @@ timedelta}, default 'raise' if self.tz is not None: # GH#21333 - warnings.warn("Converting to Period representation will " - "drop timezone information.", - UserWarning) + warnings.warn( + "Converting to Period representation will drop timezone information.", + UserWarning, + ) if freq is None: freq = self.freq @@ -810,13 +817,13 @@ default 'raise' if ambiguous == 'infer': raise ValueError('Cannot infer offset with only one time.') - nonexistent_options = ('raise', 'NaT', 'shift_forward', - 'shift_backward') + nonexistent_options = ('raise', 'NaT', 'shift_forward', 'shift_backward') if nonexistent not in nonexistent_options and not isinstance( nonexistent, timedelta): - raise ValueError("The nonexistent argument must be one of 'raise', " - "'NaT', 'shift_forward', 'shift_backward' or " - "a timedelta object") + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or a timedelta object" + ) if self.tzinfo is None: # tz naive, localize @@ -833,8 +840,9 @@ default 'raise' value = tz_convert_single(self.value, UTC, self.tz) return Timestamp(value, tz=tz, freq=self.freq) else: - raise TypeError('Cannot localize tz-aware Timestamp, use ' - 'tz_convert for conversions') + raise TypeError( + "Cannot localize tz-aware Timestamp, use tz_convert for conversions" + ) def tz_convert(self, tz): """ @@ -857,17 +865,28 @@ default 'raise' """ if self.tzinfo is None: # tz naive, use tz_localize - raise TypeError('Cannot convert tz-naive Timestamp, use ' - 'tz_localize to localize') + raise TypeError( + "Cannot convert tz-naive Timestamp, use tz_localize to localize" + ) else: # Same UTC timestamp, different time zone return Timestamp(self.value, tz=tz, freq=self.freq) astimezone = tz_convert - def replace(self, year=None, month=None, day=None, - hour=None, minute=None, second=None, microsecond=None, - nanosecond=None, tzinfo=object, fold=0): + def replace( + self, + year=None, + month=None, + day=None, + hour=None, + minute=None, + second=None, + microsecond=None, + nanosecond=None, + tzinfo=object, + fold=0, + ): """ implements datetime.replace, handles nanoseconds. 
@@ -910,8 +929,9 @@ default 'raise' def validate(k, v): """ validate integers """ if not is_integer_object(v): - raise ValueError(f"value must be an integer, received " - f"{type(v)} for {k}") + raise ValueError( + f"value must be an integer, received {type(v)} for {k}" + ) return v if year is not None: diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 0348843abc129..fe74d701ef00f 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1871,8 +1871,7 @@ def ewmcov(float64_t[:] input_x, float64_t[:] input_y, bint is_observation if len(input_y) != N: - raise ValueError(f"arrays are of different lengths " - f"({N} and {len(input_y)})") + raise ValueError(f"arrays are of different lengths ({N} and {len(input_y)})") output = np.empty(N, dtype=float) if N == 0: From dd6e31aa41056cfb4724eafeefa36f9587e8a763 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 13 Jan 2020 09:22:03 -0600 Subject: [PATCH 22/37] REGR: Fixed hash_key=None for object values (#30900) * REGR: Fixed hash_key=None for object values Closes https://github.com/pandas-dev/pandas/issues/30887 --- pandas/core/util/hashing.py | 6 +++++- pandas/tests/util/test_hashing.py | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 43655fa3ea913..3366f10b92604 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -2,6 +2,7 @@ data hash pandas / numpy objects """ import itertools +from typing import Optional import numpy as np @@ -58,7 +59,7 @@ def hash_pandas_object( obj, index: bool = True, encoding: str = "utf8", - hash_key: str = _default_hash_key, + hash_key: Optional[str] = _default_hash_key, categorize: bool = True, ): """ @@ -82,6 +83,9 @@ def hash_pandas_object( """ from pandas import Series + if hash_key is None: + hash_key = _default_hash_key + if isinstance(obj, ABCMultiIndex): return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index c915edad4bb8e..c856585f20138 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -374,3 +374,10 @@ def test_hash_with_tuple(): df3 = pd.DataFrame({"data": [tuple([1, []]), tuple([2, {}])]}) with pytest.raises(TypeError, match="unhashable type: 'list'"): hash_pandas_object(df3) + + +def test_hash_object_none_key(): + # https://github.com/pandas-dev/pandas/issues/30887 + result = pd.util.hash_pandas_object(pd.Series(["a", "b"]), hash_key=None) + expected = pd.Series([4578374827886788867, 17338122309987883691], dtype="uint64") + tm.assert_series_equal(result, expected) From bd63eceb80333b7160c2be08163e718804930d57 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 13 Jan 2020 09:22:28 -0800 Subject: [PATCH 23/37] CLN: remove no-op from indexing (#30934) --- pandas/core/indexing.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ea59a6a49e649..cc11879142ffe 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1340,9 +1340,6 @@ def _multi_take(self, tup: Tuple): } return o._reindex_with_indexers(d, copy=True, allow_dups=True) - def _convert_for_reindex(self, key, axis: int): - return key - def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): # we have an axis0 multi-index, handle or raise axis = self.axis or 0 @@ -1539,10 +1536,6 @@ def 
_get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): return ax[indexer], indexer if ax.is_unique and not getattr(ax, "is_overlapping", False): - # If we are trying to get actual keys from empty Series, we - # patiently wait for a KeyError later on - otherwise, convert - if len(ax) or not len(key): - key = self._convert_for_reindex(key, axis) indexer = ax.get_indexer_for(key) keyarr = ax.reindex(keyarr)[0] else: @@ -1757,6 +1750,7 @@ def __getitem__(self, key): try: return self._getitem_scalar(key) except (KeyError, IndexError, AttributeError): + # AttributeError for IntervalTree get_value pass return self._getitem_tuple(key) else: From 7ba53f0e0e61b0e542bf553707a1df7cf8cfa83e Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 13 Jan 2020 19:03:25 +0000 Subject: [PATCH 24/37] BUG: -1 to the power of pd.NA was returning -1 (#30960) --- doc/source/user_guide/missing_data.rst | 1 - pandas/_libs/missing.pyx | 4 ++-- pandas/tests/arrays/test_integer.py | 12 ++++++----- pandas/tests/scalar/test_na_scalar.py | 29 ++++++++++++++------------ 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index abbb6feef6056..df9949e8ac261 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -831,7 +831,6 @@ Operation Result ================ ====== ``pd.NA ** 0`` 0 ``1 ** pd.NA`` 1 -``-1 ** pd.NA`` -1 ================ ====== In equality and comparison operations, ``pd.NA`` also propagates. This deviates diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 26653438356b1..4d17a6f883c1c 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -417,12 +417,12 @@ class NAType(C_NAType): if other is C_NA: return NA elif isinstance(other, (numbers.Number, np.bool_)): - if other == 1 or other == -1: + if other == 1: return other else: return NA elif isinstance(other, np.ndarray): - return np.where((other == 1) | (other == -1), other, NA) + return np.where(other == 1, other, NA) return NotImplemented diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 0c8980c43c370..f1a7cc741603d 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -363,24 +363,26 @@ def test_divide_by_zero(self, zero, negative): tm.assert_numpy_array_equal(result, expected) def test_pow_scalar(self): - a = pd.array([0, 1, None, 2], dtype="Int64") + a = pd.array([-1, 0, 1, None, 2], dtype="Int64") result = a ** 0 - expected = pd.array([1, 1, 1, 1], dtype="Int64") + expected = pd.array([1, 1, 1, 1, 1], dtype="Int64") tm.assert_extension_array_equal(result, expected) result = a ** 1 - expected = pd.array([0, 1, None, 2], dtype="Int64") + expected = pd.array([-1, 0, 1, None, 2], dtype="Int64") tm.assert_extension_array_equal(result, expected) result = a ** pd.NA - expected = pd.array([None, 1, None, None], dtype="Int64") + expected = pd.array([None, None, 1, None, None], dtype="Int64") tm.assert_extension_array_equal(result, expected) result = a ** np.nan - expected = np.array([np.nan, 1, np.nan, np.nan], dtype="float64") + expected = np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64") tm.assert_numpy_array_equal(result, expected) # reversed + a = a[1:] # Can't raise integers to negative powers. 
+ result = 0 ** a expected = pd.array([1, 0, None, 0], dtype="Int64") tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index 7d05511239ebc..dcb9d66708724 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -96,19 +96,7 @@ def test_pow_special(value, asarray): @pytest.mark.parametrize( - "value", - [ - 1, - 1.0, - -1, - -1.0, - True, - np.bool_(True), - np.int_(1), - np.float_(1), - np.int_(-1), - np.float_(-1), - ], + "value", [1, 1.0, True, np.bool_(True), np.int_(1), np.float_(1)], ) @pytest.mark.parametrize("asarray", [True, False]) def test_rpow_special(value, asarray): @@ -125,6 +113,21 @@ def test_rpow_special(value, asarray): assert result == value +@pytest.mark.parametrize( + "value", [-1, -1.0, np.int_(-1), np.float_(-1)], +) +@pytest.mark.parametrize("asarray", [True, False]) +def test_rpow_minus_one(value, asarray): + if asarray: + value = np.array([value]) + result = value ** pd.NA + + if asarray: + result = result[0] + + assert pd.isna(result) + + def test_unary_ops(): assert +NA is NA assert -NA is NA From 67fcdefbd42921faa116558d5cf93635dd6fb1fc Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 13 Jan 2020 19:28:23 +0000 Subject: [PATCH 25/37] TYP: NDFrame.resample (#30947) --- pandas/core/frame.py | 7 ++++--- pandas/core/generic.py | 10 +++++++--- pandas/core/resample.py | 4 ++-- pandas/core/series.py | 7 ++++--- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 676b78573399c..594b8a00a8672 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -102,7 +102,6 @@ from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray from pandas.core.arrays.sparse import SparseFrameAccessor from pandas.core.generic import NDFrame, _shared_docs -from pandas.core.groupby import generic as groupby_generic from pandas.core.indexes import base as ibase from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences from pandas.core.indexes.datetimes import DatetimeIndex @@ -129,6 +128,7 @@ import pandas.plotting if TYPE_CHECKING: + from pandas.core.groupby.generic import DataFrameGroupBy from pandas.io.formats.style import Styler # --------------------------------------------------------------------- @@ -5777,13 +5777,14 @@ def groupby( group_keys: bool = True, squeeze: bool = False, observed: bool = False, - ) -> "groupby_generic.DataFrameGroupBy": + ) -> "DataFrameGroupBy": + from pandas.core.groupby.generic import DataFrameGroupBy if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") axis = self._get_axis_number(axis) - return groupby_generic.DataFrameGroupBy( + return DataFrameGroupBy( obj=self, keys=by, axis=axis, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 04ce424edbee4..05066ac0ec128 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8,6 +8,7 @@ import re from textwrap import dedent from typing import ( + TYPE_CHECKING, Any, Callable, Dict, @@ -101,6 +102,9 @@ from pandas.io.formats.printing import pprint_thing from pandas.tseries.frequencies import to_offset +if TYPE_CHECKING: + from pandas.core.resample import Resampler + # goal is to be able to define the docs close to function, while still being # able to share _shared_docs: Dict[str, str] = dict() @@ -7685,7 +7689,7 @@ def resample( base: int = 0, on=None, level=None, - ): + ) -> 
"Resampler": """ Resample time-series data. @@ -7950,10 +7954,10 @@ def resample( 2000-01-04 36 90 """ - from pandas.core.resample import resample + from pandas.core.resample import get_resampler axis = self._get_axis_number(axis) - return resample( + return get_resampler( self, freq=rule, label=label, diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 0e43880dfda07..fb837409a00f5 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1262,7 +1262,7 @@ def _constructor(self): return TimedeltaIndexResampler -def resample(obj, kind=None, **kwds): +def get_resampler(obj, kind=None, **kwds): """ Create a TimeGrouper and return our resampler. """ @@ -1270,7 +1270,7 @@ def resample(obj, kind=None, **kwds): return tg._get_resampler(obj, kind=kind) -resample.__doc__ = Resampler.__doc__ +get_resampler.__doc__ = Resampler.__doc__ def get_resampler_for_grouping( diff --git a/pandas/core/series.py b/pandas/core/series.py index ed338700f1011..fe5c5fd5e2bc8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -70,7 +70,6 @@ is_empty_data, sanitize_array, ) -from pandas.core.groupby import generic as groupby_generic from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.api import ( @@ -94,6 +93,7 @@ if TYPE_CHECKING: from pandas.core.frame import DataFrame + from pandas.core.groupby.generic import SeriesGroupBy __all__ = ["Series"] @@ -1634,13 +1634,14 @@ def groupby( group_keys: bool = True, squeeze: bool = False, observed: bool = False, - ) -> "groupby_generic.SeriesGroupBy": + ) -> "SeriesGroupBy": + from pandas.core.groupby.generic import SeriesGroupBy if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") axis = self._get_axis_number(axis) - return groupby_generic.SeriesGroupBy( + return SeriesGroupBy( obj=self, keys=by, axis=axis, From 993fdbebbcf96e31d6de7cf40b297a50ddfcaf7c Mon Sep 17 00:00:00 2001 From: William Ayd Date: Mon, 13 Jan 2020 11:50:08 -0800 Subject: [PATCH 26/37] DOC: whatsnew for 1.1 (#30972) --- doc/source/index.rst.template | 4 +- doc/source/whatsnew/index.rst | 8 ++ doc/source/whatsnew/v1.1.0.rst | 168 +++++++++++++++++++++++++++++++++ 3 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 doc/source/whatsnew/v1.1.0.rst diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index 10705787dfedf..4ced92cbda81a 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -39,7 +39,7 @@ See the :ref:`overview` for more detail about what's in the library. :hidden: {% endif %} {% if not single_doc %} - What's New in 1.0.0 + What's New in 1.1.0 getting_started/index user_guide/index {% endif -%} @@ -51,7 +51,7 @@ See the :ref:`overview` for more detail about what's in the library. whatsnew/index {% endif %} -* :doc:`whatsnew/v1.0.0` +* :doc:`whatsnew/v1.1.0` * :doc:`getting_started/index` * :doc:`getting_started/install` diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index 05c7f72882088..bc463d0ab22d8 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -10,6 +10,14 @@ This is the list of changes to pandas between each release. For full details, see the commit logs at http://github.com/pandas-dev/pandas. For install and upgrade instructions, see :ref:`install`. +Version 1.1 +----------- + +.. 
toctree:: + :maxdepth: 2 + + v1.1.0 + Version 1.0 ----------- diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst new file mode 100644 index 0000000000000..c3ee72f6442fc --- /dev/null +++ b/doc/source/whatsnew/v1.1.0.rst @@ -0,0 +1,168 @@ +.. _whatsnew_110: + +What's new in 1.1.0 (??) +------------------------ + +These are the changes in pandas 1.1.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_110.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- +- + + +.. --------------------------------------------------------------------------- + +.. _whatsnew_110.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- +- + +.. --------------------------------------------------------------------------- + + +.. _whatsnew_110.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_110.bug_fixes: + +Bug fixes +~~~~~~~~~ + + +Categorical +^^^^^^^^^^^ + +- +- + +Datetimelike +^^^^^^^^^^^^ +- +- + +Timedelta +^^^^^^^^^ + +- +- + +Timezones +^^^^^^^^^ + +- +- + + +Numeric +^^^^^^^ +- +- + +Conversion +^^^^^^^^^^ + +- +- + +Strings +^^^^^^^ + +- +- + + +Interval +^^^^^^^^ + +- +- + +Indexing +^^^^^^^^ + +- +- + +Missing +^^^^^^^ + +- +- + +MultiIndex +^^^^^^^^^^ + +- +- + +I/O +^^^ + +- +- + +Plotting +^^^^^^^^ + +- +- + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- +- + + +Reshaping +^^^^^^^^^ + +- +- + +Sparse +^^^^^^ + +- +- + +ExtensionArray +^^^^^^^^^^^^^^ + +- +- + + +Other +^^^^^ +- +- + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_110.contributors: + +Contributors +~~~~~~~~~~~~ From 69283277ecf220cb9715d2460b3e630e31e0e686 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 13 Jan 2020 12:25:58 -0800 Subject: [PATCH 27/37] CLN: misc cleanups (#30877) --- pandas/_libs/index.pyx | 2 -- pandas/core/indexes/datetimelike.py | 14 ++++---------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 28d269a9a809e..ce6d12d61c521 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -85,7 +85,6 @@ cdef class IndexEngine: """ cdef: object loc - void* data_ptr loc = self.get_loc(key) if isinstance(loc, slice) or util.is_array(loc): @@ -101,7 +100,6 @@ cdef class IndexEngine: """ cdef: object loc - void* data_ptr loc = self.get_loc(key) value = convert_scalar(arr, value) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9eb5ed7cb0911..bf1272b223f70 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -156,13 +156,11 @@ def equals(self, other) -> bool: def __contains__(self, key): try: res = self.get_loc(key) - return ( - is_scalar(res) - or isinstance(res, slice) - or (is_list_like(res) and len(res)) - ) except (KeyError, TypeError, ValueError): return False + return bool( + is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res)) + ) # Try to run function on index first, and then on elements of index # Especially important for group-by functionality @@ -875,11 +873,7 @@ def _is_convertible_to_index_for_join(cls, other: Index) -> bool: def _wrap_joined_index(self, joined, other): name = get_op_result_name(self, other) - if ( - isinstance(other, type(self)) - and self.freq == other.freq - and self._can_fast_union(other) - ): + if self._can_fast_union(other): joined = self._shallow_copy(joined) joined.name = name return joined From 2bf0c9fde7dacde096729cd241902f8571f9c024 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 13 Jan 2020 14:45:42 -0600 Subject: [PATCH 28/37] Compat for util.testing import (#30973) * Compat for util.testing import Closes #30869 --- pandas/tests/api/test_api.py | 18 ++++++++++++++++++ pandas/util/__init__.py | 27 +++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 8b897524cb053..406d5f055797d 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -1,6 +1,9 @@ +import subprocess import sys from typing import List +import pytest + import pandas as pd from pandas import api, compat import pandas._testing as tm @@ -311,3 +314,18 @@ def test_util_testing_deprecated_direct(self): assert "pandas.util.testing is deprecated" in str(m[0].message) assert "pandas.testing instead" in str(m[0].message) + + def test_util_in_top_level(self): + # in a subprocess to avoid import caching issues + out = subprocess.check_output( + [ + sys.executable, + "-c", + "import pandas; pandas.util.testing.assert_series_equal", + ], + stderr=subprocess.STDOUT, + ).decode() + assert "pandas.util.testing is deprecated" in out + + with pytest.raises(AttributeError, match="foo"): + pd.util.foo diff --git a/pandas/util/__init__.py b/pandas/util/__init__.py index d906c0371d207..b5271dbc0443e 100644 --- a/pandas/util/__init__.py +++ b/pandas/util/__init__.py @@ -1,3 +1,30 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly # noqa +from pandas import compat from pandas.core.util.hashing import hash_array, 
hash_pandas_object # noqa + +# compatibility for import pandas; pandas.util.testing + +if compat.PY37: + + def __getattr__(name): + if name == "testing": + import pandas.util.testing + + return pandas.util.testing + else: + raise AttributeError(f"module 'pandas.util' has no attribute '{name}'") + + +else: + + class _testing: + def __getattr__(self, item): + import pandas.util.testing + + return getattr(pandas.util.testing, item) + + testing = _testing() + + +del compat From 307137ce95869fe636d99d38059827e8c063d430 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Tue, 14 Jan 2020 00:01:38 +0200 Subject: [PATCH 29/37] STY: concat strings (#30979) --- pandas/__init__.py | 9 +++---- pandas/_config/config.py | 3 +-- pandas/core/ops/__init__.py | 3 +-- pandas/core/reshape/melt.py | 6 ++--- pandas/core/reshape/merge.py | 34 ++++++++----------------- pandas/core/reshape/tile.py | 3 +-- pandas/core/tools/datetimes.py | 7 ++--- pandas/core/window/common.py | 3 +-- pandas/core/window/rolling.py | 11 +++----- pandas/io/excel/_util.py | 3 +-- pandas/io/formats/format.py | 8 ++---- pandas/io/formats/html.py | 4 +-- pandas/io/formats/latex.py | 3 +-- pandas/io/sas/sas.pyx | 5 ++-- pandas/io/sas/sas7bdat.py | 6 ++--- pandas/io/sas/sasreader.py | 3 +-- scripts/generate_pip_deps_from_conda.py | 3 +-- 17 files changed, 39 insertions(+), 75 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 491bcb21f245d..d526531b159b2 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -35,8 +35,7 @@ raise ImportError( f"C extension: {module} not built. If you want to import " "pandas from the source directory, you may need to run " - "'python setup.py build_ext --inplace --force' to build " - "the C extensions first." + "'python setup.py build_ext --inplace --force' to build the C extensions first." ) from pandas._config import ( @@ -198,8 +197,7 @@ def __getattr__(name): warnings.warn( "The Panel class is removed from pandas. Accessing it " - "from the top-level namespace will also be removed in " - "the next version", + "from the top-level namespace will also be removed in the next version", FutureWarning, stacklevel=2, ) @@ -238,8 +236,7 @@ class Panel: elif name in {"SparseSeries", "SparseDataFrame"}: warnings.warn( f"The {name} class is removed from pandas. 
Accessing it from " - "the top-level namespace will also be removed in the next " - "version", + "the top-level namespace will also be removed in the next version", FutureWarning, stacklevel=2, ) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 0a3009f74492f..42df8a84a8c77 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -165,8 +165,7 @@ def _reset_option(pat, silent=False): raise ValueError( "You must specify at least 4 characters when " "resetting multiple keys, use the special keyword " - '"all" to reset all the options to their default ' - "value" + '"all" to reset all the options to their default value' ) for k in keys: diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index f51d71d5507a0..1355060efd097 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -664,8 +664,7 @@ def to_series(right): elif right.ndim > 2: raise ValueError( - "Unable to coerce to Series/DataFrame, dim " - f"must be <= 2: {right.shape}" + f"Unable to coerce to Series/DataFrame, dim must be <= 2: {right.shape}" ) elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)): diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index d4ccb19fc0dda..d04287e1e9088 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -52,8 +52,7 @@ def melt( if not missing.empty: raise KeyError( "The following 'id_vars' are not present " - "in the DataFrame: {missing}" - "".format(missing=list(missing)) + f"in the DataFrame: {list(missing)}" ) else: id_vars = [] @@ -74,8 +73,7 @@ def melt( if not missing.empty: raise KeyError( "The following 'value_vars' are not present in " - "the DataFrame: {missing}" - "".format(missing=list(missing)) + f"the DataFrame: {list(missing)}" ) frame = frame.loc[:, id_vars + value_vars] else: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 5f92e4a88b568..acb53ff6ca555 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -600,13 +600,11 @@ def __init__( if not is_bool(left_index): raise ValueError( - "left_index parameter must be of type bool, not " - "{left_index}".format(left_index=type(left_index)) + f"left_index parameter must be of type bool, not {type(left_index)}" ) if not is_bool(right_index): raise ValueError( - "right_index parameter must be of type bool, not " - "{right_index}".format(right_index=type(right_index)) + f"right_index parameter must be of type bool, not {type(right_index)}" ) # warn user when merging between different levels @@ -1092,8 +1090,7 @@ def _maybe_coerce_merge_keys(self): warnings.warn( "You are merging on int and float " "columns where the float values " - "are not equal to their int " - "representation", + "are not equal to their int representation", UserWarning, ) continue @@ -1103,8 +1100,7 @@ def _maybe_coerce_merge_keys(self): warnings.warn( "You are merging on int and float " "columns where the float values " - "are not equal to their int " - "representation", + "are not equal to their int representation", UserWarning, ) continue @@ -1251,20 +1247,17 @@ def _validate(self, validate: str): ) elif not left_unique: raise MergeError( - "Merge keys are not unique in left dataset; " - "not a one-to-one merge" + "Merge keys are not unique in left dataset; not a one-to-one merge" ) elif not right_unique: raise MergeError( - "Merge keys are not unique in right dataset; " - "not a one-to-one merge" + "Merge keys are not unique in right dataset; not a one-to-one merge" 
) elif validate in ["one_to_many", "1:m"]: if not left_unique: raise MergeError( - "Merge keys are not unique in left dataset; " - "not a one-to-many merge" + "Merge keys are not unique in left dataset; not a one-to-many merge" ) elif validate in ["many_to_one", "m:1"]: @@ -1833,8 +1826,7 @@ def _left_join_on_index(left_ax: Index, right_ax: Index, join_keys, sort: bool = raise AssertionError( "If more than one join key is given then " "'right_ax' must be a MultiIndex and the " - "number of join keys must be the number of " - "levels in right_ax" + "number of join keys must be the number of levels in right_ax" ) left_indexer, right_indexer = _get_multiindex_indexer( @@ -2004,8 +1996,7 @@ def _validate_operand(obj: FrameOrSeries) -> "DataFrame": return obj.to_frame() else: raise TypeError( - "Can only merge Series or DataFrame objects, " - "a {obj} was passed".format(obj=type(obj)) + f"Can only merge Series or DataFrame objects, a {type(obj)} was passed" ) @@ -2021,10 +2012,7 @@ def _items_overlap_with_suffix(left: Index, lsuffix, right: Index, rsuffix): return left, right if not lsuffix and not rsuffix: - raise ValueError( - "columns overlap but no suffix specified: " - "{rename}".format(rename=to_rename) - ) + raise ValueError(f"columns overlap but no suffix specified: {to_rename}") def renamer(x, suffix): """ @@ -2043,7 +2031,7 @@ def renamer(x, suffix): x : renamed column name """ if x in to_rename and suffix is not None: - return "{x}{suffix}".format(x=x, suffix=suffix) + return f"{x}{suffix}" return x lrenamer = partial(renamer, suffix=lsuffix) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 2e3eb9170b15c..5a444d908b786 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -363,8 +363,7 @@ def _bins_to_cuts( if duplicates not in ["raise", "drop"]: raise ValueError( - "invalid value for 'duplicates' parameter, " - "valid options are: raise, drop" + "invalid value for 'duplicates' parameter, valid options are: raise, drop" ) if isinstance(bins, IntervalIndex): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index cfa42d764ee44..898fbc6f8bc3b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -231,9 +231,7 @@ def _return_parsed_timezone_results(result, timezones, tz, name): """ if tz is not None: raise ValueError( - "Cannot pass a tz argument when " - "parsing strings with timezone " - "information." + "Cannot pass a tz argument when parsing strings with timezone information." 
) tz_results = np.array( [Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)] @@ -817,8 +815,7 @@ def f(value): required = ",".join(req) raise ValueError( "to assemble mappings requires at least that " - f"[year, month, day] be specified: [{required}] " - "is missing" + f"[year, month, day] be specified: [{required}] is missing" ) # keys we don't recognize diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 64ec0e68e11b0..ed0b816f64800 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -98,8 +98,7 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False): and isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame)) ): raise TypeError( - "arguments to moment function must be of type " - "np.ndarray/Series/DataFrame" + "arguments to moment function must be of type np.ndarray/Series/DataFrame" ) if isinstance(arg1, (np.ndarray, ABCSeries)) and isinstance( diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f612826132fd7..bdc94c7402eb5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1820,8 +1820,7 @@ def _on(self) -> Index: else: raise ValueError( f"invalid on specified as {self.on}, " - "must be a column (of DataFrame), an Index " - "or None" + "must be a column (of DataFrame), an Index or None" ) def validate(self): @@ -1838,9 +1837,8 @@ def validate(self): # we don't allow center if self.center: raise NotImplementedError( - "center is not implemented " - "for datetimelike and offset " - "based windows" + "center is not implemented for " + "datetimelike and offset based windows" ) # this will raise ValueError on non-fixed freqs @@ -1886,8 +1884,7 @@ def _validate_freq(self): except (TypeError, ValueError): raise ValueError( f"passed window {self.window} is not " - "compatible with a datetimelike " - "index" + "compatible with a datetimelike index" ) _agg_see_also_doc = dedent( diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index a084be54dfa10..9d284c8031840 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -136,8 +136,7 @@ def _maybe_convert_usecols(usecols): if is_integer(usecols): raise ValueError( "Passing an integer for `usecols` is no longer supported. " - "Please pass in a list of int from 0 to `usecols` " - "inclusive instead." + "Please pass in a list of int from 0 to `usecols` inclusive instead." 
) if isinstance(usecols, str): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6adf69a922000..296b305f41dd2 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -737,12 +737,8 @@ def _to_str_columns(self) -> List[List[str]]: self.header = cast(List[str], self.header) if len(self.header) != len(self.columns): raise ValueError( - ( - "Writing {ncols} cols but got {nalias} " - "aliases".format( - ncols=len(self.columns), nalias=len(self.header) - ) - ) + f"Writing {len(self.columns)} cols " + f"but got {len(self.header)} aliases" ) str_columns = [[label] for label in self.header] else: diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index b46b2f6c671d6..e3161415fe2bc 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -216,8 +216,8 @@ def _write_table(self, indent: int = 0) -> None: self.classes = self.classes.split() if not isinstance(self.classes, (list, tuple)): raise TypeError( - "classes must be a string, list, or tuple, " - "not {typ}".format(typ=type(self.classes)) + "classes must be a string, list, " + f"or tuple, not {type(self.classes)}" ) _classes.extend(self.classes) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 008a99427f3c7..8ab56437d5c05 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -114,8 +114,7 @@ def pad_empties(x): column_format = index_format + column_format elif not isinstance(self.column_format, str): # pragma: no cover raise AssertionError( - "column_format must be str or unicode, " - "not {typ}".format(typ=type(column_format)) + f"column_format must be str or unicode, not {type(column_format)}" ) else: column_format = self.column_format diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index bb5bce96bc64b..211935009d2e5 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -267,8 +267,9 @@ cdef class Parser: elif column_types[j] == b's': self.column_types[j] = column_type_string else: - raise ValueError("unknown column type: " - f"{self.parser.columns[j].ctype}") + raise ValueError( + f"unknown column type: {self.parser.columns[j].ctype}" + ) # compression if parser.compression == const.rle_compression: diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index f917477b81489..9b40778dbcfdf 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -459,8 +459,7 @@ def _process_columnsize_subheader(self, offset, length): if self.col_count_p1 + self.col_count_p2 != self.column_count: print( f"Warning: column count mismatch ({self.col_count_p1} + " - f"{self.col_count_p2} != " - f"{self.column_count})\n" + f"{self.col_count_p2} != {self.column_count})\n" ) # Unknown purpose @@ -672,8 +671,7 @@ def _read_next_page(self): self.close() msg = ( "failed to read complete page from file (read " - f"{len(self._cached_page):d} of " - f"{self._page_length:d} bytes)" + f"{len(self._cached_page):d} of {self._page_length:d} bytes)" ) raise ValueError(msg) diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 56ebb583bc2f9..27d56d4ede403 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -49,8 +49,7 @@ def read_sas( if format is None: buffer_error_msg = ( "If this is a buffer object rather " - "than a string name, you must specify " - "a format string" + "than a string name, you must specify a format string" ) filepath_or_buffer = stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, str): diff --git 
a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 53a27e8782ad7..9e0ec4df02edf 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -132,8 +132,7 @@ def main(conda_fname, pip_fname, compare=False): ) if args.azure: msg = ( - "##vso[task.logissue type=error;" - f"sourcepath=requirements-dev.txt]{msg}" + f"##vso[task.logissue type=error;sourcepath=requirements-dev.txt]{msg}" ) sys.stderr.write(msg) sys.exit(res) From 20755396868c43f4ed0df78a63e5bd8825129a22 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Mon, 13 Jan 2020 23:30:24 +0100 Subject: [PATCH 30/37] Added small corrections to the test for interpolate limit_area (#30987) --- pandas/tests/series/test_missing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 7b6d9210ed3d9..d8eeefcbdce7b 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1347,6 +1347,7 @@ def test_interp_limit_area(self): [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan] ) result = s.interpolate(method="linear", limit_area="inside", limit=1) + tm.assert_series_equal(result, expected) expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan]) result = s.interpolate( @@ -1362,6 +1363,7 @@ def test_interp_limit_area(self): [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] ) result = s.interpolate(method="linear", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]) result = s.interpolate( @@ -1371,8 +1373,9 @@ def test_interp_limit_area(self): expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) result = s.interpolate( - method="linear", limit_area="outside", direction="backward" + method="linear", limit_area="outside", limit_direction="backward" ) + tm.assert_series_equal(result, expected) # raises an error even if limit type is wrong. msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc" From 8ff2ebd9b2cfbe4a3e1eb7893dc4343c01ad55b4 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Tue, 14 Jan 2020 02:47:47 +0200 Subject: [PATCH 31/37] STY: concat strings (#30991) --- pandas/compat/__init__.py | 3 +-- pandas/compat/numpy/__init__.py | 6 ++---- pandas/compat/numpy/function.py | 5 ++--- pandas/core/generic.py | 3 +-- pandas/core/reshape/concat.py | 11 +++------- pandas/core/reshape/merge.py | 5 ++--- pandas/io/clipboards.py | 3 +-- pandas/io/common.py | 3 +-- pandas/io/date_converters.py | 3 +-- pandas/io/feather_format.py | 9 +++----- pandas/io/html.py | 3 +-- pandas/io/json/_normalize.py | 3 +-- pandas/io/parquet.py | 6 ++---- pandas/io/parsers.py | 26 ++++++++---------------- pandas/io/pytables.py | 19 +++++++---------- pandas/io/sql.py | 9 +++----- pandas/io/stata.py | 8 ++++---- pandas/plotting/_matplotlib/converter.py | 3 +-- pandas/plotting/_matplotlib/hist.py | 3 +-- pandas/plotting/_matplotlib/tools.py | 3 +-- 20 files changed, 47 insertions(+), 87 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 60cfecd5804ac..3547a33ea357b 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -110,8 +110,7 @@ def _import_lzma(): return lzma except ImportError: msg = ( - "Could not import the lzma module. 
" - "Your installed Python is incomplete. " + "Could not import the lzma module. Your installed Python is incomplete. " "Attempting to use lzma compression will result in a RuntimeError." ) warnings.warn(msg) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 27f1c32058941..6c9ac5944e6a1 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -18,11 +18,9 @@ if _nlv < "1.13.3": raise ImportError( - f"this version of pandas is incompatible with " - f"numpy < 1.13.3\n" + "this version of pandas is incompatible with numpy < 1.13.3\n" f"your numpy version is {_np_version}.\n" - f"Please upgrade numpy to >= 1.13.3 to use " - f"this pandas version" + "Please upgrade numpy to >= 1.13.3 to use this pandas version" ) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 50f234cbf9419..05ecccc67daef 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -389,9 +389,8 @@ def validate_resampler_func(method: str, args, kwargs) -> None: if len(args) + len(kwargs) > 0: if method in RESAMPLER_NUMPY_OPS: raise UnsupportedFunctionCall( - f"numpy operations are not " - f"valid with resample. Use " - f".resample(...).{method}() instead" + "numpy operations are not valid with resample. " + f"Use .resample(...).{method}() instead" ) else: raise TypeError("too many arguments passed in") diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 05066ac0ec128..ada26b55a778a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1701,8 +1701,7 @@ def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray: multi_message = ( "\n" "For a multi-index, the label must be a " - "tuple with elements corresponding to " - "each level." + "tuple with elements corresponding to each level." ) else: multi_message = "" diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 502b8d1941fdf..449f70b2be2fd 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -305,8 +305,7 @@ def __init__( if isinstance(objs, (NDFrame, str)): raise TypeError( "first argument must be an iterable of pandas " - "objects, you passed an object of type " - '"{name}"'.format(name=type(objs).__name__) + f'objects, you passed an object of type "{type(objs).__name__}"' ) if join == "outer": @@ -577,10 +576,7 @@ def _maybe_check_integrity(self, concat_index: Index): if self.verify_integrity: if not concat_index.is_unique: overlap = concat_index[concat_index.duplicated()].unique() - raise ValueError( - "Indexes have overlapping values: " - "{overlap!s}".format(overlap=overlap) - ) + raise ValueError(f"Indexes have overlapping values: {overlap}") def _concat_indexes(indexes) -> Index: @@ -648,8 +644,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde # make sure that all of the passed indices have the same nlevels if not len({idx.nlevels for idx in indexes}) == 1: raise AssertionError( - "Cannot concat indices that do " - "not have the same number of levels" + "Cannot concat indices that do not have the same number of levels" ) # also copies diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index acb53ff6ca555..ceee2f66dba42 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1071,9 +1071,8 @@ def _maybe_coerce_merge_keys(self): continue msg = ( - "You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. 
If you wish to proceed " - "you should use pd.concat".format(lk_dtype=lk.dtype, rk_dtype=rk.dtype) + f"You are trying to merge on {lk.dtype} and " + f"{rk.dtype} columns. If you wish to proceed you should use pd.concat" ) # if we are numeric, then allow differing diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 34e8e03d8771e..97178261bdf72 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -69,8 +69,7 @@ def read_clipboard(sep=r"\s+", **kwargs): # pragma: no cover kwargs["engine"] = "python" elif len(sep) > 1 and kwargs.get("engine") == "c": warnings.warn( - "read_clipboard with regex separator does not work " - "properly with c engine" + "read_clipboard with regex separator does not work properly with c engine" ) return read_csv(StringIO(text), sep=sep, **kwargs) diff --git a/pandas/io/common.py b/pandas/io/common.py index 771a302d647ec..6a764ff252dea 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -78,8 +78,7 @@ def _expand_user( def validate_header_arg(header) -> None: if isinstance(header, bool): raise TypeError( - "Passing a bool to header is invalid. " - "Use header=None for no header or " + "Passing a bool to header is invalid. Use header=None for no header or " "header=int or list-like of ints to specify " "the row(s) making up the column names" ) diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index 7fdca2d65b05d..07919dbda63ae 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -57,8 +57,7 @@ def _check_columns(cols): for i, n in enumerate(map(len, tail)): if n != N: raise AssertionError( - f"All columns must have the same length: {N}; " - f"column {i} has length {n}" + f"All columns must have the same length: {N}; column {i} has length {n}" ) return N diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index eb05004d9137c..5d4925620e75f 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -37,16 +37,13 @@ def to_feather(df: DataFrame, path): typ = type(df.index) raise ValueError( f"feather does not support serializing {typ} " - "for the index; you can .reset_index() " - "to make the index into column(s)" + "for the index; you can .reset_index() to make the index into column(s)" ) if not df.index.equals(RangeIndex.from_range(range(len(df)))): raise ValueError( - "feather does not support serializing a " - "non-default index for the index; you " - "can .reset_index() to make the index " - "into column(s)" + "feather does not support serializing a non-default index for the index; " + "you can .reset_index() to make the index into column(s)" ) if df.index.name is not None: diff --git a/pandas/io/html.py b/pandas/io/html.py index eafcca0e85bb3..809ce77eef0bb 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -899,8 +899,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs): f"The flavor {flav} failed to parse your input. " "Since you passed a non-rewindable file " "object, we can't rewind it to try " - "another parser. Try read_html() with a " - "different flavor." + "another parser. Try read_html() with a different flavor." 
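                # The failing parser has already consumed the non-seekable
                # buffer, so it cannot be rewound and handed to the next flavor.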
) retained = caught diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index c0596c984575a..cf292a13fed7f 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -317,8 +317,7 @@ def _recursive_extract(data, path, seen_meta, level=0): meta_val = np.nan else: raise KeyError( - "Try running with " - "errors='ignore' as key " + "Try running with errors='ignore' as key " f"{e} is not always present" ) meta_vals[key].append(meta_val) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 3a686a1a3b122..4be62b886f076 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -32,8 +32,7 @@ def get_engine(engine: str) -> "BaseImpl": raise ImportError( "Unable to find a usable engine; " "tried using: 'pyarrow', 'fastparquet'.\n" - "pyarrow or fastparquet is required for parquet " - "support" + "pyarrow or fastparquet is required for parquet support" ) if engine == "pyarrow": @@ -156,8 +155,7 @@ def write( if "partition_on" in kwargs and partition_cols is not None: raise ValueError( "Cannot use both partition_on and " - "partition_cols. Use partition_cols for " - "partitioning data" + "partition_cols. Use partition_cols for partitioning data" ) elif "partition_on" in kwargs: partition_cols = kwargs.pop("partition_on") diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b4eb2fb1411d0..62b82f174e17c 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -612,8 +612,7 @@ def parser_f( if delim_whitespace and delimiter != default_sep: raise ValueError( "Specified a delimiter with both sep and " - "delim_whitespace=True; you can only " - "specify one." + "delim_whitespace=True; you can only specify one." ) if engine is not None: @@ -968,8 +967,7 @@ def _clean_options(self, options, engine): fallback_reason = ( "the 'c' engine does not support " "regex separators (separators > 1 char and " - r"different from '\s+' are " - "interpreted as regex)" + r"different from '\s+' are interpreted as regex)" ) engine = "python" elif delim_whitespace: @@ -1000,8 +998,7 @@ def _clean_options(self, options, engine): fallback_reason = ( "ord(quotechar) > 127, meaning the " "quotechar is larger than one byte, " - "and the 'c' engine does not support " - "such quotechars" + "and the 'c' engine does not support such quotechars" ) engine = "python" @@ -1119,9 +1116,8 @@ def _make_engine(self, engine="c"): klass = FixedWidthFieldParser else: raise ValueError( - f"Unknown engine: {engine} (valid options are " - '"c", "python", or ' - '"python-fwf")' + f"Unknown engine: {engine} (valid options " + 'are "c", "python", or "python-fwf")' ) self._engine = klass(self.f, **self.options) @@ -1230,8 +1226,7 @@ def _validate_usecols_names(usecols, names): missing = [c for c in usecols if c not in names] if len(missing) > 0: raise ValueError( - "Usecols do not match columns, " - f"columns expected but not found: {missing}" + f"Usecols do not match columns, columns expected but not found: {missing}" ) return usecols @@ -1325,8 +1320,7 @@ def _validate_parse_dates_arg(parse_dates): that is the case. 
""" msg = ( - "Only booleans, lists, and " - "dictionaries are accepted " + "Only booleans, lists, and dictionaries are accepted " "for the 'parse_dates' parameter" ) @@ -1680,8 +1674,7 @@ def _convert_to_ndarrays( warnings.warn( ( "Both a converter and dtype were specified " - f"for column {c} - only the converter will " - "be used" + f"for column {c} - only the converter will be used" ), ParserWarning, stacklevel=7, @@ -1826,8 +1819,7 @@ def _cast_types(self, values, cast_type, column): except NotImplementedError: raise NotImplementedError( f"Extension Array: {array_type} must implement " - "_from_sequence_of_strings in order " - "to be used in parser methods" + "_from_sequence_of_strings in order to be used in parser methods" ) else: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d61d1cf7f0257..9e8d8a2e89f20 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -413,8 +413,8 @@ def read_hdf( for group_to_check in groups[1:]: if not _is_metadata_of(group_to_check, candidate_only_group): raise ValueError( - "key must be provided when HDF5 file " - "contains multiple datasets." + "key must be provided when HDF5 " + "file contains multiple datasets." ) key = candidate_only_group._v_pathname return store.select( @@ -1240,8 +1240,7 @@ def append_to_multiple( if v is None: if remain_key is not None: raise ValueError( - "append_to_multiple can only have one value in d that " - "is None" + "append_to_multiple can only have one value in d that is None" ) remain_key = k else: @@ -2313,8 +2312,7 @@ def validate_attr(self, append): existing_dtype = getattr(self.attrs, self.dtype_attr, None) if existing_dtype is not None and existing_dtype != self.dtype: raise ValueError( - "appended items dtype do not match existing " - "items dtype in table!" + "appended items dtype do not match existing items dtype in table!" ) def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): @@ -2680,14 +2678,12 @@ def validate_read(self, columns, where): if columns is not None: raise TypeError( "cannot pass a column specification when reading " - "a Fixed format store. this store must be " - "selected in its entirety" + "a Fixed format store. this store must be selected in its entirety" ) if where is not None: raise TypeError( "cannot pass a where specification when reading " - "from a Fixed format store. this store must be " - "selected in its entirety" + "from a Fixed format store. this store must be selected in its entirety" ) @property @@ -2908,8 +2904,7 @@ def write_array(self, key: str, value: ArrayLike, items: Optional[Index] = None) if is_categorical_dtype(value): raise NotImplementedError( - "Cannot store a category dtype in " - "a HDF5 dataset that uses format=" + "Cannot store a category dtype in a HDF5 dataset that uses format=" '"fixed". Use format="table".' ) if not empty_array: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index f4527994db0d2..58fed0d18dd4a 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -977,8 +977,7 @@ def _sqlalchemy_type(self, col): if col_type == "timedelta64": warnings.warn( "the 'timedelta' type is not supported, and will be " - "written as integer values (ns frequency) to the " - "database.", + "written as integer values (ns frequency) to the database.", UserWarning, stacklevel=8, ) @@ -1413,8 +1412,7 @@ def _get_valid_sqlite_name(name): _SAFE_NAMES_WARNING = ( "The spaces in these column names will not be changed. " - "In pandas versions < 0.14, spaces were converted to " - "underscores." 
+ "In pandas versions < 0.14, spaces were converted to underscores." ) @@ -1528,8 +1526,7 @@ def _sql_type_name(self, col): if col_type == "timedelta64": warnings.warn( "the 'timedelta' type is not supported, and will be " - "written as integer values (ns frequency) to the " - "database.", + "written as integer values (ns frequency) to the database.", UserWarning, stacklevel=8, ) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b216ee80c3940..2c1222aad12cc 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -639,8 +639,7 @@ def __init__(self, catarray, encoding="latin-1"): if self.text_len > 32000: raise ValueError( "Stata value labels for a single variable must " - "have a combined length less than 32,000 " - "characters." + "have a combined length less than 32,000 characters." ) # Ensure int32 @@ -1729,9 +1728,10 @@ def _do_select_columns(self, data, columns): raise ValueError("columns contains duplicate entries") unmatched = column_set.difference(data.columns) if unmatched: + joined = ", ".join(list(unmatched)) raise ValueError( - "The following columns were not found in the " - "Stata data set: " + ", ".join(list(unmatched)) + "The following columns were not " + f"found in the Stata data set: {joined}" ) # Copy information for retained columns for later processing dtyplist = [] diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 5b37ebb42aecc..a1035fd0823bb 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -421,8 +421,7 @@ def __call__(self): if estimate > self.MAXTICKS * 2: raise RuntimeError( "MillisecondLocator estimated to generate " - f"{estimate:d} ticks from {dmin} to {dmax}: " - "exceeds Locator.MAXTICKS" + f"{estimate:d} ticks from {dmin} to {dmax}: exceeds Locator.MAXTICKS" f"* 2 ({self.MAXTICKS * 2:d}) " ) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index f8b2c7ab123d0..d54fc73b495ba 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -318,8 +318,7 @@ def hist_series( if "figure" in kwds: raise ValueError( "Cannot pass 'figure' when using the " - "'by' argument, since a new 'Figure' instance " - "will be created" + "'by' argument, since a new 'Figure' instance will be created" ) axes = _grouped_hist( self, diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index dd4034a97f58e..d7732c86911b8 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -190,8 +190,7 @@ def _subplots( if sharex or sharey: warnings.warn( "When passing multiple axes, sharex and sharey " - "are ignored. These settings must be specified " - "when creating axes", + "are ignored. 
These settings must be specified when creating axes", UserWarning, stacklevel=4, ) From 13b22fd94d45afd44045ef77b8c929744efe6a7b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 13 Jan 2020 20:46:11 -0800 Subject: [PATCH 32/37] CLN: de-duplicate _getitem_scalar (#30992) --- pandas/core/indexing.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cc11879142ffe..10e71e72dd885 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1743,12 +1743,14 @@ def _get_slice_axis(self, slice_obj: slice, axis: int): class _LocationIndexer(_NDFrameIndexer): + _takeable: bool = False + def __getitem__(self, key): if type(key) is tuple: key = tuple(com.apply_if_callable(x, self.obj) for x in key) if self._is_scalar_access(key): try: - return self._getitem_scalar(key) + return self.obj._get_value(*key, takeable=self._takeable) except (KeyError, IndexError, AttributeError): # AttributeError for IntervalTree get_value pass @@ -1763,9 +1765,6 @@ def __getitem__(self, key): def _is_scalar_access(self, key: Tuple): raise NotImplementedError() - def _getitem_scalar(self, key): - raise NotImplementedError() - def _getitem_axis(self, key, axis: int): raise NotImplementedError() @@ -1854,12 +1853,6 @@ def _is_scalar_access(self, key: Tuple) -> bool: return True - def _getitem_scalar(self, key): - # a fast-path to scalar access - # if not, raise - values = self.obj._get_value(*key) - return values - def _get_partial_string_timestamp_match_key(self, key, labels): """ Translate any partial string timestamp matches in key, returning the @@ -1965,6 +1958,7 @@ class _iLocIndexer(_LocationIndexer): "point is EXCLUDED), listlike of integers, boolean array" ) _get_slice_axis = _NDFrameIndexer._get_slice_axis + _takeable = True def _validate_key(self, key, axis: int): if com.is_bool_indexer(key): @@ -2029,12 +2023,6 @@ def _is_scalar_access(self, key: Tuple) -> bool: return True - def _getitem_scalar(self, key): - # a fast-path to scalar access - # if not, raise - values = self.obj._get_value(*key, takeable=True) - return values - def _validate_integer(self, key: int, axis: int) -> None: """ Check that 'key' is a valid position in the desired axis. From 0f048cb275aeeed8a202b979a32bb08b07953919 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Jan 2020 00:57:39 -0800 Subject: [PATCH 33/37] CLN: remove geopandas compat code (#30909) --- pandas/core/indexing.py | 71 +-------------------------------- pandas/tests/test_downstream.py | 22 +--------- 2 files changed, 2 insertions(+), 91 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 10e71e72dd885..04503e5d98c10 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -27,7 +27,7 @@ is_list_like_indexer, length_of_indexer, ) -from pandas.core.indexes.api import Index, InvalidIndexError +from pandas.core.indexes.api import Index # "null slice" _NS = slice(None, None) @@ -579,39 +579,6 @@ def __call__(self, axis=None): new_self.axis = axis return new_self - # TODO: remove once geopandas no longer needs this - def __getitem__(self, key): - # Used in ix and downstream in geopandas _CoordinateIndexer - if type(key) is tuple: - # Note: we check the type exactly instead of with isinstance - # because NamedTuple is checked separately. 
- key = tuple(com.apply_if_callable(x, self.obj) for x in key) - try: - values = self.obj._get_value(*key) - except (KeyError, TypeError, InvalidIndexError, AttributeError): - # TypeError occurs here if the key has non-hashable entries, - # generally slice or list. - # TODO(ix): most/all of the TypeError cases here are for ix, - # so this check can be removed once ix is removed. - # The InvalidIndexError is only catched for compatibility - # with geopandas, see - # https://github.com/pandas-dev/pandas/issues/27258 - # TODO: The AttributeError is for IntervalIndex which - # incorrectly implements get_value, see - # https://github.com/pandas-dev/pandas/issues/27865 - pass - else: - if is_scalar(values): - return values - - return self._getitem_tuple(key) - else: - # we by definition only have the 0th axis - axis = self.axis or 0 - - key = com.apply_if_callable(key, self.obj) - return self._getitem_axis(key, axis=axis) - def _get_label(self, label, axis: int): if self.ndim == 1: # for perf reasons we want to try _xs first @@ -1460,42 +1427,6 @@ def _getitem_nested_tuple(self, tup: Tuple): return obj - # TODO: remove once geopandas no longer needs __getitem__ - def _getitem_axis(self, key, axis: int): - if is_iterator(key): - key = list(key) - self._validate_key(key, axis) - - labels = self.obj._get_axis(axis) - if isinstance(key, slice): - return self._get_slice_axis(key, axis=axis) - elif is_list_like_indexer(key) and not ( - isinstance(key, tuple) and isinstance(labels, ABCMultiIndex) - ): - - if hasattr(key, "ndim") and key.ndim > 1: - raise ValueError("Cannot index with multidimensional key") - - return self._getitem_iterable(key, axis=axis) - else: - - # maybe coerce a float scalar to integer - key = labels._maybe_cast_indexer(key) - - if is_integer(key): - if axis == 0 and isinstance(labels, ABCMultiIndex): - try: - return self._get_label(key, axis=axis) - except (KeyError, TypeError): - if self.obj.index.levels[0].is_integer(): - raise - - # this is the fallback! (for a non-float, non-integer index) - if not labels.is_floating() and not labels.is_integer(): - return self._get_loc(key, axis=axis) - - return self._get_label(key, axis=axis) - def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): """ Transform a list-like of keys into a new index and an indexer. 
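For context, the downstream pattern that loses support here is the one exercised by the test removed just below: geopandas subclasses _NDFrameIndexer, registers it through the private Series._create_indexer hook, and relies on the generic __getitem__ fallback to route tuple keys into _getitem_tuple. A minimal sketch of that pattern, reconstructed from the removed test; the cx accessor name comes from that test, and the snippet only runs against a pandas checkout from before this patch, since _NDFrameIndexer no longer provides the fallback afterwards:

from pandas import Series
import pandas._testing as tm
from pandas.core.indexing import _NDFrameIndexer


class _CoordinateIndexer(_NDFrameIndexer):
    def _getitem_tuple(self, tup):
        # the compat __getitem__ dispatches s.cx[xs, ys] here as tup=(xs, ys)
        xs, ys = tup
        return self.obj[xs][ys]


Series._create_indexer("cx", _CoordinateIndexer)

s = Series(range(5))
tm.assert_series_equal(s.cx[:, :], s)  # a tuple of full slices round-trips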
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index ee006233c4c1b..8edd9f20ec63c 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -8,7 +8,7 @@
 import numpy as np # noqa
 import pytest
 
-from pandas import DataFrame, Series
+from pandas import DataFrame
 import pandas._testing as tm
 
@@ -114,26 +114,6 @@ def test_geopandas():
 
     assert geopandas.read_file(fp) is not None
 
 
-def test_geopandas_coordinate_indexer():
-    # this test is included to have coverage of one case in the indexing.py
-    # code that is only kept for compatibility with geopandas, see
-    # https://github.com/pandas-dev/pandas/issues/27258
-    # We should be able to remove this after some time when its usage is
-    # removed in geopandas
-    from pandas.core.indexing import _NDFrameIndexer
-
-    class _CoordinateIndexer(_NDFrameIndexer):
-        def _getitem_tuple(self, tup):
-            obj = self.obj
-            xs, ys = tup
-            return obj[xs][ys]
-
-    Series._create_indexer("cx", _CoordinateIndexer)
-    s = Series(range(5))
-    res = s.cx[:, :]
-    tm.assert_series_equal(s, res)
-
-
 # Cython import warning
 @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning")
 @pytest.mark.filterwarnings("ignore:RangeIndex.* is deprecated:DeprecationWarning")

From b18024d051603702d1fd58b5bdb5b1ac5c7cf119 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com>
Date: Tue, 14 Jan 2020 13:07:24 +0200
Subject: [PATCH 34/37] STY: Whitespace placed at the beginning instead of at the end of a line (#30996)

---
 pandas/core/arrays/categorical.py | 4 +-
 pandas/core/arrays/datetimes.py | 13 +++----
 pandas/core/arrays/period.py | 4 +-
 pandas/core/computation/expr.py | 4 +-
 pandas/core/generic.py | 11 +++---
 pandas/core/indexes/base.py | 4 +-
 pandas/core/indexes/multi.py | 12 +++---
 pandas/core/series.py | 4 +-
 pandas/io/common.py | 4 +-
 pandas/io/parsers.py | 4 +-
 .../arrays/categorical/test_operators.py | 8 ++--
 pandas/tests/computation/test_eval.py | 12 +++---
 pandas/tests/frame/indexing/test_indexing.py | 10 ++---
 pandas/tests/frame/test_api.py | 4 +-
 pandas/tests/frame/test_constructors.py | 6 +--
 pandas/tests/frame/test_dtypes.py | 8 ++--
 pandas/tests/indexes/common.py | 4 +-
 .../indexes/datetimes/test_constructors.py | 4 +-
 pandas/tests/indexes/multi/test_analytics.py | 4 +-
 pandas/tests/indexes/period/test_indexing.py | 8 ++--
 pandas/tests/indexes/test_numeric.py | 4 +-
 pandas/tests/indexing/test_floats.py | 38 +++++++++----------
 pandas/tests/indexing/test_iloc.py | 6 +--
 pandas/tests/indexing/test_indexing.py | 8 ++--
 pandas/tests/indexing/test_loc.py | 16 ++++----
 pandas/tests/indexing/test_partial.py | 8 ++--
 pandas/tests/io/test_common.py | 8 ++--
 pandas/tests/io/test_stata.py | 20 +++++-----
 pandas/tests/resample/test_resample_api.py | 4 +-
 .../tests/scalar/timedelta/test_arithmetic.py | 4 +-
 .../tests/series/indexing/test_alter_index.py | 4 +-
 pandas/tests/series/indexing/test_boolean.py | 8 ++--
 pandas/tests/series/indexing/test_indexing.py | 4 +-
 pandas/tests/series/indexing/test_numeric.py | 4 +-
 pandas/tests/series/methods/test_argsort.py | 4 +-
 pandas/tests/series/methods/test_isin.py | 4 +-
 pandas/tests/series/methods/test_replace.py | 4 +-
 pandas/tests/series/test_alter_axes.py | 4 +-
 pandas/tests/series/test_dtypes.py | 8 ++--
 pandas/tests/series/test_missing.py | 4 +-
 pandas/tests/test_algos.py | 4 +-
 pandas/tests/util/test_validate_kwargs.py | 4 +-
 42 files changed, 150 insertions(+), 154 deletions(-)

diff --git a/pandas/core/arrays/categorical.py 
b/pandas/core/arrays/categorical.py index 2806635211459..9d7359dd9c614 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2404,8 +2404,8 @@ def isin(self, values): if not is_list_like(values): values_type = type(values).__name__ raise TypeError( - "only list-like objects are allowed to be passed" - f" to isin(), you passed a [{values_type}]" + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{values_type}]" ) values = sanitize_array(values, None, None) null_mask = np.asarray(isna(values)) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e42402b307f28..1988b2e9e33f2 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -234,11 +234,10 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): values = values._data if not isinstance(values, np.ndarray): - msg = ( + raise ValueError( f"Unexpected type '{type(values).__name__}'. 'values' must be " "a DatetimeArray ndarray, or Series or Index containing one of those." ) - raise ValueError(msg) if values.ndim not in [1, 2]: raise ValueError("Only 1-dimensional input arrays are supported.") @@ -249,20 +248,18 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): values = values.view(_NS_DTYPE) if values.dtype != _NS_DTYPE: - msg = ( - "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'." - f" Got {values.dtype} instead." + raise ValueError( + "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'. " + f"Got {values.dtype} instead." ) - raise ValueError(msg) dtype = _validate_dt64_dtype(dtype) if freq == "infer": - msg = ( + raise ValueError( "Frequency inference not allowed in DatetimeArray.__init__. " "Use 'pd.array()' instead." 
) - raise ValueError(msg) if copy: values = values.copy() diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 697d759206ff9..1e2a02e988fdd 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -297,8 +297,8 @@ def __arrow_array__(self, type=None): # ensure we have the same freq if self.freqstr != type.freq: raise TypeError( - "Not supported to convert PeriodArray to array with different" - f" 'freq' ({self.freqstr} vs {type.freq})" + "Not supported to convert PeriodArray to array with different " + f"'freq' ({self.freqstr} vs {type.freq})" ) else: raise TypeError( diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 1350587b5ca90..d91586e6c9b81 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -466,8 +466,8 @@ def _maybe_evaluate_binop( if res.has_invalid_return_type: raise TypeError( - f"unsupported operand type(s) for {res.op}:" - f" '{lhs.type}' and '{rhs.type}'" + f"unsupported operand type(s) for {res.op}: " + f"'{lhs.type}' and '{rhs.type}'" ) if self.engine != "pytables": diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ada26b55a778a..c501ada6b5783 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -265,8 +265,8 @@ def _validate_dtype(self, dtype): # a compound dtype if dtype.kind == "V": raise NotImplementedError( - "compound dtypes are not implemented" - f" in the {type(self).__name__} constructor" + "compound dtypes are not implemented " + f"in the {type(self).__name__} constructor" ) return dtype @@ -8993,11 +8993,10 @@ def tshift( new_data = self._data.copy() new_data.axes[block_axis] = index.shift(periods) elif orig_freq is not None: - msg = ( - f"Given freq {freq.rule_code} does not match" - f" PeriodIndex freq {orig_freq.rule_code}" + raise ValueError( + f"Given freq {freq.rule_code} does not match " + f"PeriodIndex freq {orig_freq.rule_code}" ) - raise ValueError(msg) else: new_data = self._data.copy() new_data.axes[block_axis] = index.shift(periods, freq) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 62e3fd28f6684..f2f53f564da76 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4790,8 +4790,8 @@ def get_slice_bound(self, label, side, kind): if side not in ("left", "right"): raise ValueError( - f"Invalid value for side kwarg, must be either" - f" 'left' or 'right': {side}" + "Invalid value for side kwarg, must be either " + f"'left' or 'right': {side}" ) original_label = label diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 84d7399cc4f2d..21421a6f6ea62 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1288,8 +1288,8 @@ def _get_level_number(self, level) -> int: if level < 0: orig_level = level - self.nlevels raise IndexError( - f"Too many levels: Index has only {self.nlevels} levels," - f" {orig_level} is not a valid level number" + f"Too many levels: Index has only {self.nlevels} levels, " + f"{orig_level} is not a valid level number" ) # Note: levels are zero-based elif level >= self.nlevels: @@ -2171,8 +2171,8 @@ def reorder_levels(self, order): order = [self._get_level_number(i) for i in order] if len(order) != self.nlevels: raise AssertionError( - f"Length of order must be same as number of levels ({self.nlevels})," - f" got {len(order)}" + f"Length of order must be same as number of levels ({self.nlevels}), " + f"got {len(order)}" ) new_levels = [self.levels[i] for i in order] new_codes = 
[self.codes[i] for i in order] @@ -2527,8 +2527,8 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): def _partial_tup_index(self, tup, side="left"): if len(tup) > self.lexsort_depth: raise UnsortedIndexError( - f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth" - f" ({self.lexsort_depth})" + f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth " + f"({self.lexsort_depth})" ) n = len(tup) diff --git a/pandas/core/series.py b/pandas/core/series.py index fe5c5fd5e2bc8..33565bbedade6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1404,8 +1404,8 @@ def to_string( # catch contract violations if not isinstance(result, str): raise AssertionError( - "result must be of type str, type" - f" of result is {repr(type(result).__name__)}" + "result must be of type str, type " + f"of result is {repr(type(result).__name__)}" ) if buf is None: diff --git a/pandas/io/common.py b/pandas/io/common.py index 6a764ff252dea..cf19169214c35 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -406,8 +406,8 @@ def get_handle( raise ValueError(f"Zero files found in ZIP file {path_or_buf}") else: raise ValueError( - "Multiple files found in ZIP file." - f" Only one file per ZIP: {zip_names}" + "Multiple files found in ZIP file. " + f"Only one file per ZIP: {zip_names}" ) # XZ Compression diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 62b82f174e17c..41db6ed0ef503 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -906,8 +906,8 @@ def _get_options_with_defaults(self, engine): pass else: raise ValueError( - f"The {repr(argname)} option is not supported with the" - f" {repr(engine)} engine" + f"The {repr(argname)} option is not supported with the " + f"{repr(engine)} engine" ) else: value = _deprecated_defaults.get(argname, default) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 8643e7f6f89c1..0c830c65e0f8b 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -97,8 +97,8 @@ def test_comparisons(self): # comparison (in both directions) with Series will raise s = Series(["b", "b", "b"]) msg = ( - "Cannot compare a Categorical for op __gt__ with type" - r" " + "Cannot compare a Categorical for op __gt__ with type " + r"" ) with pytest.raises(TypeError, match=msg): cat > s @@ -265,8 +265,8 @@ def test_comparisons(self, data, reverse, base): # categorical cannot be compared to Series or numpy array, and also # not the other way around msg = ( - "Cannot compare a Categorical for op __gt__ with type" - r" " + "Cannot compare a Categorical for op __gt__ with type " + r"" ) with pytest.raises(TypeError, match=msg): cat > s diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 7f68abb92ba43..656b274aa1a9e 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -274,9 +274,9 @@ def check_operands(left, right, cmp_op): def check_simple_cmp_op(self, lhs, cmp1, rhs): ex = f"lhs {cmp1} rhs" msg = ( - r"only list-like( or dict-like)? objects are allowed to be" - r" passed to (DataFrame\.)?isin\(\), you passed a" - r" (\[|')bool(\]|')|" + r"only list-like( or dict-like)? 
objects are allowed to be " + r"passed to (DataFrame\.)?isin\(\), you passed a " + r"(\[|')bool(\]|')|" "argument of type 'bool' is not iterable" ) if cmp1 in ("in", "not in") and not is_list_like(rhs): @@ -408,9 +408,9 @@ def check_compound_invert_op(self, lhs, cmp1, rhs): ex = f"~(lhs {cmp1} rhs)" msg = ( - r"only list-like( or dict-like)? objects are allowed to be" - r" passed to (DataFrame\.)?isin\(\), you passed a" - r" (\[|')float(\]|')|" + r"only list-like( or dict-like)? objects are allowed to be " + r"passed to (DataFrame\.)?isin\(\), you passed a " + r"(\[|')float(\]|')|" "argument of type 'float' is not iterable" ) if is_scalar(rhs) and cmp1 in skip_these: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 33c0e92845484..40ecda7d74952 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -447,8 +447,8 @@ def test_setitem(self, float_frame): tm.assert_series_equal(series, float_frame["col6"], check_names=False) msg = ( - r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the" - r" \[columns\]\"" + r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the " + r"\[columns\]\"" ) with pytest.raises(KeyError, match=msg): float_frame[np.random.randn(len(float_frame) + 1)] = 1 @@ -1039,9 +1039,9 @@ def test_getitem_setitem_float_labels(self): # positional slicing only via iloc! msg = ( - "cannot do slice indexing on" - r" with" - r" these indexers \[1.0\] of " + "cannot do slice indexing on " + r" with " + r"these indexers \[1.0\] of " ) with pytest.raises(TypeError, match=msg): df.iloc[1.0:5] diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 9263409f7a7f8..9de5d6fe16a0d 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -382,8 +382,8 @@ def test_swapaxes(self): tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) tm.assert_frame_equal(df, df.swapaxes(0, 0)) msg = ( - "No axis named 2 for object type" - r" " + "No axis named 2 for object type " + r"" ) with pytest.raises(ValueError, match=msg): df.swapaxes(2, 5) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ea1e339f44d93..a861e0eb52391 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1854,9 +1854,9 @@ def check(df): # No NaN found -> error if len(indexer) == 0: msg = ( - "cannot do label indexing on" - r" " - r" with these indexers \[nan\] of " + "cannot do label indexing on " + r" " + r"with these indexers \[nan\] of " ) with pytest.raises(TypeError, match=msg): df.loc[:, np.nan] diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 06bb040224455..0d34f61ef1e5a 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -897,15 +897,15 @@ def test_astype_to_incorrect_datetimelike(self, unit): df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) msg = ( - r"cannot astype a datetimelike from \[datetime64\[ns\]\] to" - r" \[timedelta64\[{}\]\]" + r"cannot astype a datetimelike from \[datetime64\[ns\]\] to " + r"\[timedelta64\[{}\]\]" ).format(unit) with pytest.raises(TypeError, match=msg): df.astype(other) msg = ( - r"cannot astype a timedelta from \[timedelta64\[ns\]\] to" - r" \[datetime64\[{}\]\]" + r"cannot astype a timedelta from \[timedelta64\[ns\]\] to " + r"\[datetime64\[{}\]\]" ).format(unit) df = DataFrame(np.array([[1, 2, 3]], dtype=other)) with pytest.raises(TypeError, 
match=msg): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index a16017b0e12c0..afc068d6696ef 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -37,8 +37,8 @@ class Base: def test_pickle_compat_construction(self): # need an object to create with msg = ( - r"Index\(\.\.\.\) must be called with a collection of some" - r" kind, None was passed|" + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, None was passed|" r"__new__\(\) missing 1 required positional argument: 'data'|" r"__new__\(\) takes at least 2 arguments \(1 given\)" ) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index ffe51dd1fb9f5..95d14ad4c86f7 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -644,8 +644,8 @@ def test_constructor_dtype(self): ) msg = ( - "cannot supply both a tz and a timezone-naive dtype" - r" \(i\.e\. datetime64\[ns\]\)" + "cannot supply both a tz and a timezone-naive dtype " + r"\(i\.e\. datetime64\[ns\]\)" ) with pytest.raises(ValueError, match=msg): DatetimeIndex(idx, dtype="datetime64[ns]") diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index ac1e0893683d1..209cc627aba8b 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -334,8 +334,8 @@ def test_numpy_ufuncs(idx, func): else: expected_exception = TypeError msg = ( - "loop of ufunc does not support argument 0 of type tuple which" - f" has no callable {func.__name__} method" + "loop of ufunc does not support argument 0 of type tuple which " + f"has no callable {func.__name__} method" ) with pytest.raises(expected_exception, match=msg): func(idx) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 7dbefbdaff98e..2e3bf852667e5 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -409,8 +409,8 @@ def test_get_loc(self): idx0.get_loc(1.1) msg = ( - r"'PeriodIndex\(\['2017-09-01', '2017-09-02', '2017-09-03'\]," - r" dtype='period\[D\]', freq='D'\)' is an invalid key" + r"'PeriodIndex\(\['2017-09-01', '2017-09-02', '2017-09-03'\], " + r"dtype='period\[D\]', freq='D'\)' is an invalid key" ) with pytest.raises(TypeError, match=msg): idx0.get_loc(idx0) @@ -434,8 +434,8 @@ def test_get_loc(self): idx1.get_loc(1.1) msg = ( - r"'PeriodIndex\(\['2017-09-02', '2017-09-02', '2017-09-03'\]," - r" dtype='period\[D\]', freq='D'\)' is an invalid key" + r"'PeriodIndex\(\['2017-09-02', '2017-09-02', '2017-09-03'\], " + r"dtype='period\[D\]', freq='D'\)' is an invalid key" ) with pytest.raises(TypeError, match=msg): idx1.get_loc(idx1) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index f025168643ab9..582f6c619d287 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -188,8 +188,8 @@ def test_constructor_invalid(self): # invalid msg = ( - r"Float64Index\(\.\.\.\) must be called with a collection of" - r" some kind, 0\.0 was passed" + r"Float64Index\(\.\.\.\) must be called with a collection of " + r"some kind, 0\.0 was passed" ) with pytest.raises(TypeError, match=msg): Float64Index(0.0) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 2cc8232566aa9..5530896a90941 100644 --- 
a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -123,9 +123,9 @@ def test_scalar_non_numeric(self): # setting with a float fails with iloc msg = ( - r"cannot do (label|index|positional) indexing" - r" on {klass} with these indexers \[3\.0\] of" - r" {kind}".format(klass=type(i), kind=str(float)) + r"cannot do (label|index|positional) indexing " + r"on {klass} with these indexers \[3\.0\] of " + r"{kind}".format(klass=type(i), kind=str(float)) ) with pytest.raises(TypeError, match=msg): s.iloc[3.0] = 0 @@ -160,9 +160,9 @@ def test_scalar_non_numeric(self): s = Series(np.arange(len(i)), index=i) s[3] msg = ( - r"cannot do (label|index) indexing" - r" on {klass} with these indexers \[3\.0\] of" - r" {kind}".format(klass=type(i), kind=str(float)) + r"cannot do (label|index) indexing " + r"on {klass} with these indexers \[3\.0\] of " + r"{kind}".format(klass=type(i), kind=str(float)) ) with pytest.raises(TypeError, match=msg): s[3.0] @@ -177,9 +177,9 @@ def test_scalar_with_mixed(self): for idxr in [lambda x: x, lambda x: x.iloc]: msg = ( - r"cannot do label indexing" - r" on {klass} with these indexers \[1\.0\] of" - r" {kind}|" + r"cannot do label indexing " + r"on {klass} with these indexers \[1\.0\] of " + r"{kind}|" "Cannot index by location index with a non-integer key".format( klass=str(Index), kind=str(float) ) @@ -199,9 +199,9 @@ def test_scalar_with_mixed(self): for idxr in [lambda x: x]: msg = ( - r"cannot do label indexing" - r" on {klass} with these indexers \[1\.0\] of" - r" {kind}".format(klass=str(Index), kind=str(float)) + r"cannot do label indexing " + r"on {klass} with these indexers \[1\.0\] of " + r"{kind}".format(klass=str(Index), kind=str(float)) ) with pytest.raises(TypeError, match=msg): idxr(s3)[1.0] @@ -313,9 +313,9 @@ def test_scalar_float(self): s.iloc[3.0] msg = ( - r"cannot do positional indexing" - r" on {klass} with these indexers \[3\.0\] of" - r" {kind}".format(klass=str(Float64Index), kind=str(float)) + r"cannot do positional indexing " + r"on {klass} with these indexers \[3\.0\] of " + r"{kind}".format(klass=str(Float64Index), kind=str(float)) ) with pytest.raises(TypeError, match=msg): s2.iloc[3.0] = 0 @@ -379,10 +379,10 @@ def test_slice_non_numeric(self): for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: msg = ( - "cannot do slice indexing" - r" on {klass} with these indexers" - r" \[(3|4)(\.0)?\]" - r" of ({kind_float}|{kind_int})".format( + "cannot do slice indexing " + r"on {klass} with these indexers " + r"\[(3|4)(\.0)?\] " + r"of ({kind_float}|{kind_int})".format( klass=type(index), kind_float=str(float), kind_int=str(int), diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 26dedf02e7333..48c25ec034653 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -437,9 +437,9 @@ def test_iloc_getitem_labelled_frame(self): # trying to use a label msg = ( - r"Location based indexing can only have \[integer, integer" - r" slice \(START point is INCLUDED, END point is EXCLUDED\)," - r" listlike of integers, boolean array\] types" + r"Location based indexing can only have \[integer, integer " + r"slice \(START point is INCLUDED, END point is EXCLUDED\), " + r"listlike of integers, boolean array\] types" ) with pytest.raises(ValueError, match=msg): df.iloc["j", "D"] diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 448a06070c45c..1913caae93932 100644 --- 
a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -81,8 +81,8 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): nd3 = np.random.randint(5, size=(2, 2, 2)) msg = ( - r"Buffer has wrong number of dimensions \(expected 1," - r" got 3\)|" + r"Buffer has wrong number of dimensions \(expected 1, " + r"got 3\)|" "Cannot index with multidimensional key|" r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]|" "Index data must be 1-dimensional" @@ -134,8 +134,8 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): nd3 = np.random.randint(5, size=(2, 2, 2)) msg = ( - r"Buffer has wrong number of dimensions \(expected 1," - r" got 3\)|" + r"Buffer has wrong number of dimensions \(expected 1, " + r"got 3\)|" "'pandas._libs.interval.IntervalTree' object has no attribute " "'set_value'|" # AttributeError "unhashable type: 'numpy.ndarray'|" # TypeError diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index a36078b11c663..78fcd15ab4cc1 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -219,8 +219,8 @@ def test_loc_to_fail(self): # raise a KeyError? msg = ( - r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are" - r" in the \[index\]\"" + r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are " + r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): df.loc[[1, 2], [1, 2]] @@ -236,8 +236,8 @@ def test_loc_to_fail(self): s.loc[-1] msg = ( - r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are" - r" in the \[index\]\"" + r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are " + r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): s.loc[[-1, -2]] @@ -252,8 +252,8 @@ def test_loc_to_fail(self): s["a"] = 2 msg = ( - r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are" - r" in the \[index\]\"" + r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are " + r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): s.loc[[-2]] @@ -268,8 +268,8 @@ def test_loc_to_fail(self): df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"]) msg = ( - r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are" - r" in the \[index\]\"" + r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are " + r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): df.loc[[3], :] diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 5fda759020f1a..2ce07ec41758f 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -205,8 +205,8 @@ def test_series_partial_set(self): # raises as nothing in in the index msg = ( - r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are" - r" in the \[index\]\"" + r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are " + r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): ser.loc[[3, 3, 3]] @@ -286,8 +286,8 @@ def test_series_partial_set_with_name(self): # raises as nothing in in the index msg = ( - r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'," - r" name='idx'\)\] are in the \[index\]\"" + r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64', " + r"name='idx'\)\] are in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): ser.loc[[3, 3, 3]] diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a126f83164ce5..22aa78919ef0f 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -146,8 +146,8 @@ def test_read_non_existant(self, 
reader, module, error_class, fn_ext): msg3 = "Expected object or value" msg4 = "path_or_buf needs to be a string file path or file-like" msg5 = ( - fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist:" - fr" '.+does_not_exist\.{fn_ext}'" + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist: " + fr"'.+does_not_exist\.{fn_ext}'" ) msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" msg7 = ( @@ -186,8 +186,8 @@ def test_read_expands_user_home_dir( msg3 = "Unexpected character found when decoding 'false'" msg4 = "path_or_buf needs to be a string file path or file-like" msg5 = ( - fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist:" - fr" '.+does_not_exist\.{fn_ext}'" + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist: " + fr"'.+does_not_exist\.{fn_ext}'" ) msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" msg7 = ( diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 1d3cddbf01738..5e4ec116645b0 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -737,10 +737,10 @@ def test_excessively_long_string(self): ) original = DataFrame(s) msg = ( - r"Fixed width strings in Stata \.dta files are limited to 244" - r" \(or fewer\)\ncharacters\. Column 's500' does not satisfy" - r" this restriction\. Use the\n'version=117' parameter to write" - r" the newer \(Stata 13 and later\) format\." + r"Fixed width strings in Stata \.dta files are limited to 244 " + r"\(or fewer\)\ncharacters\. Column 's500' does not satisfy " + r"this restriction\. Use the\n'version=117' parameter to write " + r"the newer \(Stata 13 and later\) format\." ) with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: @@ -968,8 +968,8 @@ def test_categorical_warnings_and_errors(self): ) with tm.ensure_clean() as path: msg = ( - "Stata value labels for a single variable must have" - r" a combined length less than 32,000 characters\." + "Stata value labels for a single variable must have " + r"a combined length less than 32,000 characters\." ) with pytest.raises(ValueError, match=msg): original.to_stata(path) @@ -1714,12 +1714,12 @@ def test_invalid_file_not_written(self, version): df = DataFrame([content], columns=["invalid"]) with tm.ensure_clean() as path: msg1 = ( - r"'latin-1' codec can't encode character '\\ufffd'" - r" in position 14: ordinal not in range\(256\)" + r"'latin-1' codec can't encode character '\\ufffd' " + r"in position 14: ordinal not in range\(256\)" ) msg2 = ( - "'ascii' codec can't decode byte 0xef in position 14:" - r" ordinal not in range\(128\)" + "'ascii' codec can't decode byte 0xef in position 14: " + r"ordinal not in range\(128\)" ) with pytest.raises(UnicodeEncodeError, match=r"{}|{}".format(msg1, msg2)): with tm.assert_produces_warning(ResourceWarning): diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 170201b4f8e5c..d552241f9126f 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -257,8 +257,8 @@ def test_fillna(): tm.assert_series_equal(result, expected) msg = ( - r"Invalid fill method\. Expecting pad \(ffill\), backfill" - r" \(bfill\) or nearest\. Got 0" + r"Invalid fill method\. Expecting pad \(ffill\), backfill " + r"\(bfill\) or nearest\. 
Got 0" ) with pytest.raises(ValueError, match=msg): r.fillna(0) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 3764d9b7548fc..ce08a47f824ee 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -271,8 +271,8 @@ def test_ops_ndarray(self): tm.assert_numpy_array_equal(td * np.array([2]), expected) tm.assert_numpy_array_equal(np.array([2]) * td, expected) msg = ( - "ufunc '?multiply'? cannot use operands with types" - r" dtype\('<m8\[ns\]'\) and dtype\('float64'\)" + "ufunc '?multiply'? cannot use operands with types " + r"dtype\('<m8\[ns\]'\) and dtype\('float64'\)" ) [...] msg = ( r"cannot do slice indexing on <class 'pandas\.core\.indexes" - r"\.datetimes\.DatetimeIndex'> with these indexers \[{key}\]" - r" of <class 'float'>" + r"\.datetimes\.DatetimeIndex'> with these indexers \[{key}\] " + r"of <class 'float'>" ) with pytest.raises(TypeError, match=msg.format(key=r"4\.0")): datetime_series[4.0:10.0] diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 1fc98ded0d3d2..62273e2d363fb 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -52,8 +52,8 @@ def test_argsort_stable(self): tm.assert_series_equal(mindexer, Series(mexpected), check_dtype=False) tm.assert_series_equal(qindexer, Series(qexpected), check_dtype=False) msg = ( - r"ndarray Expected type <class 'numpy\.ndarray'>," - r" found <class 'pandas\.core\.series\.Series'> instead" + r"ndarray Expected type <class 'numpy\.ndarray'>, " + r"found <class 'pandas\.core\.series\.Series'> instead" ) with pytest.raises(AssertionError, match=msg): tm.assert_numpy_array_equal(qindexer, mindexer) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index ca93e989ba6b5..3836c1d56bf87 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -29,8 +29,8 @@ def test_isin_with_string_scalar(self): # GH#4763 s = Series(["A", "B", "C", "a", "B", "B", "A", "C"]) msg = ( - r"only list-like objects are allowed to be passed to isin\(\)," - r" you passed a \[str\]" + r"only list-like objects are allowed to be passed to isin\(\), " + r"you passed a \[str\]" ) with pytest.raises(TypeError, match=msg): s.isin("a") diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index b20baa2836363..770ad38b0215e 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -120,8 +120,8 @@ def test_replace_with_single_list(self): # make sure things don't get corrupted when fillna call fails s = ser.copy() msg = ( - r"Invalid fill method\. Expecting pad \(ffill\) or backfill" - r" \(bfill\)\. Got crash_cymbal" + r"Invalid fill method\. Expecting pad \(ffill\) or backfill " + r"\(bfill\)\. 
Got crash_cymbal" ) with pytest.raises(ValueError, match=msg): s.replace([1, 2, 3], inplace=True, method="crash_cymbal") diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 628c66583535d..71f6681e8c955 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -11,8 +11,8 @@ class TestSeriesAlterAxes: def test_setindex(self, string_series): # wrong type msg = ( - r"Index\(\.\.\.\) must be called with a collection of some" - r" kind, None was passed" + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, None was passed" ) with pytest.raises(TypeError, match=msg): string_series.index = None diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index a57ec2ba05d54..1fc582156a884 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -193,8 +193,8 @@ def test_astype_dict_like(self, dtype_class): dt3 = dtype_class({"abc": str, "def": str}) msg = ( - "Only the Series name can be used for the key in Series dtype" - r" mappings\." + "Only the Series name can be used for the key in Series dtype " + r"mappings\." ) with pytest.raises(KeyError, match=msg): s.astype(dt3) @@ -410,8 +410,8 @@ def test_arg_for_errors_in_astype(self): s = Series([1, 2, 3]) msg = ( - r"Expected value of kwarg 'errors' to be one of \['raise'," - r" 'ignore'\]\. Supplied value is 'False'" + r"Expected value of kwarg 'errors' to be one of \['raise', " + r"'ignore'\]\. Supplied value is 'False'" ) with pytest.raises(ValueError, match=msg): s.astype(np.float64, errors=False) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index d8eeefcbdce7b..6b7d9e00a5228 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1324,8 +1324,8 @@ def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) msg = ( - r"Invalid limit_direction: expecting one of \['forward'," - r" 'backward', 'both'\], got 'abc'" + r"Invalid limit_direction: expecting one of \['forward', " + r"'backward', 'both'\], got 'abc'" ) with pytest.raises(ValueError, match=msg): s.interpolate(method="linear", limit=2, limit_direction="abc") diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 2b46f86d49c5e..6c7f8c9b0475e 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -653,8 +653,8 @@ class TestIsin: def test_invalid(self): msg = ( - r"only list-like objects are allowed to be passed to isin\(\)," - r" you passed a \[int\]" + r"only list-like objects are allowed to be passed to isin\(\), " + r"you passed a \[int\]" ) with pytest.raises(TypeError, match=msg): algos.isin(1, 1) diff --git a/pandas/tests/util/test_validate_kwargs.py b/pandas/tests/util/test_validate_kwargs.py index a7b6d8f98cc60..8fe2a3712bf49 100644 --- a/pandas/tests/util/test_validate_kwargs.py +++ b/pandas/tests/util/test_validate_kwargs.py @@ -49,8 +49,8 @@ def test_validation(): @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) def test_validate_bool_kwarg_fail(name, value): msg = ( - f'For argument "{name}" expected type bool,' - f" received type {type(value).__name__}" + f'For argument "{name}" expected type bool, ' + f"received type {type(value).__name__}" ) with pytest.raises(ValueError, match=msg): From 3471270b1088e552879f5f292a31cc763c1549bb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 14 Jan 2020 06:39:47 -0600 Subject: [PATCH 35/37] API: Disallow 
NaN in StringArray constructor (#30980) --- pandas/_libs/lib.pyx | 4 +++ pandas/core/arrays/string_.py | 34 ++++++++++++++-------- pandas/core/strings.py | 8 +++-- pandas/tests/arrays/string_/test_string.py | 19 ++++++++++++ pandas/tests/dtypes/test_inference.py | 7 ++++- pandas/tests/test_strings.py | 2 +- 6 files changed, 58 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 719db5c03f07f..acd74591134bc 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1624,6 +1624,10 @@ cdef class StringValidator(Validator): cdef inline bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.str_) + cdef bint is_valid_null(self, object value) except -1: + # We deliberately exclude None / NaN here since StringArray uses NA + return value is C_NA + cpdef bint is_string_array(ndarray values, bint skipna=False): cdef: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 84130132de4dc..c485d1f50dc9d 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -93,9 +93,6 @@ class StringArray(PandasArray): StringArray is considered experimental. The implementation and parts of the API may change without warning. - In particular, the NA value used may change to no longer be - ``numpy.nan``. - Parameters ---------- values : array-like @@ -104,8 +101,11 @@ class StringArray(PandasArray): .. warning:: Currently, this expects an object-dtype ndarray - where the elements are Python strings. This may - change without warning in the future. + where the elements are Python strings or :attr:`pandas.NA`. + This may change without warning in the future. Use + :meth:`pandas.array` with ``dtype="string"`` for a stable way of + creating a `StringArray` from any sequence. + copy : bool, default False Whether to copy the array of data. @@ -119,6 +119,8 @@ class StringArray(PandasArray): See Also -------- + array + The recommended function for creating a StringArray. Series.str The string methods are available on Series backed by a StringArray. @@ -165,12 +167,10 @@ def __init__(self, values, copy=False): def _validate(self): """Validate that we only store NA or strings.""" if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True): - raise ValueError( - "StringArray requires a sequence of strings or missing values." - ) + raise ValueError("StringArray requires a sequence of strings or pandas.NA") if self._ndarray.dtype != "object": raise ValueError( - "StringArray requires a sequence of strings. Got " + "StringArray requires a sequence of strings or pandas.NA. Got " f"'{self._ndarray.dtype}' dtype instead." ) @@ -178,12 +178,22 @@ def _validate(self): def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype: assert dtype == "string" - result = super()._from_sequence(scalars, dtype=object, copy=copy) + + result = np.asarray(scalars, dtype="object") + if copy and result is scalars: + result = result.copy() + # Standardize all missing-like values to NA # TODO: it would be nice to do this in _validate / lib.is_string_array # We are already doing a scan over the values there. 
- result[result.isna()] = StringDtype.na_value - return result + na_values = isna(result) + if na_values.any(): + if result is scalars: + # force a copy now, if we haven't already + result = result.copy() + result[na_values] = StringDtype.na_value + + return cls(result) @classmethod def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): diff --git a/pandas/core/strings.py b/pandas/core/strings.py index f8d9eeb211a1e..0323eafff8dee 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -8,6 +8,7 @@ import numpy as np import pandas._libs.lib as lib +import pandas._libs.missing as libmissing import pandas._libs.ops as libops from pandas._typing import ArrayLike, Dtype from pandas.util._decorators import Appender @@ -118,12 +119,15 @@ def cat_safe(list_of_columns: List, sep: str): return result -def _na_map(f, arr, na_result=np.nan, dtype=object): - # should really _check_ for NA +def _na_map(f, arr, na_result=None, dtype=object): if is_extension_array_dtype(arr.dtype): + if na_result is None: + na_result = libmissing.NA # just StringDtype arr = extract_array(arr) return _map_stringarray(f, arr, na_value=na_result, dtype=dtype) + if na_result is None: + na_result = np.nan return _map_object(f, arr, na_mask=True, na_value=na_result, dtype=dtype) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 33e68f029922e..5e2f14af341ab 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -194,6 +194,25 @@ def test_constructor_raises(): with pytest.raises(ValueError, match="sequence of strings"): pd.arrays.StringArray(np.array([])) + with pytest.raises(ValueError, match="strings or pandas.NA"): + pd.arrays.StringArray(np.array(["a", np.nan], dtype=object)) + + with pytest.raises(ValueError, match="strings or pandas.NA"): + pd.arrays.StringArray(np.array(["a", None], dtype=object)) + + with pytest.raises(ValueError, match="strings or pandas.NA"): + pd.arrays.StringArray(np.array(["a", pd.NaT], dtype=object)) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_from_sequence_no_mutate(copy): + a = np.array(["a", np.nan], dtype=object) + original = a.copy() + result = pd.arrays.StringArray._from_sequence(a, copy=copy) + expected = pd.arrays.StringArray(np.array(["a", pd.NA], dtype=object)) + tm.assert_extension_array_equal(result, expected) + tm.assert_numpy_array_equal(a, original) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.xfail(reason="Not implemented StringArray.sum") diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index d022b0e97877a..5eb85de2b90f5 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1114,11 +1114,16 @@ def test_is_string_array(self): assert lib.is_string_array(np.array(["foo", "bar"])) assert not lib.is_string_array( - np.array(["foo", "bar", np.nan], dtype=object), skipna=False + np.array(["foo", "bar", pd.NA], dtype=object), skipna=False ) assert lib.is_string_array( + np.array(["foo", "bar", pd.NA], dtype=object), skipna=True + ) + # NaN is not valid for string array, just NA + assert not lib.is_string_array( np.array(["foo", "bar", np.nan], dtype=object), skipna=True ) + assert not lib.is_string_array(np.array([1, 2])) def test_to_object_array_tuples(self): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index a92f917820bd0..c37c78f3b9235 100644 --- a/pandas/tests/test_strings.py +++ 
b/pandas/tests/test_strings.py @@ -3521,7 +3521,7 @@ def test_string_array(any_string_method): if isinstance(expected, Series): if expected.dtype == "object" and lib.is_string_array( - expected.values, skipna=True + expected.dropna().values, ): assert result.dtype == "string" result = result.astype(object) From 81d96369753f5244f011e41bf12b439a53f68852 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Jan 2020 04:52:25 -0800 Subject: [PATCH 36/37] BUG: ensure_datetime64ns with big-endian array (#30976) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/_libs/tslibs/conversion.pyx | 5 +++++ pandas/tests/series/test_constructors.py | 9 +++++++++ pandas/tests/tslibs/test_conversion.py | 9 +++++++++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c3ee72f6442fc..14f82c2e71519 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -82,7 +82,7 @@ Numeric Conversion ^^^^^^^^^^ - +- Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) - - diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2988d7bae9a5e..a2b433c2007ff 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -99,6 +99,11 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True): shape = (<object>arr).shape + if (<object>arr).dtype.byteorder == ">": + # GH#29684 we incorrectly get OutOfBoundsDatetime if we don't swap + dtype = arr.dtype + arr = arr.astype(dtype.newbyteorder("<")) + ivalues = arr.view(np.int64).ravel() result = np.empty(shape, dtype=NS_DTYPE) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c38e5708be09b..d760939657d47 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -967,6 +967,15 @@ def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg): expected = Series(pd.Timestamp(arg)).dt.tz_localize("CET") tm.assert_series_equal(result, expected) + def test_constructor_datetime64_bigendian(self): + # GH#30976 + ms = np.datetime64(1, "ms") + arr = np.array([np.datetime64(1, "ms")], dtype=">M8[ms]") + + result = Series(arr) + expected = Series([Timestamp(ms)]) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("interval_constructor", [IntervalIndex, IntervalArray]) def test_construction_interval(self, interval_constructor): # construction from interval & array of intervals diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 2beeae85de683..96c2d6bbd8106 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -72,6 +72,15 @@ def test_length_zero_copy(dtype, copy): assert result.base is (None if copy else arr) + +def test_ensure_datetime64ns_bigendian(): + # GH#29684 + arr = np.array([np.datetime64(1, "ms")], dtype=">M8[ms]") + result = conversion.ensure_datetime64ns(arr) + + expected = np.array([np.datetime64(1, "ms")], dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + class SubDatetime(datetime): pass From 7d24b1cc5126a18de3c70bd9c4a4134e67b7f4a3 Mon Sep 17 00:00:00 2001 From: tsvikas Date: Tue, 14 Jan 2020 16:01:50 +0200 Subject: [PATCH 37/37] docs: correct wrong result in pd.NA ** 0 "pd.NA ** 0" actually returns 1. The docs were fixed to reflect this, and now auto-calculate the values instead of hard-coding them.
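As a quick illustration of the behaviour this patch documents (a minimal sketch, assuming pandas >= 1.0, where the experimental ``pd.NA`` scalar is available; the session below is illustrative and not part of the patch)::

    >>> import pandas as pd
    >>> pd.NA ** 0   # defined even with an NA operand: anything ** 0 is 1
    1
    >>> 1 ** pd.NA   # likewise, 1 to any power is 1
    1
    >>> pd.NA + 1    # no special case applies, so NA propagates
    <NA>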
--- doc/source/user_guide/missing_data.rst | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index df9949e8ac261..0f55980b3d015 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -825,13 +825,10 @@ For example, ``pd.NA`` propagates in arithmetic operations, similarly to There are a few special cases when the result is known, even when one of the operands is ``NA``. +.. ipython:: python -================ ====== -Operation Result -================ ====== -``pd.NA ** 0`` 0 -``1 ** pd.NA`` 1 -================ ====== + pd.NA ** 0 + 1 ** pd.NA In equality and comparison operations, ``pd.NA`` also propagates. This deviates from the behaviour of ``np.nan``, where comparisons with ``np.nan`` always return ``False``.
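To make the contrast in the hunk above concrete, a minimal sketch (same assumptions as the previous example)::

    >>> import numpy as np
    >>> import pandas as pd
    >>> np.nan == 1   # comparisons with np.nan are decided: always False
    False
    >>> pd.NA == 1    # comparisons with pd.NA propagate: the result is unknown
    <NA>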