From 06e27646aee57ee485b42806ef5149be6a62338a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 15:29:35 -0700 Subject: [PATCH 1/4] BUG: Setting DTI/TDI freq affecting other indexes viewing the same data --- pandas/core/indexes/datetimelike.py | 67 +++++++++++-------- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 1 - pandas/core/resample.py | 3 +- pandas/tests/arithmetic/test_timedelta64.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 3 +- pandas/tests/indexes/datetimes/test_ops.py | 14 ++++ pandas/tests/indexes/datetimes/test_setops.py | 5 +- pandas/tests/indexes/timedeltas/test_ops.py | 14 ++++ 9 files changed, 74 insertions(+), 37 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 25333b3a08dce..af9878e508f7c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -43,7 +43,7 @@ from pandas.core.ops import get_op_result_name from pandas.core.tools.timedeltas import to_timedelta -from pandas.tseries.frequencies import DateOffset +from pandas.tseries.frequencies import DateOffset, to_offset _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -80,8 +80,7 @@ def wrapper(left, right): cache=True, ) @inherit_names( - ["mean", "freq", "freqstr", "asi8", "_box_values", "_box_func"], - DatetimeLikeArrayMixin, + ["mean", "asi8", "_box_values", "_box_func"], DatetimeLikeArrayMixin, ) class DatetimeIndexOpsMixin(ExtensionIndex): """ @@ -613,17 +612,41 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index): _is_monotonic_increasing = Index.is_monotonic_increasing _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique + _freq = lib.no_default - def _set_freq(self, freq): + @property + def freq(self): """ - Set the _freq attribute on our underlying DatetimeArray. - - Parameters - ---------- - freq : DateOffset, None, or "infer" + In limited circumstances, our freq may differ from that of our _data. """ - # GH#29843 - self._data._with_freq(freq) + if self._freq is not lib.no_default: + return self._freq + return self._data.freq + + @property + def freqstr(self): + """ + Return the frequency object as a string if its set, otherwise None. + """ + if self.freq is None: + return None + return self.freq.freqstr + + def _with_freq(self, freq): + index = self.copy(deep=False) + if freq is None: + # Even if we _can_ have a freq, we might want to set it to None + index._freq = None + elif len(self) == 0 and isinstance(freq, DateOffset): + # Always valid. In the TimedeltaArray case, we assume this + # is a Tick offset. 
+ index._freq = freq + else: + assert freq == "infer", freq + freq = to_offset(self.inferred_freq) + index._freq = freq + + return index def _shallow_copy(self, values=None, name: Label = lib.no_default): name = self.name if name is lib.no_default else name @@ -645,8 +668,7 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): @Appender(Index.difference.__doc__) def difference(self, other, sort=None): - new_idx = super().difference(other, sort=sort) - new_idx._set_freq(None) + new_idx = super().difference(other, sort=sort)._with_freq(None) return new_idx def intersection(self, other, sort=False): @@ -691,7 +713,7 @@ def intersection(self, other, sort=False): result = Index.intersection(self, other, sort=sort) if isinstance(result, type(self)): if result.freq is None: - result._set_freq("infer") + result = result._with_freq("infer") return result elif ( @@ -702,14 +724,7 @@ def intersection(self, other, sort=False): or (not self.is_monotonic or not other.is_monotonic) ): result = Index.intersection(self, other, sort=sort) - - # Invalidate the freq of `result`, which may not be correct at - # this point, depending on the values. - - result._set_freq(None) - result = self._shallow_copy(result._data, name=result.name) - if result.freq is None: - result._set_freq("infer") + result = result._with_freq("infer") return result # to make our life easier, "sort" the two ranges @@ -780,9 +795,8 @@ def _fast_union(self, other, sort=None): loc = right.searchsorted(left_start, side="left") right_chunk = right.values[:loc] dates = concat_compat((left.values, right_chunk)) - result = self._shallow_copy(dates) - result._set_freq("infer") # TODO: can we infer that it has self.freq? + result = self._shallow_copy(dates)._with_freq("infer") return result else: left, right = other, self @@ -795,9 +809,8 @@ def _fast_union(self, other, sort=None): loc = right.searchsorted(left_end, side="right") right_chunk = right.values[loc:] dates = concat_compat((left.values, right_chunk)) - result = self._shallow_copy(dates) - result._set_freq("infer") # TODO: can we infer that it has self.freq? + result = self._shallow_copy(dates)._with_freq("infer") return result else: return left @@ -814,7 +827,7 @@ def _union(self, other, sort): if this._can_fast_union(other): result = this._fast_union(other, sort=sort) if result.freq is None: - result._set_freq("infer") + result = result._with_freq("infer") return result else: i8self = Int64Index._simple_new(self.asi8, name=self.name) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1f565828ec7a5..37f14b3d154fd 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -72,7 +72,7 @@ def _new_PeriodIndex(cls, **d): PeriodArray, wrap=True, ) -@inherit_names(["is_leap_year", "freq", "_format_native_types"], PeriodArray) +@inherit_names(["is_leap_year", "freq", "freqstr", "_format_native_types"], PeriodArray) class PeriodIndex(DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in time. 
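For context, a minimal sketch of the view-safety this patch enforces, mirroring the test_freq_view_safe tests added later in this patch (_with_freq is the private helper introduced above, so this is illustrative rather than public API):

    import pandas as pd

    # Two objects can view the same underlying DatetimeArray.
    dti = pd.date_range("2016-01-01", periods=5)  # freq "D"
    dta = dti._data

    # Clearing the freq on a second index built over that array must not
    # write through to the shared array, as the old _set_freq did.
    dti2 = pd.DatetimeIndex(dta)._with_freq(None)
    assert dti2.freq is None

    # The original index and the shared array keep their freq.
    assert dti.freq == "D"
    assert dta.freq == "D"
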
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 765b948f13e96..7fafebd0a64f3 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -57,7 +57,6 @@ "std", "median", "_format_native_types", - "freq", ], TimedeltaArray, ) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 1e93597d92a5d..7c58fc3ab0d0a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1017,7 +1017,8 @@ def _downsample(self, how, **kwargs): if not len(ax): # reset to the new freq obj = obj.copy() - obj.index._set_freq(self.freq) + obj.index = obj.index._with_freq(self.freq) + assert obj.index.freq == self.freq, (obj.index.freq, self.freq) return obj # do we have a regular frequency diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index beb16c9549cc4..6d09a9e0d642a 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -535,7 +535,7 @@ def test_tda_add_sub_index(self): def test_tda_add_dt64_object_array(self, box_df_fail, tz_naive_fixture): # Result should be cast back to DatetimeArray dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture) - dti._set_freq(None) + dti = dti._with_freq(None) tdi = dti - dti obj = tm.box_expected(tdi, box_df_fail) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index e109c7a4f1c8d..892135bfa83d0 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -462,6 +462,5 @@ def test_split_non_utc(self): # GH 14042 indices = pd.date_range("2016-01-01 00:00:00+0200", freq="S", periods=10) result = np.split(indices, indices_or_sections=[])[0] - expected = indices.copy() - expected._set_freq(None) + expected = indices._with_freq(None) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index c55b0481c1041..66782299f6c5d 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -402,6 +402,20 @@ def test_freq_setter_errors(self): with pytest.raises(ValueError, match="Invalid frequency"): idx._data.freq = "foo" + def test_freq_view_safe(self): + # Setting the freq for one DatetimeIndex shouldn't alter the freq + # for another that views the same data + + dti = pd.date_range("2016-01-01", periods=5) + dta = dti._data + + dti2 = DatetimeIndex(dta)._with_freq(None) + assert dti2.freq is None + + # Original was not altered + assert dti.freq == "D" + assert dta.freq == "D" + class TestBusinessDatetimeIndex: def setup_method(self, method): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index c088301097beb..abdb1347f8892 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -231,9 +231,7 @@ def test_intersection(self, tz, sort): ]: result = base.intersection(rng) tm.assert_index_equal(result, expected) - assert result.name == expected.name assert result.freq == expected.freq - assert result.tz == expected.tz # non-monotonic base = DatetimeIndex( @@ -255,6 +253,7 @@ def test_intersection(self, tz, sort): # GH 7880 rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx") expected4 = DatetimeIndex([], tz=tz, name="idx") + assert expected4.freq is None for (rng, expected) in [ (rng2, expected2), @@ 
-265,9 +264,7 @@ def test_intersection(self, tz, sort): if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) - assert result.name == expected.name assert result.freq is None - assert result.tz == expected.tz def test_intersection_empty(self): # empty same freq GH2129 diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index aa1bf997fc66b..e411043165616 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -289,3 +289,17 @@ def test_freq_setter_errors(self): # setting with non-freq string with pytest.raises(ValueError, match="Invalid frequency"): idx._data.freq = "foo" + + def test_freq_view_safe(self): + # Setting the freq for one TimedeltaIndex shouldn't alter the freq + # for another that views the same data + + tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D") + tda = tdi._data + + tdi2 = TimedeltaIndex(tda)._with_freq(None) + assert tdi2.freq is None + + # Original was not altered + assert tdi.freq == "2D" + assert tda.freq == "2D" From b541a8cb0dcd56844a1c200040dfb9094ec9f637 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 21 Apr 2020 17:19:34 -0700 Subject: [PATCH 2/4] merge --- ci/code_checks.sh | 8 +- doc/source/getting_started/index.rst | 2 +- doc/source/user_guide/cookbook.rst | 27 -- doc/source/whatsnew/v1.1.0.rst | 9 +- pandas/_libs/internals.pyx | 6 +- pandas/_libs/window/aggregations.pyx | 5 +- pandas/_testing.py | 33 ++- pandas/conftest.py | 60 +---- pandas/core/array_algos/transforms.py | 4 + pandas/core/arrays/categorical.py | 104 ++++---- pandas/core/arrays/datetimelike.py | 249 ++++++++++++------ pandas/core/computation/align.py | 2 +- pandas/core/dtypes/cast.py | 30 +-- pandas/core/dtypes/missing.py | 3 +- pandas/core/frame.py | 26 +- pandas/core/generic.py | 115 ++++---- pandas/core/groupby/generic.py | 23 +- pandas/core/indexes/datetimelike.py | 63 ++--- pandas/core/indexes/multi.py | 6 +- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/indexing.py | 2 + pandas/core/internals/blocks.py | 8 +- pandas/core/internals/managers.py | 16 +- pandas/core/series.py | 35 +-- pandas/core/util/numba_.py | 10 +- pandas/core/window/common.py | 1 + pandas/core/window/rolling.py | 13 +- pandas/io/common.py | 10 +- pandas/io/formats/csvs.py | 4 +- pandas/io/formats/info.py | 29 +- pandas/io/json/_json.py | 9 +- pandas/io/parquet.py | 4 +- pandas/io/pytables.py | 4 +- pandas/tests/arrays/boolean/test_function.py | 6 +- pandas/tests/arrays/categorical/test_algos.py | 4 +- pandas/tests/arrays/categorical/test_api.py | 2 +- .../tests/arrays/sparse/test_arithmetics.py | 5 - pandas/tests/dtypes/test_common.py | 48 ++-- pandas/tests/dtypes/test_inference.py | 166 ++++++------ pandas/tests/frame/indexing/test_at.py | 14 + pandas/tests/frame/test_api.py | 14 +- pandas/tests/frame/test_reshape.py | 6 +- pandas/tests/generic/test_generic.py | 24 +- pandas/tests/groupby/test_categorical.py | 3 +- pandas/tests/groupby/test_function.py | 21 -- pandas/tests/groupby/transform/test_numba.py | 5 +- .../tests/groupby/transform/test_transform.py | 3 - pandas/tests/indexes/common.py | 32 ++- pandas/tests/indexes/datetimelike.py | 9 +- pandas/tests/indexes/multi/test_compat.py | 6 +- pandas/tests/indexes/multi/test_reshape.py | 4 +- pandas/tests/indexes/multi/test_sorting.py | 9 +- .../tests/indexes/ranges/test_constructors.py | 4 +- pandas/tests/indexes/ranges/test_range.py | 3 +- .../indexes/timedeltas/test_timedelta.py | 4 +- 
.../multiindex/test_chaining_and_caching.py | 3 +- pandas/tests/indexing/multiindex/test_xs.py | 12 + pandas/tests/indexing/test_categorical.py | 7 +- pandas/tests/indexing/test_partial.py | 30 ++- pandas/tests/io/parser/test_network.py | 20 +- pandas/tests/io/test_gcs.py | 32 +-- pandas/tests/io/test_pickle.py | 1 - pandas/tests/reductions/test_reductions.py | 8 + pandas/tests/scalar/period/test_period.py | 7 - .../tests/scalar/timedelta/test_arithmetic.py | 1 + .../tests/series/indexing/test_alter_index.py | 3 +- pandas/tests/series/indexing/test_boolean.py | 5 - pandas/tests/series/indexing/test_getitem.py | 12 + pandas/tests/series/indexing/test_loc.py | 4 +- pandas/tests/series/test_arithmetic.py | 2 +- pandas/tests/test_algos.py | 3 +- pandas/tests/window/test_base_indexer.py | 18 ++ pandas/tests/window/test_numba.py | 3 +- pandas/util/_validators.py | 6 +- setup.cfg | 2 +- 75 files changed, 802 insertions(+), 691 deletions(-) create mode 100644 pandas/tests/frame/indexing/test_at.py diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 45b7db74fa409..427be459d9edc 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -150,7 +150,13 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then # Check for imports from pandas._testing instead of `import pandas._testing as tm` invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests RET=$(($RET + $?)) ; echo $MSG "DONE" - invgrep -R --include="*.py*" -E "from pandas.util import testing as tm" pandas/tests + invgrep -R --include="*.py*" -E "from pandas import _testing as tm" pandas/tests + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # No direct imports from conftest + invgrep -R --include="*.py*" -E "conftest import" pandas/tests + RET=$(($RET + $?)) ; echo $MSG "DONE" + invgrep -R --include="*.py*" -E "import conftest" pandas/tests RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Check for use of exec' ; echo $MSG diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst index 3f15c91f83c6a..eb7ee000a9a86 100644 --- a/doc/source/getting_started/index.rst +++ b/doc/source/getting_started/index.rst @@ -398,7 +398,7 @@ data set, a sliding window of the data or grouped by categories. The latter is a
 Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
-from long to wide format. With aggregations built-in, a pivot table is created with a sinlge command.
+from long to wide format. With aggregations built-in, a pivot table is created with a single command.

 .. image:: ../_static/schemas/07_melt.svg
    :align: center
diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst
index 992cdfa5d7332..56ef6fc479f2c 100644
--- a/doc/source/user_guide/cookbook.rst
+++ b/doc/source/user_guide/cookbook.rst
@@ -1333,33 +1333,6 @@ Values can be set to NaT using np.nan, similar to datetime
    y[1] = np.nan
    y

-Aliasing axis names
--------------------
-
-To globally provide aliases for axis names, one can define these 2 functions:
-
-.. ipython:: python
-
-    def set_axis_alias(cls, axis, alias):
-        if axis not in cls._AXIS_NUMBERS:
-            raise Exception("invalid axis [%s] for alias [%s]" % (axis, alias))
-        cls._AXIS_ALIASES[alias] = axis
-
-.. ipython:: python
-
-    def clear_axis_alias(cls, axis, alias):
-        if axis not in cls._AXIS_NUMBERS:
-            raise Exception("invalid axis [%s] for alias [%s]" % (axis, alias))
-        cls._AXIS_ALIASES.pop(alias, None)
-
-.. ipython:: python
-
-    set_axis_alias(pd.DataFrame, 'columns', 'myaxis2')
-    df2 = pd.DataFrame(np.random.randn(3, 2), columns=['c1', 'c2'],
-                       index=['i1', 'i2', 'i3'])
-    df2.sum(axis='myaxis2')
-    clear_axis_alias(pd.DataFrame, 'columns', 'myaxis2')
-
 Creating example data
 ---------------------
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index a797090a83444..07849702c646d 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -175,14 +175,16 @@ Other API changes
 - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`)
 - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`)
 - Using a :func:`pandas.api.indexers.BaseIndexer` with ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`)
-- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`)
+- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`)
 - Added a :func:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations.
-
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 - :meth:`DataFrame.swaplevels` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`.
-  Previously a ``AttributeError`` was raised (:issue:`31126`)
+  Previously an ``AttributeError`` was raised (:issue:`31126`)
+- :meth:`DataFrame.xs` now raises a ``TypeError`` if a ``level`` keyword is supplied and the axis is not a :class:`MultiIndex`.
+  Previously an ``AttributeError`` was raised (:issue:`33610`)
 - :meth:`DataFrameGroupby.mean` and :meth:`SeriesGroupby.mean` (and similarly for :meth:`~DataFrameGroupby.median`, :meth:`~DataFrameGroupby.std` and :meth:`~DataFrameGroupby.var`) now raise a ``TypeError`` if a not-accepted keyword argument is passed into it.
Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median`) (:issue:`31485`) @@ -527,6 +529,7 @@ Indexing - Bug in :meth:`DataFrame.iloc` when slicing a single column-:class:`DataFrame`` with ``ExtensionDtype`` (e.g. ``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`) - Bug in :meth:`DatetimeIndex.insert` and :meth:`TimedeltaIndex.insert` causing index ``freq`` to be lost when setting an element into an empty :class:`Series` (:issue:33573`) - Bug in :meth:`Series.__setitem__` with an :class:`IntervalIndex` and a list-like key of integers (:issue:`33473`) +- Bug in :meth:`Series.__getitem__` allowing missing labels with ``np.ndarray``, :class:`Index`, :class:`Series` indexers but not ``list``, these now all raise ``KeyError`` (:issue:`33646`) Missing ^^^^^^^ @@ -580,6 +583,8 @@ I/O - Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`) - :func:`pandas.read_hdf` has a more explicit error message when loading an unsupported HDF file (:issue:`9539`) +- Bug in :meth:`~DataFrame.to_parquet` was not raising ``PermissionError`` when writing to a private s3 bucket with invalid creds. (:issue:`27679`) +- Bug in :meth:`~DataFrame.to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`) Plotting ^^^^^^^^ diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index d3d8bead88d08..1e53b789aa05c 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -141,10 +141,10 @@ cdef class BlockPlacement: return BlockPlacement(val) - def delete(self, loc) -> "BlockPlacement": + def delete(self, loc) -> BlockPlacement: return BlockPlacement(np.delete(self.as_array, loc, axis=0)) - def append(self, others) -> "BlockPlacement": + def append(self, others) -> BlockPlacement: if not len(others): return self @@ -185,7 +185,7 @@ cdef class BlockPlacement: val = newarr return BlockPlacement(val) - def add(self, other) -> "BlockPlacement": + def add(self, other) -> BlockPlacement: # We can get here with int or ndarray return self.iadd(other) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index f3889039c095e..673820fd8464a 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -843,7 +843,8 @@ def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start, def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, - ndarray[int64_t] end, int64_t minp, int64_t win): + ndarray[int64_t] end, int64_t minp, int64_t win=0): + # GH 32865. 
win argument kept for compatibility cdef: float64_t val, res, prev bint err = False @@ -858,7 +859,7 @@ def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start, # actual skiplist ops outweigh any window computation costs output = np.empty(N, dtype=float) - if win == 0 or (end - start).max() == 0: + if (end - start).max() == 0: output[:] = NaN return output win = (end - start).max() diff --git a/pandas/_testing.py b/pandas/_testing.py index 1f6b645c821c8..4f957b7a55e3a 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -22,7 +22,7 @@ ) import pandas._libs.testing as _testing -from pandas._typing import FilePathOrBuffer, FrameOrSeries +from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeries from pandas.compat import _get_lzma_file, _import_lzma from pandas.core.dtypes.common import ( @@ -73,6 +73,37 @@ _K = 4 _RAISE_NETWORK_ERROR_DEFAULT = False +UNSIGNED_INT_DTYPES: List[Dtype] = ["uint8", "uint16", "uint32", "uint64"] +UNSIGNED_EA_INT_DTYPES: List[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"] +SIGNED_INT_DTYPES: List[Dtype] = [int, "int8", "int16", "int32", "int64"] +SIGNED_EA_INT_DTYPES: List[Dtype] = ["Int8", "Int16", "Int32", "Int64"] +ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES +ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES + +FLOAT_DTYPES: List[Dtype] = [float, "float32", "float64"] +COMPLEX_DTYPES: List[Dtype] = [complex, "complex64", "complex128"] +STRING_DTYPES: List[Dtype] = [str, "str", "U"] + +DATETIME64_DTYPES: List[Dtype] = ["datetime64[ns]", "M8[ns]"] +TIMEDELTA64_DTYPES: List[Dtype] = ["timedelta64[ns]", "m8[ns]"] + +BOOL_DTYPES = [bool, "bool"] +BYTES_DTYPES = [bytes, "bytes"] +OBJECT_DTYPES = [object, "object"] + +ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES +ALL_NUMPY_DTYPES = ( + ALL_REAL_DTYPES + + COMPLEX_DTYPES + + STRING_DTYPES + + DATETIME64_DTYPES + + TIMEDELTA64_DTYPES + + BOOL_DTYPES + + OBJECT_DTYPES + + BYTES_DTYPES +) + + # set testing_mode _testing_mode_warnings = (DeprecationWarning, ResourceWarning) diff --git a/pandas/conftest.py b/pandas/conftest.py index e1088dae3925a..70be6b5d9fcbc 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -23,7 +23,6 @@ from decimal import Decimal import operator import os -from typing import List from dateutil.tz import tzlocal, tzutc import hypothesis @@ -32,7 +31,6 @@ import pytest from pytz import FixedOffset, utc -from pandas._typing import Dtype import pandas.util._test_decorators as td import pandas as pd @@ -864,39 +862,7 @@ def utc_fixture(request): # ---------------------------------------------------------------- # Dtypes # ---------------------------------------------------------------- - -UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"] -UNSIGNED_EA_INT_DTYPES = ["UInt8", "UInt16", "UInt32", "UInt64"] -SIGNED_INT_DTYPES: List[Dtype] = [int, "int8", "int16", "int32", "int64"] -SIGNED_EA_INT_DTYPES = ["Int8", "Int16", "Int32", "Int64"] -ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES -ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES - -FLOAT_DTYPES: List[Dtype] = [float, "float32", "float64"] -COMPLEX_DTYPES: List[Dtype] = [complex, "complex64", "complex128"] -STRING_DTYPES: List[Dtype] = [str, "str", "U"] - -DATETIME64_DTYPES = ["datetime64[ns]", "M8[ns]"] -TIMEDELTA64_DTYPES = ["timedelta64[ns]", "m8[ns]"] - -BOOL_DTYPES = [bool, "bool"] -BYTES_DTYPES = [bytes, "bytes"] -OBJECT_DTYPES = [object, "object"] - -ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES -ALL_NUMPY_DTYPES = ( - ALL_REAL_DTYPES - + 
COMPLEX_DTYPES - + STRING_DTYPES - + DATETIME64_DTYPES - + TIMEDELTA64_DTYPES - + BOOL_DTYPES - + OBJECT_DTYPES - + BYTES_DTYPES -) - - -@pytest.fixture(params=STRING_DTYPES) +@pytest.fixture(params=tm.STRING_DTYPES) def string_dtype(request): """ Parametrized fixture for string dtypes. @@ -908,7 +874,7 @@ def string_dtype(request): return request.param -@pytest.fixture(params=BYTES_DTYPES) +@pytest.fixture(params=tm.BYTES_DTYPES) def bytes_dtype(request): """ Parametrized fixture for bytes dtypes. @@ -919,7 +885,7 @@ def bytes_dtype(request): return request.param -@pytest.fixture(params=OBJECT_DTYPES) +@pytest.fixture(params=tm.OBJECT_DTYPES) def object_dtype(request): """ Parametrized fixture for object dtypes. @@ -930,7 +896,7 @@ def object_dtype(request): return request.param -@pytest.fixture(params=DATETIME64_DTYPES) +@pytest.fixture(params=tm.DATETIME64_DTYPES) def datetime64_dtype(request): """ Parametrized fixture for datetime64 dtypes. @@ -941,7 +907,7 @@ def datetime64_dtype(request): return request.param -@pytest.fixture(params=TIMEDELTA64_DTYPES) +@pytest.fixture(params=tm.TIMEDELTA64_DTYPES) def timedelta64_dtype(request): """ Parametrized fixture for timedelta64 dtypes. @@ -952,7 +918,7 @@ def timedelta64_dtype(request): return request.param -@pytest.fixture(params=FLOAT_DTYPES) +@pytest.fixture(params=tm.FLOAT_DTYPES) def float_dtype(request): """ Parameterized fixture for float dtypes. @@ -964,7 +930,7 @@ def float_dtype(request): return request.param -@pytest.fixture(params=COMPLEX_DTYPES) +@pytest.fixture(params=tm.COMPLEX_DTYPES) def complex_dtype(request): """ Parameterized fixture for complex dtypes. @@ -976,7 +942,7 @@ def complex_dtype(request): return request.param -@pytest.fixture(params=SIGNED_INT_DTYPES) +@pytest.fixture(params=tm.SIGNED_INT_DTYPES) def sint_dtype(request): """ Parameterized fixture for signed integer dtypes. @@ -990,7 +956,7 @@ def sint_dtype(request): return request.param -@pytest.fixture(params=UNSIGNED_INT_DTYPES) +@pytest.fixture(params=tm.UNSIGNED_INT_DTYPES) def uint_dtype(request): """ Parameterized fixture for unsigned integer dtypes. @@ -1003,7 +969,7 @@ def uint_dtype(request): return request.param -@pytest.fixture(params=ALL_INT_DTYPES) +@pytest.fixture(params=tm.ALL_INT_DTYPES) def any_int_dtype(request): """ Parameterized fixture for any integer dtype. @@ -1021,7 +987,7 @@ def any_int_dtype(request): return request.param -@pytest.fixture(params=ALL_EA_INT_DTYPES) +@pytest.fixture(params=tm.ALL_EA_INT_DTYPES) def any_nullable_int_dtype(request): """ Parameterized fixture for any nullable integer dtype. @@ -1038,7 +1004,7 @@ def any_nullable_int_dtype(request): return request.param -@pytest.fixture(params=ALL_REAL_DTYPES) +@pytest.fixture(params=tm.ALL_REAL_DTYPES) def any_real_dtype(request): """ Parameterized fixture for any (purely) real numeric dtype. @@ -1059,7 +1025,7 @@ def any_real_dtype(request): return request.param -@pytest.fixture(params=ALL_NUMPY_DTYPES) +@pytest.fixture(params=tm.ALL_NUMPY_DTYPES) def any_numpy_dtype(request): """ Parameterized fixture for all numpy dtypes. diff --git a/pandas/core/array_algos/transforms.py b/pandas/core/array_algos/transforms.py index f775b6d733d9c..b8b234d937292 100644 --- a/pandas/core/array_algos/transforms.py +++ b/pandas/core/array_algos/transforms.py @@ -10,6 +10,10 @@ def shift(values: np.ndarray, periods: int, axis: int, fill_value) -> np.ndarray: new_values = values + if periods == 0: + # TODO: should we copy here? 
+ return new_values + # make sure array sent to np.roll is c_contiguous f_ordered = values.flags.f_contiguous if f_ordered: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index af07dee3b6838..cdd0717849e96 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -27,7 +27,6 @@ from pandas.core.dtypes.common import ( ensure_int64, ensure_object, - ensure_platform_int, is_categorical_dtype, is_datetime64_dtype, is_dict_like, @@ -51,6 +50,7 @@ from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms from pandas.core.algorithms import _get_data_algo, factorize, take, take_1d, unique1d +from pandas.core.array_algos.transforms import shift from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs import pandas.core.common as com @@ -199,17 +199,6 @@ def contains(cat, key, container): return any(loc_ in container for loc_ in loc) -_codes_doc = """ -The category codes of this categorical. - -Level codes are an array if integer which are the positions of the real -values in the categories array. - -There is not setter, use the other categorical methods and the normal item -setter to change values in the categorical. -""" - - class Categorical(ExtensionArray, PandasObject): """ Represent a categorical variable in classic R / S-plus fashion. @@ -652,27 +641,26 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): return cls(codes, dtype=dtype, fastpath=True) - def _get_codes(self): + @property + def codes(self) -> np.ndarray: """ - Get the codes. + The category codes of this categorical. + + Codes are an array of integers which are the positions of the actual + values in the categories array. + + There is no setter, use the other categorical methods and the normal item + setter to change values in the categorical. Returns ------- - codes : integer array view - A non writable view of the `codes` array. + ndarray[int] + A non-writable view of the `codes` array. """ v = self._codes.view() v.flags.writeable = False return v - def _set_codes(self, codes): - """ - Not settable by the user directly - """ - raise ValueError("cannot set Categorical codes directly") - - codes = property(fget=_get_codes, fset=_set_codes, doc=_codes_doc) - def _set_categories(self, categories, fastpath=False): """ Sets new categories inplace @@ -1241,23 +1229,41 @@ def shift(self, periods, fill_value=None): codes = self.codes if codes.ndim > 1: raise NotImplementedError("Categorical with ndim > 1.") - if np.prod(codes.shape) and (periods != 0): - codes = np.roll(codes, ensure_platform_int(periods), axis=0) - if isna(fill_value): - fill_value = -1 - elif fill_value in self.categories: - fill_value = self.categories.get_loc(fill_value) - else: - raise ValueError( - f"'fill_value={fill_value}' is not present " - "in this Categorical's categories" - ) - if periods > 0: - codes[:periods] = fill_value - else: - codes[periods:] = fill_value - return self.from_codes(codes, dtype=self.dtype) + fill_value = self._validate_fill_value(fill_value) + + codes = shift(codes.copy(), periods, axis=0, fill_value=fill_value) + + return self._constructor(codes, dtype=self.dtype, fastpath=True) + + def _validate_fill_value(self, fill_value): + """ + Convert a user-facing fill_value to a representation to use with our + underlying ndarray, raising ValueError if this is not possible. 
+ + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : int + + Raises + ------ + ValueError + """ + + if isna(fill_value): + fill_value = -1 + elif fill_value in self.categories: + fill_value = self.categories.get_loc(fill_value) + else: + raise ValueError( + f"'fill_value={fill_value}' is not present " + "in this Categorical's categories" + ) + return fill_value def __array__(self, dtype=None) -> np.ndarray: """ @@ -1835,24 +1841,12 @@ def take(self, indexer, allow_fill: bool = False, fill_value=None): """ indexer = np.asarray(indexer, dtype=np.intp) - dtype = self.dtype - - if isna(fill_value): - fill_value = -1 - elif allow_fill: + if allow_fill: # convert user-provided `fill_value` to codes - if fill_value in self.categories: - fill_value = self.categories.get_loc(fill_value) - else: - msg = ( - f"'fill_value' ('{fill_value}') is not in this " - "Categorical's categories." - ) - raise TypeError(msg) + fill_value = self._validate_fill_value(fill_value) codes = take(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value) - result = type(self).from_codes(codes, dtype=dtype) - return result + return self._constructor(codes, dtype=self.dtype, fastpath=True) def take_nd(self, indexer, allow_fill: bool = False, fill_value=None): # GH#27745 deprecate alias that other EAs dont have diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e3cdc898a88bf..cabe8922de1b3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -588,9 +588,6 @@ def __setitem__( # to a period in from_sequence). For DatetimeArray, it's Timestamp... # I don't know if mypy can do that, possibly with Generics. # https://mypy.readthedocs.io/en/latest/generics.html - if lib.is_scalar(value) and not isna(value): - value = com.maybe_box_datetimelike(value) - if is_list_like(value): is_slice = isinstance(key, slice) @@ -609,21 +606,7 @@ def __setitem__( elif not len(key): return - value = type(self)._from_sequence(value, dtype=self.dtype) - self._check_compatible_with(value, setitem=True) - value = value.asi8 - elif isinstance(value, self._scalar_type): - self._check_compatible_with(value, setitem=True) - value = self._unbox_scalar(value) - elif is_valid_nat_for_dtype(value, self.dtype): - value = iNaT - else: - msg = ( - f"'value' should be a '{self._scalar_type.__name__}', 'NaT', " - f"or array of those. Got '{type(value).__name__}' instead." - ) - raise TypeError(msg) - + value = self._validate_setitem_value(value) key = check_array_indexer(self, key) self._data[key] = value self._maybe_clear_freq() @@ -682,35 +665,6 @@ def unique(self): result = unique1d(self.asi8) return type(self)(result, dtype=self.dtype) - def _validate_fill_value(self, fill_value): - """ - If a fill_value is passed to `take` convert it to an i8 representation, - raising ValueError if this is not possible. - - Parameters - ---------- - fill_value : object - - Returns - ------- - fill_value : np.int64 - - Raises - ------ - ValueError - """ - if isna(fill_value): - fill_value = iNaT - elif isinstance(fill_value, self._recognized_scalars): - self._check_compatible_with(fill_value) - fill_value = self._scalar_type(fill_value) - fill_value = self._unbox_scalar(fill_value) - else: - raise ValueError( - f"'fill_value' should be a {self._scalar_type}. Got '{fill_value}'." 
- ) - return fill_value - def take(self, indices, allow_fill=False, fill_value=None): if allow_fill: fill_value = self._validate_fill_value(fill_value) @@ -769,6 +723,46 @@ def shift(self, periods=1, fill_value=None, axis=0): if not self.size or periods == 0: return self.copy() + fill_value = self._validate_shift_value(fill_value) + new_values = shift(self._data, periods, axis, fill_value) + + return type(self)._simple_new(new_values, dtype=self.dtype) + + # ------------------------------------------------------------------ + # Validation Methods + # TODO: try to de-duplicate these, ensure identical behavior + + def _validate_fill_value(self, fill_value): + """ + If a fill_value is passed to `take` convert it to an i8 representation, + raising ValueError if this is not possible. + + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : np.int64 + + Raises + ------ + ValueError + """ + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, self._recognized_scalars): + self._check_compatible_with(fill_value) + fill_value = self._scalar_type(fill_value) + fill_value = self._unbox_scalar(fill_value) + else: + raise ValueError( + f"'fill_value' should be a {self._scalar_type}. Got '{fill_value}'." + ) + return fill_value + + def _validate_shift_value(self, fill_value): + # TODO(2.0): once this deprecation is enforced, used _validate_fill_value if is_valid_nat_for_dtype(fill_value, self.dtype): fill_value = NaT elif not isinstance(fill_value, self._recognized_scalars): @@ -786,15 +780,104 @@ def shift(self, periods=1, fill_value=None, axis=0): "will raise in a future version, pass " f"{self._scalar_type.__name__} instead.", FutureWarning, - stacklevel=9, + stacklevel=10, ) fill_value = new_fill fill_value = self._unbox_scalar(fill_value) + return fill_value - new_values = shift(self._data, periods, axis, fill_value) + def _validate_searchsorted_value(self, value): + if isinstance(value, str): + try: + value = self._scalar_from_string(value) + except ValueError as err: + raise TypeError( + "searchsorted requires compatible dtype or scalar" + ) from err - return type(self)._simple_new(new_values, dtype=self.dtype) + elif is_valid_nat_for_dtype(value, self.dtype): + value = NaT + + elif isinstance(value, self._recognized_scalars): + value = self._scalar_type(value) + + elif is_list_like(value) and not isinstance(value, type(self)): + value = array(value) + + if not type(self)._is_recognized_dtype(value): + raise TypeError( + "searchsorted requires compatible dtype or scalar, " + f"not {type(value).__name__}" + ) + + if not (isinstance(value, (self._scalar_type, type(self))) or (value is NaT)): + raise TypeError(f"Unexpected type for 'value': {type(value)}") + + if isinstance(value, type(self)): + self._check_compatible_with(value) + value = value.asi8 + else: + value = self._unbox_scalar(value) + + return value + + def _validate_setitem_value(self, value): + if lib.is_scalar(value) and not isna(value): + value = com.maybe_box_datetimelike(value) + + if is_list_like(value): + value = type(self)._from_sequence(value, dtype=self.dtype) + self._check_compatible_with(value, setitem=True) + value = value.asi8 + elif isinstance(value, self._scalar_type): + self._check_compatible_with(value, setitem=True) + value = self._unbox_scalar(value) + elif is_valid_nat_for_dtype(value, self.dtype): + value = iNaT + else: + msg = ( + f"'value' should be a '{self._scalar_type.__name__}', 'NaT', " + f"or array of those. Got '{type(value).__name__}' instead." 
+ ) + raise TypeError(msg) + + return value + + def _validate_insert_value(self, value): + if isinstance(value, self._recognized_scalars): + value = self._scalar_type(value) + elif is_valid_nat_for_dtype(value, self.dtype): + # GH#18295 + value = NaT + elif lib.is_scalar(value) and isna(value): + raise TypeError( + f"cannot insert {type(self).__name__} with incompatible label" + ) + + return value + + def _validate_where_value(self, other): + if lib.is_scalar(other) and isna(other): + other = NaT.value + + else: + # Do type inference if necessary up front + # e.g. we passed PeriodIndex.values and got an ndarray of Periods + from pandas import Index + + other = Index(other) + + if is_categorical_dtype(other): + # e.g. we have a Categorical holding self.dtype + if is_dtype_equal(other.categories.dtype, self.dtype): + other = other._internal_get_values() + + if not is_dtype_equal(self.dtype, other.dtype): + raise TypeError(f"Where requires matching dtype, not {other.dtype}") + + other = other.view("i8") + return other # ------------------------------------------------------------------ # Additional array methods @@ -826,37 +909,7 @@ def searchsorted(self, value, side="left", sorter=None): indices : array of ints Array of insertion points with the same shape as `value`. """ - if isinstance(value, str): - try: - value = self._scalar_from_string(value) - except ValueError as e: - raise TypeError( - "searchsorted requires compatible dtype or scalar" - ) from e - - elif is_valid_nat_for_dtype(value, self.dtype): - value = NaT - - elif isinstance(value, self._recognized_scalars): - value = self._scalar_type(value) - - elif is_list_like(value) and not isinstance(value, type(self)): - value = array(value) - - if not type(self)._is_recognized_dtype(value): - raise TypeError( - "searchsorted requires compatible dtype or scalar, " - f"not {type(value).__name__}" - ) - - if not (isinstance(value, (self._scalar_type, type(self))) or (value is NaT)): - raise TypeError(f"Unexpected type for 'value': {type(value)}") - - if isinstance(value, type(self)): - self._check_compatible_with(value) - value = value.asi8 - else: - value = self._unbox_scalar(value) + value = self._validate_searchsorted_value(value) # TODO: Use datetime64 semantics for sorting, xref GH#29844 return self.asi8.searchsorted(value, side=side, sorter=sorter) @@ -1341,6 +1394,40 @@ def _time_shift(self, periods, freq=None): # to be passed explicitly. return self._generate_range(start=start, end=end, periods=None, freq=self.freq) + @staticmethod + def _get_addsub_freq(self, other): + """ + Find the freq we expect the result of an addition operation to have. + """ + if is_period_dtype(self.dtype): + # Only used for ops that stay PeriodDtype + return self.freq + elif self.freq is None: + return None + elif lib.is_scalar(other) and isna(other): + return None + + elif isinstance(other, (Tick, timedelta, np.timedelta64)): + new_freq = None + if isinstance(self.freq, Tick): + new_freq = self.freq + return new_freq + + elif isinstance(other, DateOffset): + # otherwise just DatetimeArray + return None # TODO: Should we infer if it matches self.freq * n? + elif isinstance(other, (datetime, np.datetime64)): + return self.freq + + elif is_timedelta64_dtype(other): + return None # TODO: shouldnt we be able to do self.freq + other.freq? + elif is_object_dtype(other): + return None # TODO: is this quite right? 
sometimes we unpack singletons + elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): + return None # TODO: shouldnt we be able to do self.freq + other.freq? + else: + raise NotImplementedError + @unpack_zerodim_and_defer("__add__") def __add__(self, other): diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index e45d3ca66b6ec..82867cf9dcd29 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -38,7 +38,7 @@ def _align_core_single_unary_op( def _zip_axes_from_type( typ: Type[FrameOrSeries], new_axes: Sequence[int] ) -> Dict[str, int]: - axes = {name: new_axes[i] for i, name in typ._AXIS_NAMES.items()} + axes = {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)} return axes diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index df70e73c6aadb..c9419fded5de9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -350,6 +350,7 @@ def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None): def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other): """ A safe version of putmask that potentially upcasts the result. + The result is replaced with the first N elements of other, where N is the number of True values in mask. If the length of other is shorter than N, other will be repeated. @@ -399,24 +400,6 @@ def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other): other = np.array(other, dtype=result.dtype) def changeit(): - - # try to directly set by expanding our array to full - # length of the boolean - try: - om = other[mask] - except (IndexError, TypeError): - # IndexError occurs in test_upcast when we have a boolean - # mask of the wrong shape - # TypeError occurs in test_upcast when `other` is a bool - pass - else: - om_at = om.astype(result.dtype) - if (om == om_at).all(): - new_result = result.values.copy() - new_result[mask] = om_at - result[:] = new_result - return result, False - # we are forced to change the dtype of the result as the input # isn't compatible r, _ = maybe_upcast(result, fill_value=other, copy=True) @@ -434,15 +417,8 @@ def changeit(): # we have a scalar or len 0 ndarray # and its nan and we are changing some values - if is_scalar(other) or (isinstance(other, np.ndarray) and other.ndim < 1): - if isna(other): - return changeit() - - # we have an ndarray and the masking has nans in it - else: - - if isna(other).any(): - return changeit() + if isna(other): + return changeit() try: np.place(result, mask, other) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 08a6d42042c1c..d329f4337de2e 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -208,11 +208,10 @@ def _use_inf_as_na(key): def _isna_ndarraylike(obj): - is_extension = is_extension_array_dtype(obj.dtype) values = getattr(obj, "_values", obj) dtype = values.dtype - if is_extension: + if is_extension_array_dtype(dtype): result = values.isna() elif is_string_dtype(dtype): result = _isna_string_dtype(values, dtype, old=False) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 85bb47485a2e7..202cb6488446e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -422,7 +422,12 @@ def _constructor(self) -> Type["DataFrame"]: @property def _constructor_expanddim(self): - raise NotImplementedError("Not supported for DataFrames!") + # GH#31549 raising NotImplementedError on a property causes trouble + # for `inspect` + def constructor(*args, **kwargs): + raise 
NotImplementedError("Not supported for DataFrames!") + + return constructor # ---------------------------------------------------------------------- # Constructors @@ -8787,8 +8792,11 @@ def isin(self, values) -> "DataFrame": # ---------------------------------------------------------------------- # Add index and columns _AXIS_ORDERS = ["index", "columns"] - _AXIS_NUMBERS = {"index": 0, "columns": 1} - _AXIS_NAMES = {0: "index", 1: "columns"} + _AXIS_TO_AXIS_NUMBER: Dict[Axis, int] = { + **NDFrame._AXIS_TO_AXIS_NUMBER, + 1: 1, + "columns": 1, + } _AXIS_REVERSED = True _AXIS_LEN = len(_AXIS_ORDERS) _info_axis_number = 1 @@ -8801,6 +8809,18 @@ def isin(self, values) -> "DataFrame": axis=0, doc="The column labels of the DataFrame." ) + @property + def _AXIS_NUMBERS(self) -> Dict[str, int]: + """.. deprecated:: 1.1.0""" + super()._AXIS_NUMBERS + return {"index": 0, "columns": 1} + + @property + def _AXIS_NAMES(self) -> Dict[int, str]: + """.. deprecated:: 1.1.0""" + super()._AXIS_NAMES + return {0: "index", 1: "columns"} + # ---------------------------------------------------------------------- # Add plotting methods to DataFrame plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a28f89a79a880..2f35a5b6f9a7e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -67,7 +67,6 @@ is_dict_like, is_extension_array_dtype, is_float, - is_integer, is_list_like, is_number, is_numeric_dtype, @@ -302,19 +301,32 @@ def _data(self): # ---------------------------------------------------------------------- # Axis - _AXIS_ALIASES = {"rows": 0} - _AXIS_IALIASES = {0: "rows"} _stat_axis_number = 0 _stat_axis_name = "index" _ix = None _AXIS_ORDERS: List[str] - _AXIS_NUMBERS: Dict[str, int] - _AXIS_NAMES: Dict[int, str] + _AXIS_TO_AXIS_NUMBER: Dict[Axis, int] = {0: 0, "index": 0, "rows": 0} _AXIS_REVERSED: bool _info_axis_number: int _info_axis_name: str _AXIS_LEN: int + @property + def _AXIS_NUMBERS(self) -> Dict[str, int]: + """.. deprecated:: 1.1.0""" + warnings.warn( + "_AXIS_NUMBERS has been deprecated.", FutureWarning, stacklevel=3, + ) + return {"index": 0} + + @property + def _AXIS_NAMES(self) -> Dict[int, str]: + """.. 
deprecated:: 1.1.0""" + warnings.warn( + "_AXIS_NAMES has been deprecated.", FutureWarning, stacklevel=3, + ) + return {0: "index"} + def _construct_axes_dict(self, axes=None, **kwargs): """Return an axes dictionary for myself.""" d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)} @@ -353,37 +365,24 @@ def _construct_axes_from_arguments( return axes, kwargs @classmethod - def _get_axis_number(cls, axis): - axis = cls._AXIS_ALIASES.get(axis, axis) - if is_integer(axis): - if axis in cls._AXIS_NAMES: - return axis - else: - try: - return cls._AXIS_NUMBERS[axis] - except KeyError: - pass - raise ValueError(f"No axis named {axis} for object type {cls.__name__}") + def _get_axis_number(cls, axis: Axis) -> int: + try: + return cls._AXIS_TO_AXIS_NUMBER[axis] + except KeyError: + raise ValueError(f"No axis named {axis} for object type {cls.__name__}") @classmethod - def _get_axis_name(cls, axis): - axis = cls._AXIS_ALIASES.get(axis, axis) - if isinstance(axis, str): - if axis in cls._AXIS_NUMBERS: - return axis - else: - try: - return cls._AXIS_NAMES[axis] - except KeyError: - pass - raise ValueError(f"No axis named {axis} for object type {cls.__name__}") + def _get_axis_name(cls, axis: Axis) -> str: + axis_number = cls._get_axis_number(axis) + return cls._AXIS_ORDERS[axis_number] - def _get_axis(self, axis): - name = self._get_axis_name(axis) - return getattr(self, name) + def _get_axis(self, axis: Axis) -> Index: + axis_number = self._get_axis_number(axis) + assert axis_number in {0, 1} + return self.index if axis_number == 0 else self.columns @classmethod - def _get_block_manager_axis(cls, axis): + def _get_block_manager_axis(cls, axis: Axis) -> int: """Map the axis to the block_manager axis.""" axis = cls._get_axis_number(axis) if cls._AXIS_REVERSED: @@ -448,11 +447,11 @@ def _get_cleaned_column_resolvers(self) -> Dict[str, ABCSeries]: } @property - def _info_axis(self): + def _info_axis(self) -> Index: return getattr(self, self._info_axis_name) @property - def _stat_axis(self): + def _stat_axis(self) -> Index: return getattr(self, self._stat_axis_name) @property @@ -590,7 +589,9 @@ def swapaxes(self: FrameOrSeries, axis1, axis2, copy=True) -> FrameOrSeries: if copy: new_values = new_values.copy() - return self._constructor(new_values, *new_axes).__finalize__( + # ignore needed because of NDFrame constructor is different than + # DataFrame/Series constructors. + return self._constructor(new_values, *new_axes).__finalize__( # type: ignore self, method="swapaxes" ) @@ -701,10 +702,8 @@ def pop(self: FrameOrSeries, item) -> FrameOrSeries: """ result = self[item] del self[item] - try: + if self.ndim == 2: result._reset_cacher() - except AttributeError: - pass return result @@ -811,7 +810,7 @@ def squeeze(self, axis=None): >>> df_0a.squeeze() 1 """ - axis = self._AXIS_NAMES if axis is None else (self._get_axis_number(axis),) + axis = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),) return self.iloc[ tuple( 0 if i in axis and len(a) == 1 else slice(None) @@ -1154,7 +1153,7 @@ class name result = self if inplace else self.copy(deep=copy) for axis in range(self._AXIS_LEN): - v = axes.get(self._AXIS_NAMES[axis]) + v = axes.get(self._get_axis_name(axis)) if v is lib.no_default: continue non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) @@ -3255,14 +3254,9 @@ def _maybe_update_cacher( if ref is None: del self._cacher else: - # Note: we need to call ref._maybe_cache_changed even in the - # case where it will raise. 
(Uh, not clear why) - try: + if len(self) == len(ref): + # otherwise, either self or ref has swapped in new arrays ref._maybe_cache_changed(cacher[0], self) - except AssertionError: - # ref._mgr.setitem can raise - # AssertionError because of shape mismatch - pass if verify_is_copy: self._check_setitem_copy(stacklevel=5, t="referant") @@ -3491,6 +3485,8 @@ class animal locomotion axis = self._get_axis_number(axis) labels = self._get_axis(axis) if level is not None: + if not isinstance(labels, MultiIndex): + raise TypeError("Index must be a MultiIndex") loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) # create the tuple of the indexer @@ -7628,11 +7624,11 @@ def at_time( axis = self._get_axis_number(axis) index = self._get_axis(axis) - try: - indexer = index.indexer_at_time(time, asof=asof) - except AttributeError as err: - raise TypeError("Index must be DatetimeIndex") from err + if not isinstance(index, DatetimeIndex): + raise TypeError("Index must be DatetimeIndex") + + indexer = index.indexer_at_time(time, asof=asof) return self._take_with_is_copy(indexer, axis=axis) def between_time( @@ -7711,16 +7707,12 @@ def between_time( axis = self._get_axis_number(axis) index = self._get_axis(axis) - try: - indexer = index.indexer_between_time( - start_time, - end_time, - include_start=include_start, - include_end=include_end, - ) - except AttributeError as err: - raise TypeError("Index must be DatetimeIndex") from err + if not isinstance(index, DatetimeIndex): + raise TypeError("Index must be DatetimeIndex") + indexer = index.indexer_between_time( + start_time, end_time, include_start=include_start, include_end=include_end, + ) return self._take_with_is_copy(indexer, axis=axis) def resample( @@ -8665,6 +8657,11 @@ def _where( else: align = self._get_axis_number(axis) == 1 + if align and isinstance(other, NDFrame): + other = other.reindex(self._info_axis, axis=self._info_axis_number) + if isinstance(cond, NDFrame): + cond = cond.reindex(self._info_axis, axis=self._info_axis_number) + block_axis = self._get_block_manager_axis(axis) if inplace: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c007d4920cbe7..504de404b2509 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -76,6 +76,7 @@ from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series from pandas.core.util.numba_ import ( + NUMBA_FUNC_CACHE, check_kwargs_and_nopython, get_jit_arguments, jit_user_function, @@ -161,8 +162,6 @@ def pinner(cls): class SeriesGroupBy(GroupBy[Series]): _apply_whitelist = base.series_apply_whitelist - _numba_func_cache: Dict[Callable, Callable] = {} - def _iterate_slices(self) -> Iterable[Series]: yield self._selected_obj @@ -504,8 +503,9 @@ def _transform_general( nopython, nogil, parallel = get_jit_arguments(engine_kwargs) check_kwargs_and_nopython(kwargs, nopython) validate_udf(func) - numba_func = self._numba_func_cache.get( - func, jit_user_function(func, nopython, nogil, parallel) + cache_key = (func, "groupby_transform") + numba_func = NUMBA_FUNC_CACHE.get( + cache_key, jit_user_function(func, nopython, nogil, parallel) ) klass = type(self._selected_obj) @@ -516,8 +516,8 @@ def _transform_general( if engine == "numba": values, index = split_for_numba(group) res = numba_func(values, index, *args) - if func not in self._numba_func_cache: - self._numba_func_cache[func] = numba_func + if cache_key not in NUMBA_FUNC_CACHE: + NUMBA_FUNC_CACHE[cache_key] = numba_func else: res = 
func(group, *args, **kwargs) @@ -847,8 +847,6 @@ class DataFrameGroupBy(GroupBy[DataFrame]): _apply_whitelist = base.dataframe_apply_whitelist - _numba_func_cache: Dict[Callable, Callable] = {} - _agg_see_also_doc = dedent( """ See Also @@ -1397,8 +1395,9 @@ def _transform_general( nopython, nogil, parallel = get_jit_arguments(engine_kwargs) check_kwargs_and_nopython(kwargs, nopython) validate_udf(func) - numba_func = self._numba_func_cache.get( - func, jit_user_function(func, nopython, nogil, parallel) + cache_key = (func, "groupby_transform") + numba_func = NUMBA_FUNC_CACHE.get( + cache_key, jit_user_function(func, nopython, nogil, parallel) ) else: fast_path, slow_path = self._define_paths(func, *args, **kwargs) @@ -1409,8 +1408,8 @@ def _transform_general( if engine == "numba": values, index = split_for_numba(group) res = numba_func(values, index, *args) - if func not in self._numba_func_cache: - self._numba_func_cache[func] = numba_func + if cache_key not in NUMBA_FUNC_CACHE: + NUMBA_FUNC_CACHE[cache_key] = numba_func # Return the result as a DataFrame for concatenation later res = DataFrame(res, index=group.index, columns=group.columns) else: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 44dab263f0e94..295bcc7287c65 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -17,7 +17,6 @@ ensure_int64, ensure_platform_int, is_bool_dtype, - is_categorical_dtype, is_dtype_equal, is_integer, is_list_like, @@ -26,7 +25,6 @@ ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries -from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna from pandas.core import algorithms from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray @@ -80,7 +78,7 @@ def wrapper(left, right): cache=True, ) @inherit_names( - ["mean", "asi8", "_box_values", "_box_func"], DatetimeLikeArrayMixin, + ["mean", "asi8", "_box_func"], DatetimeLikeArrayMixin, ) class DatetimeIndexOpsMixin(ExtensionIndex): """ @@ -449,9 +447,27 @@ def get_indexer_non_unique(self, target): # -------------------------------------------------------------------- - __add__ = make_wrapped_arith_op("__add__") + def __add__(self, other): + add = make_wrapped_arith_op("__add__") + result = add(self, other) + if result is NotImplemented: + return NotImplemented + + new_freq = type(self._data)._get_addsub_freq(self, other) + result._freq = new_freq + return result + + def __sub__(self, other): + sub = make_wrapped_arith_op("__sub__") + result = sub(self, other) + if result is NotImplemented: + return NotImplemented + + new_freq = type(self._data)._get_addsub_freq(self, other) + result._freq = new_freq + return result + __radd__ = make_wrapped_arith_op("__radd__") - __sub__ = make_wrapped_arith_op("__sub__") __rsub__ = make_wrapped_arith_op("__rsub__") __pow__ = make_wrapped_arith_op("__pow__") __rpow__ = make_wrapped_arith_op("__rpow__") @@ -494,23 +510,7 @@ def isin(self, values, level=None): def where(self, cond, other=None): values = self.view("i8") - if is_scalar(other) and isna(other): - other = NaT.value - - else: - # Do type inference if necessary up front - # e.g. we passed PeriodIndex.values and got an ndarray of Periods - other = Index(other) - - if is_categorical_dtype(other): - # e.g. 
we have a Categorical holding self.dtype - if is_dtype_equal(other.categories.dtype, self.dtype): - other = other._internal_get_values() - - if not is_dtype_equal(self.dtype, other.dtype): - raise TypeError(f"Where requires matching dtype, not {other.dtype}") - - other = other.view("i8") + other = self._data._validate_where_value(other) result = np.where(cond, values, other).astype("i8") arr = type(self._data)._simple_new(result, dtype=self.dtype) @@ -576,7 +576,9 @@ def shift(self, periods=1, freq=None): Index.shift : Shift values of Index. PeriodIndex.shift : Shift values of PeriodIndex. """ - result = self._data._time_shift(periods, freq=freq) + arr = self._data.view() + arr._freq = self.freq + result = arr._time_shift(periods, freq=freq) return type(self)(result, name=self.name) # -------------------------------------------------------------------- @@ -648,11 +650,6 @@ def _with_freq(self, freq): return index - def _with_freq(self, freq): - index = self.copy(deep=False) - index._set_freq(freq) - return index - def _shallow_copy(self, values=None, name: Label = lib.no_default): name = self.name if name is lib.no_default else name cache = self._cache.copy() if values is None else {} @@ -937,15 +934,7 @@ def insert(self, loc, item): ------- new_index : Index """ - if isinstance(item, self._data._recognized_scalars): - item = self._data._scalar_type(item) - elif is_valid_nat_for_dtype(item, self.dtype): - # GH 18295 - item = self._na_value - elif is_scalar(item) and isna(item): - raise TypeError( - f"cannot insert {type(self).__name__} with incompatible label" - ) + item = self._data._validate_insert_value(item) freq = None if isinstance(item, self._data._scalar_type) or item is NaT: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d411867af2ef8..ce5008630e53d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -37,7 +37,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.generic import ABCDataFrame +from pandas.core.dtypes.generic import ABCDataFrame, ABCDatetimeIndex, ABCTimedeltaIndex from pandas.core.dtypes.missing import array_equivalent, isna import pandas.core.algorithms as algos @@ -653,7 +653,9 @@ def values(self): vals = self._get_level_values(i) if is_categorical_dtype(vals): vals = vals._internal_get_values() - if isinstance(vals.dtype, ExtensionDtype) or hasattr(vals, "_box_values"): + if isinstance(vals.dtype, ExtensionDtype) or isinstance( + vals, (ABCDatetimeIndex, ABCTimedeltaIndex) + ): vals = vals.astype(object) vals = np.array(vals, copy=False) values.append(vals) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 3e3591ee42c30..d2e743739db3d 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -51,7 +51,7 @@ ], TimedeltaArray, ) -class TimedeltaIndex(DatetimeTimedeltaMixin, dtl.TimelikeOps): +class TimedeltaIndex(DatetimeTimedeltaMixin): """ Immutable ndarray of timedelta64 data, represented internally as int64, and which can be boxed to timedelta objects. 
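
A minimal sketch of the freq semantics the datetimelike hunks above aim for, assuming this patch series is applied (_with_freq and _get_addsub_freq are the private helpers from the diff, not public API):

    import pandas as pd

    # freq is inferred into a copy; construction alone no longer sets it
    tdi = pd.to_timedelta(range(5), unit="d")._with_freq("infer")
    assert tdi.freq == "D"

    # _with_freq returns a new index instead of mutating the shared _data,
    # so other indexes viewing the same values keep their own freq
    assert tdi._with_freq(None).freq is None
    assert tdi.freq == "D"

    # __add__/__sub__ recompute freq via _get_addsub_freq, so adding a
    # Tick offset preserves the daily freq (see test_timedelta.py below)
    assert (tdi + pd.offsets.Hour(1)).freq == "D"
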
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d100cb0bb70d8..303365f50c546 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -616,6 +616,8 @@ def _get_setitem_indexer(self, key): # invalid indexer type vs 'other' indexing errors if "cannot do" in str(e): raise + elif "unhashable type" in str(e): + raise raise IndexingError(key) from e def _ensure_listlike_indexer(self, key, axis=None): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 71efde1cc5380..366ea54a510ef 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -51,7 +51,6 @@ from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, - ABCExtensionArray, ABCIndexClass, ABCPandasArray, ABCSeries, @@ -2765,9 +2764,10 @@ def _safe_reshape(arr, new_shape): """ if isinstance(arr, ABCSeries): arr = arr._values - if not isinstance(arr, ABCExtensionArray): - # TODO(EA2D): special case not needed with 2D EAs - arr = arr.reshape(new_shape) + if not is_extension_array_dtype(arr.dtype): + # Note: this will include TimedeltaArray and tz-naive DatetimeArray + # TODO(EA2D): special case will be unnecessary with 2D EAs + arr = np.asarray(arr).reshape(new_shape) return arr diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index dd950c0276646..6368a2498b04c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -373,23 +373,20 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T: self._consolidate_inplace() - align_copy = False - if f == "where": - align_copy = True - aligned_args = {k: kwargs[k] for k in align_keys} for b in self.blocks: if aligned_args: - b_items = self.items[b.mgr_locs.indexer] for k, obj in aligned_args.items(): if isinstance(obj, (ABCSeries, ABCDataFrame)): - axis = obj._info_axis_number - kwargs[k] = obj.reindex( - b_items, axis=axis, copy=align_copy - )._values + # The caller is responsible for ensuring that + # obj.axes[-1].equals(self.items) + if obj.ndim == 1: + kwargs[k] = obj.iloc[b.mgr_locs.indexer]._values + else: + kwargs[k] = obj.iloc[:, b.mgr_locs.indexer]._values else: # otherwise we have an ndarray kwargs[k] = obj[b.mgr_locs.indexer] @@ -1125,6 +1122,7 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False): new_axis = self.items.insert(loc, item) if value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype): + # TODO(EA2D): special case not needed with 2D EAs value = _safe_reshape(value, (1,) + value.shape) block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1)) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9182e378fbaeb..9ef865a964123 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -949,11 +949,8 @@ def _get_with(self, key): else: return self.iloc[key] - if isinstance(key, list): - # handle the dup indexing case GH#4246 - return self.loc[key] - - return self.reindex(key) + # handle the dup indexing case GH#4246 + return self.loc[key] def _get_values_tuple(self, key): # mpl hackaround @@ -1031,7 +1028,7 @@ def __setitem__(self, key, value): try: self._where(~key, value, inplace=True) except InvalidIndexError: - self._set_values(key.astype(np.bool_), value) + self.iloc[key] = value return else: @@ -1049,8 +1046,10 @@ def _set_with_engine(self, key, value): def _set_with(self, key, value): # other: fancy integer or otherwise if isinstance(key, slice): + # extract_array so that if we set e.g. 
ser[-5:] = ser[:5] + # we get the first five values, and not 5 NaNs indexer = self.index._convert_slice_indexer(key, kind="getitem") - return self._set_values(indexer, value) + self.iloc[indexer] = extract_array(value, extract_numpy=True) else: assert not isinstance(key, tuple) @@ -1068,25 +1067,11 @@ def _set_with(self, key, value): # should be caught by the is_bool_indexer check in __setitem__ if key_type == "integer": if not self.index._should_fallback_to_positional(): - self._set_labels(key, value) + self.loc[key] = value else: - self._set_values(key, value) + self.iloc[key] = value else: - self._set_labels(key, value) - - def _set_labels(self, key, value): - key = com.asarray_tuplesafe(key) - indexer: np.ndarray = self.index.get_indexer(key) - mask = indexer == -1 - if mask.any(): - raise ValueError(f"{key[mask]} not contained in the index") - self._set_values(indexer, value) - - def _set_values(self, key, value): - if isinstance(key, Series): - key = key._values - self._mgr = self._mgr.setitem(indexer=key, value=value) - self._maybe_update_cacher() + self.loc[key] = value def _set_value(self, label, value, takeable: bool = False): """ @@ -4614,8 +4599,6 @@ def to_period(self, freq=None, copy=True) -> "Series": # ---------------------------------------------------------------------- # Add index _AXIS_ORDERS = ["index"] - _AXIS_NUMBERS = {"index": 0} - _AXIS_NAMES = {0: "index"} _AXIS_REVERSED = False _AXIS_LEN = len(_AXIS_ORDERS) _info_axis_number = 0 diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py index c5b27b937a05b..29e74747881ae 100644 --- a/pandas/core/util/numba_.py +++ b/pandas/core/util/numba_.py @@ -1,4 +1,5 @@ """Common utilities for Numba operations""" +from distutils.version import LooseVersion import inspect import types from typing import Callable, Dict, Optional, Tuple @@ -8,6 +9,8 @@ from pandas._typing import FrameOrSeries from pandas.compat._optional import import_optional_dependency +NUMBA_FUNC_CACHE: Dict[Tuple[Callable, str], Callable] = dict() + def check_kwargs_and_nopython( kwargs: Optional[Dict] = None, nopython: Optional[bool] = None @@ -88,7 +91,12 @@ def jit_user_function( """ numba = import_optional_dependency("numba") - if isinstance(func, numba.targets.registry.CPUDispatcher): + if LooseVersion(numba.__version__) >= LooseVersion("0.49.0"): + is_jitted = numba.extending.is_jitted(func) + else: + is_jitted = isinstance(func, numba.targets.registry.CPUDispatcher) + + if is_jitted: # Don't jit a user passed jitted function numba_func = func else: diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 436585fe221dd..8707893dc20cf 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -78,6 +78,7 @@ def _apply( performing the original function call on the grouped object. """ kwargs.pop("floor", None) + kwargs.pop("original_func", None) # TODO: can we de-duplicate with _dispatch? 
def f(x, name=name, *args): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 62f470060b039..3b14921528890 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -38,6 +38,7 @@ from pandas.core.base import DataError, PandasObject, SelectionMixin, ShallowMixin import pandas.core.common as com from pandas.core.indexes.api import Index, ensure_index +from pandas.core.util.numba_ import NUMBA_FUNC_CACHE from pandas.core.window.common import ( WindowGroupByMixin, _doc_template, @@ -93,7 +94,6 @@ def __init__( self.win_freq = None self.axis = obj._get_axis_number(axis) if axis is not None else None self.validate() - self._numba_func_cache: Dict[Optional[str], Callable] = dict() @property def _constructor(self): @@ -505,7 +505,7 @@ def calc(x): result = np.asarray(result) if use_numba_cache: - self._numba_func_cache[name] = func + NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func if center: result = self._center_window(result, window) @@ -1279,9 +1279,10 @@ def apply( elif engine == "numba": if raw is False: raise ValueError("raw must be `True` when using the numba engine") - if func in self._numba_func_cache: + cache_key = (func, "rolling_apply") + if cache_key in NUMBA_FUNC_CACHE: # Return an already compiled version of roll_apply if available - apply_func = self._numba_func_cache[func] + apply_func = NUMBA_FUNC_CACHE[cache_key] else: apply_func = generate_numba_apply_func( args, kwargs, func, engine_kwargs @@ -1298,6 +1299,7 @@ def apply( name=func, use_numba_cache=engine == "numba", raw=raw, + original_func=func, ) def _generate_cython_apply_func(self, args, kwargs, raw, offset, func): @@ -1429,7 +1431,8 @@ def mean(self, *args, **kwargs): def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") - window_func = partial(window_func, win=self._get_window()) + # GH 32865. 
Move max window size calculation to + # the median function implementation return self._apply(window_func, center=self.center, name="median", **kwargs) def std(self, ddof=1, *args, **kwargs): diff --git a/pandas/io/common.py b/pandas/io/common.py index ff527de79c387..dd3d205ca90eb 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -20,8 +20,7 @@ Type, Union, ) -from urllib.parse import ( # noqa - urlencode, +from urllib.parse import ( urljoin, urlparse as parse_url, uses_netloc, @@ -32,13 +31,6 @@ from pandas._typing import FilePathOrBuffer from pandas.compat import _get_lzma_file, _import_lzma -from pandas.errors import ( # noqa - AbstractMethodError, - DtypeWarning, - EmptyDataError, - ParserError, - ParserWarning, -) from pandas.core.dtypes.common import is_file_like diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 091f7662630ff..dcd764bec7426 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -62,7 +62,7 @@ def __init__( # Extract compression mode as given, if dict compression, self.compression_args = get_compression_method(compression) - self.path_or_buf, _, _, _ = get_filepath_or_buffer( + self.path_or_buf, _, _, self.should_close = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression, mode=mode ) self.sep = sep @@ -223,6 +223,8 @@ def save(self) -> None: f.close() for _fh in handles: _fh.close() + elif self.should_close: + f.close() def _save_header(self): writer = self.writer diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 1fbc321160120..7b5e553cf394e 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -158,17 +158,18 @@ def info( lines.append(str(type(data))) lines.append(data.index._summary()) - if len(data.columns) == 0: + cols = data.columns + col_count = len(cols) + dtypes = data.dtypes + + if col_count == 0: lines.append(f"Empty {type(data).__name__}") fmt.buffer_put_lines(buf, lines) return - cols = data.columns - col_count = len(data.columns) - # hack if max_cols is None: - max_cols = get_option("display.max_info_columns", len(data.columns) + 1) + max_cols = get_option("display.max_info_columns", col_count + 1) max_rows = get_option("display.max_info_rows", len(data) + 1) @@ -179,7 +180,7 @@ def info( exceeds_info_cols = col_count > max_cols def _verbose_repr(): - lines.append(f"Data columns (total {len(data.columns)} columns):") + lines.append(f"Data columns (total {col_count} columns):") id_head = " # " column_head = "Column" @@ -196,9 +197,9 @@ def _verbose_repr(): header = _put_str(id_head, space_num) + _put_str(column_head, space) if show_counts: counts = data.count() - if len(cols) != len(counts): # pragma: no cover + if col_count != len(counts): # pragma: no cover raise AssertionError( - f"Columns must equal counts ({len(cols)} != {len(counts)})" + f"Columns must equal counts ({col_count} != {len(counts)})" ) count_header = "Non-Null Count" len_count = len(count_header) @@ -214,7 +215,7 @@ def _verbose_repr(): dtype_header = "Dtype" len_dtype = len(dtype_header) - max_dtypes = max(len(pprint_thing(k)) for k in data.dtypes) + max_dtypes = max(len(pprint_thing(k)) for k in dtypes) space_dtype = max(len_dtype, max_dtypes) header += _put_str(count_header, space_count) + _put_str( dtype_header, space_dtype @@ -228,14 +229,14 @@ def _verbose_repr(): + _put_str("-" * len_dtype, space_dtype) ) - for i, col in enumerate(data.columns): - dtype = data.dtypes.iloc[i] + for i, col in enumerate(cols): + dtype = dtypes[i] col = pprint_thing(col) 
line_no = _put_str(f" {i}", space_num) count = "" if show_counts: - count = counts.iloc[i] + count = counts[i] lines.append( line_no @@ -245,7 +246,7 @@ def _verbose_repr(): ) def _non_verbose_repr(): - lines.append(data.columns._summary(name="Columns")) + lines.append(cols._summary(name="Columns")) def _sizeof_fmt(num, size_qualifier): # returns size in human readable format @@ -266,7 +267,7 @@ def _sizeof_fmt(num, size_qualifier): _verbose_repr() # groupby dtype.name to collect e.g. Categorical columns - counts = data.dtypes.value_counts().groupby(lambda x: x.name).sum() + counts = dtypes.value_counts().groupby(lambda x: x.name).sum() dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())] lines.append(f"dtypes: {', '.join(dtypes)}") diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 20724a498b397..eb7f15c78b671 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -867,12 +867,15 @@ def _convert_axes(self): """ Try to convert axes. """ - for axis in self.obj._AXIS_NUMBERS.keys(): + for axis_name in self.obj._AXIS_ORDERS: new_axis, result = self._try_convert_data( - axis, self.obj._get_axis(axis), use_dtypes=False, convert_dates=True + name=axis_name, + data=self.obj._get_axis(axis_name), + use_dtypes=False, + convert_dates=True, ) if result: - setattr(self.obj, axis, new_axis) + setattr(self.obj, axis_name, new_axis) def _try_convert_types(self): raise AbstractMethodError(self) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 33747d2a6dd83..068210eddcc1b 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -92,7 +92,7 @@ def write( **kwargs, ): self.validate_dataframe(df) - path, _, _, _ = get_filepath_or_buffer(path, mode="wb") + path, _, _, should_close = get_filepath_or_buffer(path, mode="wb") from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)} if index is not None: @@ -109,6 +109,8 @@ def write( ) else: self.api.parquet.write_table(table, path, compression=compression, **kwargs) + if should_close: + path.close() def read(self, path, columns=None, **kwargs): path, _, _, should_close = get_filepath_or_buffer(path) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 425118694fa02..311d8d0d55341 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3712,7 +3712,7 @@ def _create_axes( # Now we can construct our new index axis idx = axes[0] a = obj.axes[idx] - axis_name = obj._AXIS_NAMES[idx] + axis_name = obj._get_axis_name(idx) new_index = _convert_index(axis_name, a, self.encoding, self.errors) new_index.axis = idx @@ -3919,7 +3919,7 @@ def process_axes(self, obj, selection: "Selection", columns=None): def process_filter(field, filt): - for axis_name in obj._AXIS_NAMES.values(): + for axis_name in obj._AXIS_ORDERS: axis_number = obj._get_axis_number(axis_name) axis_values = obj._get_axis(axis_name) assert axis_number is not None diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index c2987dc37b960..49a832f8dda20 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -54,7 +54,8 @@ def test_ufuncs_binary(ufunc): tm.assert_extension_array_equal(result, expected) # not handled types - with pytest.raises(TypeError): + msg = r"operand type\(s\) all returned NotImplemented from __array_ufunc__" + with pytest.raises(TypeError, match=msg): ufunc(a, "test") @@ -76,7 +77,8 @@ def test_ufuncs_unary(ufunc): @pytest.mark.parametrize("values", [[True, False], [True, None]]) 
def test_ufunc_reduce_raises(values): a = pd.array(values, dtype="boolean") - with pytest.raises(NotImplementedError): + msg = "The 'reduce' method is not supported" + with pytest.raises(NotImplementedError, match=msg): np.add.reduce(a) diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index 325fa476d70e6..45e0d503f30e7 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -184,8 +184,8 @@ def test_take_fill_value(self): def test_take_fill_value_new_raises(self): # https://github.com/pandas-dev/pandas/issues/23296 cat = pd.Categorical(["a", "b", "c"]) - xpr = r"'fill_value' \('d'\) is not in this Categorical's categories." - with pytest.raises(TypeError, match=xpr): + xpr = r"'fill_value=d' is not present in this Categorical's categories" + with pytest.raises(ValueError, match=xpr): cat.take([0, 1, -1], fill_value="d", allow_fill=True) def test_take_nd_deprecated(self): diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 691230620c2e8..6fce4b4145ff2 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -464,7 +464,7 @@ def test_codes_immutable(self): tm.assert_numpy_array_equal(c.codes, exp) # Assignments to codes should raise - with pytest.raises(ValueError, match="cannot set Categorical codes directly"): + with pytest.raises(AttributeError, match="can't set attribute"): c.codes = np.array([0, 1, 2, 0, 1], dtype="int8") # changes in the codes array should raise diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index bf7d275e4ff7b..4ae1c1e6b63ce 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -31,11 +31,6 @@ def _assert(self, a, b): def _check_numeric_ops(self, a, b, a_dense, b_dense, mix, op): with np.errstate(invalid="ignore", divide="ignore"): - if op in [operator.floordiv, ops.rfloordiv]: - # FIXME: GH#13843 - if self._base == pd.Series and a.dtype.subtype == np.dtype("int64"): - pytest.xfail("Not defined/working. 
See GH#13843") - if mix: result = op(a, b_dense).to_dense() else: diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 6e73e1542bb80..b9c8f3a8dd494 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -20,14 +20,6 @@ import pandas as pd import pandas._testing as tm from pandas.arrays import SparseArray -from pandas.conftest import ( - ALL_EA_INT_DTYPES, - ALL_INT_DTYPES, - SIGNED_EA_INT_DTYPES, - SIGNED_INT_DTYPES, - UNSIGNED_EA_INT_DTYPES, - UNSIGNED_INT_DTYPES, -) # EA & Actual Dtypes @@ -295,10 +287,10 @@ def test_is_string_dtype(): "dtype", integer_dtypes + [pd.Series([1, 2])] - + ALL_INT_DTYPES - + to_numpy_dtypes(ALL_INT_DTYPES) - + ALL_EA_INT_DTYPES - + to_ea_dtypes(ALL_EA_INT_DTYPES), + + tm.ALL_INT_DTYPES + + to_numpy_dtypes(tm.ALL_INT_DTYPES) + + tm.ALL_EA_INT_DTYPES + + to_ea_dtypes(tm.ALL_EA_INT_DTYPES), ) def test_is_integer_dtype(dtype): assert com.is_integer_dtype(dtype) @@ -327,10 +319,10 @@ def test_is_not_integer_dtype(dtype): "dtype", signed_integer_dtypes + [pd.Series([1, 2])] - + SIGNED_INT_DTYPES - + to_numpy_dtypes(SIGNED_INT_DTYPES) - + SIGNED_EA_INT_DTYPES - + to_ea_dtypes(SIGNED_EA_INT_DTYPES), + + tm.SIGNED_INT_DTYPES + + to_numpy_dtypes(tm.SIGNED_INT_DTYPES) + + tm.SIGNED_EA_INT_DTYPES + + to_ea_dtypes(tm.SIGNED_EA_INT_DTYPES), ) def test_is_signed_integer_dtype(dtype): assert com.is_integer_dtype(dtype) @@ -347,10 +339,10 @@ def test_is_signed_integer_dtype(dtype): np.array(["a", "b"]), np.array([], dtype=np.timedelta64), ] - + UNSIGNED_INT_DTYPES - + to_numpy_dtypes(UNSIGNED_INT_DTYPES) - + UNSIGNED_EA_INT_DTYPES - + to_ea_dtypes(UNSIGNED_EA_INT_DTYPES), + + tm.UNSIGNED_INT_DTYPES + + to_numpy_dtypes(tm.UNSIGNED_INT_DTYPES) + + tm.UNSIGNED_EA_INT_DTYPES + + to_ea_dtypes(tm.UNSIGNED_EA_INT_DTYPES), ) def test_is_not_signed_integer_dtype(dtype): assert not com.is_signed_integer_dtype(dtype) @@ -363,10 +355,10 @@ def test_is_not_signed_integer_dtype(dtype): "dtype", unsigned_integer_dtypes + [pd.Series([1, 2], dtype=np.uint32)] - + UNSIGNED_INT_DTYPES - + to_numpy_dtypes(UNSIGNED_INT_DTYPES) - + UNSIGNED_EA_INT_DTYPES - + to_ea_dtypes(UNSIGNED_EA_INT_DTYPES), + + tm.UNSIGNED_INT_DTYPES + + to_numpy_dtypes(tm.UNSIGNED_INT_DTYPES) + + tm.UNSIGNED_EA_INT_DTYPES + + to_ea_dtypes(tm.UNSIGNED_EA_INT_DTYPES), ) def test_is_unsigned_integer_dtype(dtype): assert com.is_unsigned_integer_dtype(dtype) @@ -383,10 +375,10 @@ def test_is_unsigned_integer_dtype(dtype): np.array(["a", "b"]), np.array([], dtype=np.timedelta64), ] - + SIGNED_INT_DTYPES - + to_numpy_dtypes(SIGNED_INT_DTYPES) - + SIGNED_EA_INT_DTYPES - + to_ea_dtypes(SIGNED_EA_INT_DTYPES), + + tm.SIGNED_INT_DTYPES + + to_numpy_dtypes(tm.SIGNED_INT_DTYPES) + + tm.SIGNED_EA_INT_DTYPES + + to_ea_dtypes(tm.SIGNED_EA_INT_DTYPES), ) def test_is_not_unsigned_integer_dtype(dtype): assert not com.is_unsigned_integer_dtype(dtype) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 3d58df258e8e9..8c0580b7cf047 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -782,108 +782,91 @@ def test_datetime(self): index = Index(dates) assert index.inferred_type == "datetime64" - def test_infer_dtype_datetime(self): - - arr = np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - + def test_infer_dtype_datetime64(self): arr = np.array( [np.datetime64("2011-01-01"), np.datetime64("2011-01-01")], dtype=object ) 
assert lib.infer_dtype(arr, skipna=True) == "datetime64" - arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + def test_infer_dtype_datetime64_with_na(self, na_value): # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - - arr = np.array([n, np.datetime64("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "datetime64" - - arr = np.array([n, datetime(2011, 1, 1)]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - - arr = np.array([n, pd.Timestamp("2011-01-02"), n]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - - arr = np.array([n, np.datetime64("2011-01-02"), n]) - assert lib.infer_dtype(arr, skipna=True) == "datetime64" - - arr = np.array([n, datetime(2011, 1, 1), n]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" + arr = np.array([na_value, np.datetime64("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" - # different type of nat - arr = np.array( - [np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object - ) - assert lib.infer_dtype(arr, skipna=False) == "mixed" + arr = np.array([na_value, np.datetime64("2011-01-02"), na_value]) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" - arr = np.array( - [np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object - ) + @pytest.mark.parametrize( + "arr", + [ + np.array( + [np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object + ), + np.array( + [np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object + ), + np.array([np.datetime64("2011-01-01"), pd.Timestamp("2011-01-02")]), + np.array([pd.Timestamp("2011-01-02"), np.datetime64("2011-01-01")]), + np.array([np.nan, pd.Timestamp("2011-01-02"), 1.1]), + np.array([np.nan, "2011-01-01", pd.Timestamp("2011-01-02")]), + np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object), + np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object), + ], + ) + def test_infer_datetimelike_dtype_mixed(self, arr): assert lib.infer_dtype(arr, skipna=False) == "mixed" - # mixed datetime - arr = np.array([datetime(2011, 1, 1), pd.Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - - # should be datetime? 
- arr = np.array([np.datetime64("2011-01-01"), pd.Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "mixed" - - arr = np.array([pd.Timestamp("2011-01-02"), np.datetime64("2011-01-01")]) - assert lib.infer_dtype(arr, skipna=True) == "mixed" - + def test_infer_dtype_mixed_integer(self): arr = np.array([np.nan, pd.Timestamp("2011-01-02"), 1]) assert lib.infer_dtype(arr, skipna=True) == "mixed-integer" - arr = np.array([np.nan, pd.Timestamp("2011-01-02"), 1.1]) - assert lib.infer_dtype(arr, skipna=True) == "mixed" + @pytest.mark.parametrize( + "arr", + [ + np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]), + np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]), + np.array([datetime(2011, 1, 1), pd.Timestamp("2011-01-02")]), + ], + ) + def test_infer_dtype_datetime(self, arr): + assert lib.infer_dtype(arr, skipna=True) == "datetime" - arr = np.array([np.nan, "2011-01-01", pd.Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "mixed" + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + @pytest.mark.parametrize( + "time_stamp", [pd.Timestamp("2011-01-01"), datetime(2011, 1, 1)] + ) + def test_infer_dtype_datetime_with_na(self, na_value, time_stamp): + # starts with nan + arr = np.array([na_value, time_stamp]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" - def test_infer_dtype_timedelta(self): + arr = np.array([na_value, time_stamp, na_value]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" - arr = np.array([pd.Timedelta("1 days"), pd.Timedelta("2 days")]) + @pytest.mark.parametrize( + "arr", + [ + np.array([pd.Timedelta("1 days"), pd.Timedelta("2 days")]), + np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object), + np.array([timedelta(1), timedelta(2)]), + ], + ) + def test_infer_dtype_timedelta(self, arr): assert lib.infer_dtype(arr, skipna=True) == "timedelta" - arr = np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object) + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + @pytest.mark.parametrize( + "delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1)] + ) + def test_infer_dtype_timedelta_with_na(self, na_value, delta): + # starts with nan + arr = np.array([na_value, delta]) assert lib.infer_dtype(arr, skipna=True) == "timedelta" - arr = np.array([timedelta(1), timedelta(2)]) + arr = np.array([na_value, delta, na_value]) assert lib.infer_dtype(arr, skipna=True) == "timedelta" - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, Timedelta("1 days")]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, np.timedelta64(1, "D")]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, timedelta(1)]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, pd.Timedelta("1 days"), n]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, np.timedelta64(1, "D"), n]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, timedelta(1), n]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - # different type of nat - arr = np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object) - assert lib.infer_dtype(arr, skipna=False) == "mixed" - - arr = np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object) - assert lib.infer_dtype(arr, skipna=False) == "mixed" - def test_infer_dtype_period(self): # GH 13664 arr = np.array([pd.Period("2011-01", freq="D"), pd.Period("2011-02", freq="D")]) @@ 
-892,25 +875,26 @@ def test_infer_dtype_period(self): arr = np.array([pd.Period("2011-01", freq="D"), pd.Period("2011-02", freq="M")]) assert lib.infer_dtype(arr, skipna=True) == "period" - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Period("2011-01", freq="D")]) - assert lib.infer_dtype(arr, skipna=True) == "period" - - arr = np.array([n, pd.Period("2011-01", freq="D"), n]) - assert lib.infer_dtype(arr, skipna=True) == "period" - - # different type of nat + def test_infer_dtype_period_mixed(self): arr = np.array( - [np.datetime64("nat"), pd.Period("2011-01", freq="M")], dtype=object + [pd.Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object ) assert lib.infer_dtype(arr, skipna=False) == "mixed" arr = np.array( - [pd.Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object + [np.datetime64("nat"), pd.Period("2011-01", freq="M")], dtype=object ) assert lib.infer_dtype(arr, skipna=False) == "mixed" + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + def test_infer_dtype_period_with_na(self, na_value): + # starts with nan + arr = np.array([na_value, pd.Period("2011-01", freq="D")]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + arr = np.array([na_value, pd.Period("2011-01", freq="D"), na_value]) + assert lib.infer_dtype(arr, skipna=True) == "period" + @pytest.mark.parametrize( "data", [ diff --git a/pandas/tests/frame/indexing/test_at.py b/pandas/tests/frame/indexing/test_at.py new file mode 100644 index 0000000000000..9c2d88f1589c2 --- /dev/null +++ b/pandas/tests/frame/indexing/test_at.py @@ -0,0 +1,14 @@ +from datetime import datetime, timezone + +import pandas as pd +import pandas._testing as tm + + +def test_at_timezone(): + # https://github.com/pandas-dev/pandas/issues/33544 + result = pd.DataFrame({"foo": [datetime(2000, 1, 1)]}) + result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) + expected = pd.DataFrame( + {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index ec8613faaa663..5cf74d3205a13 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -1,12 +1,13 @@ from copy import deepcopy import datetime +import inspect import pydoc import numpy as np import pytest from pandas.compat import PY37 -from pandas.util._test_decorators import async_mark +from pandas.util._test_decorators import async_mark, skip_if_no import pandas as pd from pandas import Categorical, DataFrame, Series, compat, date_range, timedelta_range @@ -569,3 +570,14 @@ def test_cache_on_copy(self): assert df["a"].values[0] == -1 tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0], "y": [0]})) + + @skip_if_no("jinja2") + def test_constructor_expanddim_lookup(self): + # GH#33628 accessing _constructor_expanddim should not + # raise NotImplementedError + df = DataFrame() + + inspect.getmembers(df) + + with pytest.raises(NotImplementedError, match="Not supported for DataFrames!"): + df._constructor_expanddim(np.arange(27).reshape(3, 3, 3)) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 9d3c40ce926d7..2e707342a0793 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -320,9 +320,9 @@ def test_unstack_fill_frame_categorical(self): ) tm.assert_frame_equal(result, expected) - # Fill with non-category results in a TypeError - msg = r"'fill_value' \('d'\) is not in" - with 
pytest.raises(TypeError, match=msg): + # Fill with non-category results in a ValueError + msg = r"'fill_value=d' is not present in" + with pytest.raises(ValueError, match=msg): data.unstack(fill_value="d") # Fill with category value replaces missing values as expected diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 2c8261a6dcc5a..05588ead54be4 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -86,7 +86,9 @@ def test_rename(self): def test_get_numeric_data(self): n = 4 - kwargs = {self._typ._AXIS_NAMES[i]: list(range(n)) for i in range(self._ndim)} + kwargs = { + self._typ._get_axis_name(i): list(range(n)) for i in range(self._ndim) + } # get the numeric data o = self._construct(n, **kwargs) @@ -901,12 +903,22 @@ def test_pipe_tuple_error(self): @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) def test_axis_classmethods(self, box): obj = box(dtype=object) - values = ( - list(box._AXIS_NAMES.keys()) - + list(box._AXIS_NUMBERS.keys()) - + list(box._AXIS_ALIASES.keys()) - ) + values = box._AXIS_TO_AXIS_NUMBER.keys() for v in values: assert obj._get_axis_number(v) == box._get_axis_number(v) assert obj._get_axis_name(v) == box._get_axis_name(v) assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v) + + @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) + def test_axis_names_deprecated(self, box): + # GH33637 + obj = box(dtype=object) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + obj._AXIS_NAMES + + @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) + def test_axis_numbers_deprecated(self, box): + # GH33637 + obj = box(dtype=object) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + obj._AXIS_NUMBERS diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index b2545e0e1b4d2..8e4a7141875bb 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -609,7 +609,8 @@ def test_bins_unequal_len(): bins = pd.cut(series.dropna().values, 4) # len(bins) != len(series) here - with pytest.raises(ValueError): + msg = r"Length of grouper \(8\) and axis \(10\) must be same length" + with pytest.raises(ValueError, match=msg): series.groupby(bins).mean() diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 346de55f551df..93dd1bf23c308 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -16,7 +16,6 @@ NaT, Series, Timestamp, - _is_numpy_dev, date_range, isna, ) @@ -698,11 +697,6 @@ def test_numpy_compat(func): getattr(g, func)(foo=1) -@pytest.mark.xfail( - _is_numpy_dev, - reason="https://github.com/pandas-dev/pandas/issues/31992", - strict=False, -) def test_cummin(numpy_dtypes_for_minmax): dtype = numpy_dtypes_for_minmax[0] min_val = numpy_dtypes_for_minmax[1] @@ -751,11 +745,6 @@ def test_cummin(numpy_dtypes_for_minmax): tm.assert_series_equal(result, expected) -@pytest.mark.xfail( - _is_numpy_dev, - reason="https://github.com/pandas-dev/pandas/issues/31992", - strict=False, -) def test_cummin_all_nan_column(): base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) @@ -766,11 +755,6 @@ def test_cummin_all_nan_column(): tm.assert_frame_equal(expected, result) -@pytest.mark.xfail( - _is_numpy_dev, - reason="https://github.com/pandas-dev/pandas/issues/31992", - strict=False, -) def 
test_cummax(numpy_dtypes_for_minmax): dtype = numpy_dtypes_for_minmax[0] max_val = numpy_dtypes_for_minmax[2] @@ -819,11 +803,6 @@ def test_cummax(numpy_dtypes_for_minmax): tm.assert_series_equal(result, expected) -@pytest.mark.xfail( - _is_numpy_dev, - reason="https://github.com/pandas-dev/pandas/issues/31992", - strict=False, -) def test_cummax_all_nan_column(): base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index 96078d0aa3662..28904b669ae56 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -4,6 +4,7 @@ from pandas import DataFrame import pandas._testing as tm +from pandas.core.util.numba_ import NUMBA_FUNC_CACHE @td.skip_if_no("numba", "0.46.0") @@ -98,13 +99,13 @@ def func_2(values, index): expected = grouped.transform(lambda x: x + 1, engine="cython") tm.assert_equal(result, expected) # func_1 should be in the cache now - assert func_1 in grouped._numba_func_cache + assert (func_1, "groupby_transform") in NUMBA_FUNC_CACHE # Add func_2 to the cache result = grouped.transform(func_2, engine="numba", engine_kwargs=engine_kwargs) expected = grouped.transform(lambda x: x * 5, engine="cython") tm.assert_equal(result, expected) - assert func_2 in grouped._numba_func_cache + assert (func_2, "groupby_transform") in NUMBA_FUNC_CACHE # Retest func_1 which should use the cache result = grouped.transform(func_1, engine="numba", engine_kwargs=engine_kwargs) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 2295eb2297fa6..e1042bf35acc4 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -15,7 +15,6 @@ MultiIndex, Series, Timestamp, - _is_numpy_dev, concat, date_range, ) @@ -330,8 +329,6 @@ def test_transform_transformation_func(transformation_func): if transformation_func in ["pad", "backfill", "tshift", "cumcount"]: # These transformation functions are not yet covered in this test pytest.xfail("See GH 31269") - elif _is_numpy_dev and transformation_func in ["cummin"]: - pytest.xfail("https://github.com/pandas-dev/pandas/issues/31992") elif transformation_func == "fillna": test_op = lambda x: x.transform("fillna", value=0) mock_op = lambda x: x.fillna(value=0) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 964cf320a422b..fd23e95106ab0 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -349,7 +349,8 @@ def test_take(self, indices): if not isinstance(indices, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): # GH 10791 - with pytest.raises(AttributeError): + msg = r"'(.*Index)' object has no attribute 'freq'" + with pytest.raises(AttributeError, match=msg): indices.freq def test_take_invalid_kwargs(self): @@ -537,9 +538,10 @@ def test_delete_base(self, indices): assert result.equals(expected) assert result.name == expected.name - with pytest.raises((IndexError, ValueError)): - # either depending on numpy version - indices.delete(len(indices)) + length = len(indices) + msg = f"index {length} is out of bounds for axis 0 with size {length}" + with pytest.raises(IndexError, match=msg): + indices.delete(length) def test_equals(self, indices): if isinstance(indices, IntervalIndex): @@ -787,13 +789,14 @@ def test_putmask_with_wrong_mask(self): # GH18368 index = self.create_index() - with 
pytest.raises(ValueError): + msg = "putmask: mask and data must be the same size" + with pytest.raises(ValueError, match=msg): index.putmask(np.ones(len(index) + 1, np.bool), 1) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): index.putmask(np.ones(len(index) - 1, np.bool), 1) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): index.putmask("foo", 1) @pytest.mark.parametrize("copy", [True, False]) @@ -861,10 +864,21 @@ def test_getitem_2d_deprecated(self): def test_contains_requires_hashable_raises(self): idx = self.create_index() - with pytest.raises(TypeError, match="unhashable type"): + + msg = "unhashable type: 'list'" + with pytest.raises(TypeError, match=msg): [] in idx - with pytest.raises(TypeError): + msg = "|".join( + [ + r"unhashable type: 'dict'", + r"must be real number, not dict", + r"an integer is required", + r"\{\}", + r"pandas\._libs\.interval\.IntervalTree' is not iterable", + ] + ) + with pytest.raises(TypeError, match=msg): {} in idx._engine def test_copy_copies_cache(self): diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index ba10976a67e9a..85d670e9dbffa 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -11,14 +11,15 @@ class DatetimeLike(Base): def test_argmax_axis_invalid(self): # GH#23081 + msg = r"`axis` must be fewer than the number of dimensions \(1\)" rng = self.create_index() - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): rng.argmax(axis=1) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): rng.argmin(axis=2) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): rng.min(axis=-2) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): rng.max(axis=-3) def test_can_hold_identifiers(self): diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 9273de9c20412..d1f66af4a8e83 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -53,7 +53,11 @@ def test_boolean_context_compat2(): i2 = MultiIndex.from_tuples([("A", 1), ("A", 3)]) common = i1.intersection(i2) - with pytest.raises(ValueError): + msg = ( + r"The truth value of a MultiIndex is ambiguous\. " + r"Use a\.empty, a\.bool\(\), a\.item\(\), a\.any\(\) or a\.all\(\)\." + ) + with pytest.raises(ValueError, match=msg): bool(common) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index de32bd94be491..6d8a396119ef3 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -175,6 +175,6 @@ def test_delete_base(idx): assert result.equals(expected) assert result.name == expected.name - with pytest.raises((IndexError, ValueError)): - # Exception raised depends on NumPy version. 
+ msg = "index 6 is out of bounds for axis 0 with size 6" + with pytest.raises(IndexError, match=msg): idx.delete(len(idx)) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index bb40612b9a55a..423bbed831b87 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -105,7 +105,11 @@ def test_unsortedindex(): expected = df.iloc[0] tm.assert_series_equal(result, expected) - with pytest.raises(UnsortedIndexError): + msg = ( + "MultiIndex slicing requires the index to be lexsorted: " + r"slicing on levels \[1\], lexsort depth 0" + ) + with pytest.raises(UnsortedIndexError, match=msg): df.loc(axis=0)["z", slice("a")] df.sort_index(inplace=True) assert len(df.loc(axis=0)["z", :]) == 2 @@ -124,7 +128,8 @@ def test_unsortedindex_doc_examples(): with tm.assert_produces_warning(PerformanceWarning): dfm.loc[(1, "z")] - with pytest.raises(UnsortedIndexError): + msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)" + with pytest.raises(UnsortedIndexError, match=msg): dfm.loc[(0, "y"):(1, "z")] assert not dfm.index.is_lexsorted() diff --git a/pandas/tests/indexes/ranges/test_constructors.py b/pandas/tests/indexes/ranges/test_constructors.py index b7f673428ae38..f573da44e99b3 100644 --- a/pandas/tests/indexes/ranges/test_constructors.py +++ b/pandas/tests/indexes/ranges/test_constructors.py @@ -149,9 +149,9 @@ def test_constructor_corner(self): tm.assert_index_equal(index, Index(arr)) # non-int raise Exception - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=r"Wrong type \"): RangeIndex("1", "10", "1") - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=r"Wrong type \"): RangeIndex(1.1, 10.2, 1.3) # invalid passed type diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 05422e7b4419f..2438cd352f86f 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -117,7 +117,8 @@ def test_delete(self): tm.assert_index_equal(result, expected) assert result.name == expected.name - with pytest.raises((IndexError, ValueError)): + msg = "index 5 is out of bounds for axis 0 with size 5" + with pytest.raises((IndexError, ValueError), match=msg): # either depending on numpy version result = idx.delete(len(idx)) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index f724badd51da8..637a2629dda8a 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -32,7 +32,9 @@ def indices(self): def create_index(self) -> TimedeltaIndex: index = pd.to_timedelta(range(5), unit="d")._with_freq("infer") assert index.freq == "D" - return index + pd.offsets.Hour(1) + ret = index + pd.offsets.Hour(1) + assert ret.freq == "D" + return ret def test_numeric_compat(self): # Dummy method to override super's version; this test is now done diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 8bfba8c12e934..d3b13336e2a44 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -23,7 +23,8 @@ def test_detect_chained_assignment(): multiind = MultiIndex.from_tuples(tuples, names=["part", "side"]) zed = DataFrame(events, index=["a", "b"], columns=multiind) - with 
pytest.raises(com.SettingWithCopyError): + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): zed["eyes"]["right"].fillna(value=555, inplace=True) diff --git a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py index db8c0c643a623..ff748d755c063 100644 --- a/pandas/tests/indexing/multiindex/test_xs.py +++ b/pandas/tests/indexing/multiindex/test_xs.py @@ -243,3 +243,15 @@ def test_series_getitem_multiindex_xs_by_label(): result = s.xs("one", level="L2") tm.assert_series_equal(result, expected) + + +def test_xs_levels_raises(): + df = DataFrame({"A": [1, 2, 3]}) + + msg = "Index must be a MultiIndex" + with pytest.raises(TypeError, match=msg): + df.xs(0, level="as") + + s = df.A + with pytest.raises(TypeError, match=msg): + s.xs(0, level="as") diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 829ee61197ff2..c9634c4c90809 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -14,7 +14,6 @@ Series, Timedelta, Timestamp, - conftest, ) import pandas._testing as tm from pandas.api.types import CategoricalDtype as CDT @@ -752,9 +751,9 @@ def test_map_with_dict_or_series(self): [1.5, 2.5, 3.5], [-1.5, -2.5, -3.5], # numpy int/uint - *[np.array([1, 2, 3], dtype=dtype) for dtype in conftest.ALL_INT_DTYPES], + *[np.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_DTYPES], # numpy floats - *[np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in conftest.FLOAT_DTYPES], + *[np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in tm.FLOAT_DTYPES], # numpy object np.array([1, "b", 3.5], dtype=object), # pandas scalars @@ -762,7 +761,7 @@ def test_map_with_dict_or_series(self): [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)], [Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")], # pandas Integer arrays - *[pd.array([1, 2, 3], dtype=dtype) for dtype in conftest.ALL_EA_INT_DTYPES], + *[pd.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_EA_INT_DTYPES], # other pandas arrays pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array, pd.date_range("2019-01-01", periods=3).array, diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 2ce07ec41758f..2e691c6fd76d8 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -43,10 +43,12 @@ def test_partial_setting(self): # iloc/iat raise s = s_orig.copy() - with pytest.raises(IndexError): + msg = "iloc cannot enlarge its target object" + with pytest.raises(IndexError, match=msg): s.iloc[3] = 5.0 - with pytest.raises(IndexError): + msg = "index 3 is out of bounds for axis 0 with size 3" + with pytest.raises(IndexError, match=msg): s.iat[3] = 5.0 # ## frame ## @@ -58,10 +60,12 @@ def test_partial_setting(self): # iloc/iat raise df = df_orig.copy() - with pytest.raises(IndexError): + msg = "iloc cannot enlarge its target object" + with pytest.raises(IndexError, match=msg): df.iloc[4, 2] = 5.0 - with pytest.raises(IndexError): + msg = "index 2 is out of bounds for axis 0 with size 2" + with pytest.raises(IndexError, match=msg): df.iat[4, 2] = 5.0 # row setting where it exists @@ -162,7 +166,8 @@ def test_partial_setting_mixed_dtype(self): # list-like must conform df = DataFrame(columns=["A", "B"]) - with pytest.raises(ValueError): + msg = "cannot set a row with mismatched columns" + with pytest.raises(ValueError, match=msg): df.loc[0] = [1, 2, 3] # 
TODO: #15657, these are left as object and not coerced
@@ -330,10 +335,12 @@ def test_partial_set_invalid(self):
 
         df = orig.copy()
 
         # don't allow not string inserts
-        with pytest.raises(TypeError):
+        msg = "cannot insert DatetimeIndex with incompatible label"
+
+        with pytest.raises(TypeError, match=msg):
             df.loc[100.0, :] = df.iloc[0]
 
-        with pytest.raises(TypeError):
+        with pytest.raises(TypeError, match=msg):
             df.loc[100, :] = df.iloc[0]
 
         # allow object conversion here
@@ -375,13 +382,16 @@ def test_partial_set_empty_frame(self):
 
         # frame
         df = DataFrame()
 
-        with pytest.raises(ValueError):
+        msg = "cannot set a frame with no defined columns"
+
+        with pytest.raises(ValueError, match=msg):
             df.loc[1] = 1
 
-        with pytest.raises(ValueError):
+        with pytest.raises(ValueError, match=msg):
             df.loc[1] = Series([1], index=["foo"])
 
-        with pytest.raises(ValueError):
+        msg = "cannot set a frame with no defined index and a scalar"
+        with pytest.raises(ValueError, match=msg):
             df.loc[:, 1] = 1
 
         # these work as they don't really change
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index b7164477c31f2..0f09659a24936 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -54,8 +54,8 @@ def tips_df(datapath):
 @pytest.mark.usefixtures("s3_resource")
 @td.skip_if_not_us_locale()
 class TestS3:
+    @td.skip_if_no("s3fs")
     def test_parse_public_s3_bucket(self, tips_df):
-        pytest.importorskip("s3fs")
 
         # more of an integration test due to the not-public contents portion
         # can probably mock this though.
@@ -159,7 +159,7 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df):
         assert not df.empty
         tm.assert_frame_equal(tips_df.iloc[:10], df)
 
-    def test_s3_fails(self):
+    def test_read_s3_fails(self):
         with pytest.raises(IOError):
             read_csv("s3://nyqpug/asdf.csv")
 
@@ -168,6 +168,22 @@
         with pytest.raises(IOError):
             read_csv("s3://cant_get_it/file.csv")
 
+    def test_write_s3_csv_fails(self, tips_df):
+        # GH 32486
+        # Attempting to write to an invalid S3 path should raise
+        with pytest.raises(
+            FileNotFoundError, match="The specified bucket does not exist"
+        ):
+            tips_df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv")
+
+    @td.skip_if_no("pyarrow")
+    def test_write_s3_parquet_fails(self, tips_df):
+        # GH 27679
+        with pytest.raises(
+            FileNotFoundError, match="The specified bucket does not exist"
+        ):
+            tips_df.to_parquet("s3://an_s3_bucket_data_doesnt_exit/not_real.parquet")
+
     def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
         # see gh-16135
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index 557a9d5c13987..cf745fcc492a1 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -56,7 +56,15 @@ def open(*args):
 
     monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem)
     df1.to_csv("gs://test/test.csv", index=True)
-    df2 = read_csv(StringIO(s.getvalue()), parse_dates=["dt"], index_col=0)
+
+    def mock_get_filepath_or_buffer(*args, **kwargs):
+        return StringIO(df1.to_csv()), None, None, False
+
+    monkeypatch.setattr(
+        "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer
+    )
+
+    df2 = read_csv("gs://test/test.csv", parse_dates=["dt"], index_col=0)
 
     tm.assert_frame_equal(df1, df2)
 
@@ -86,28 +94,6 @@ def open(self, path, mode="r", *args):
     )
 
 
-@td.skip_if_no("gcsfs")
-def test_gcs_get_filepath_or_buffer(monkeypatch):
-    df1 = DataFrame(
-        {
-            "int": [1, 3],
-            "float": [2.0, np.nan],
-            "str": ["t", "s"],
-            "dt": date_range("2018-06-18", periods=2),
-        }
-    )
-
-    def mock_get_filepath_or_buffer(*args, **kwargs):
-        return (StringIO(df1.to_csv(index=False)), None, None, False)
-
-    monkeypatch.setattr(
-        "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer
-    )
-    df2 = read_csv("gs://test/test.csv", parse_dates=["dt"])
-
-    tm.assert_frame_equal(df1, df2)
-
-
 @td.skip_if_installed("gcsfs")
 def test_gcs_not_present_exception():
     with pytest.raises(ImportError) as e:
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 584a545769c4c..42b4ea5ad9aac 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -196,7 +196,6 @@ def test_pickle_path_localpath():
     tm.assert_frame_equal(df, result)
 
 
-@pytest.mark.xfail(reason="GitHub issue #31310", strict=False)
 def test_legacy_sparse_warning(datapath):
     """
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 635b1a1cd1326..235aa8e4aa922 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -1098,6 +1098,14 @@ def test_min_max_ordered(self, values, categories, function):
         expected = categories[0] if function == "min" else categories[2]
         assert result == expected
 
+    @pytest.mark.parametrize("function", ["min", "max"])
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_min_max_ordered_with_nan_only(self, function, skipna):
+        # https://github.com/pandas-dev/pandas/issues/33450
+        cat = Series(Categorical([np.nan], categories=[1, 2], ordered=True))
+        result = getattr(cat, function)(skipna=skipna)
+        assert result is np.nan
+
     @pytest.mark.parametrize("function", ["min", "max"])
     @pytest.mark.parametrize("skipna", [True, False])
     def test_min_max_skipna(self, function, skipna):
diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py
index 304033f82c7a2..620fc1c006d93 100644
--- a/pandas/tests/scalar/period/test_period.py
+++ b/pandas/tests/scalar/period/test_period.py
@@ -1,7 +1,5 @@
 from datetime import date, datetime, timedelta
-from distutils.version import StrictVersion
 
-import dateutil
 import numpy as np
 import pytest
 import pytz
@@ -1437,11 +1435,6 @@ def test_period_immutable():
     per.freq = 2 * freq
 
 
-@pytest.mark.xfail(
-    StrictVersion(dateutil.__version__.split(".dev")[0]) < StrictVersion("2.7.0"),
-    reason="Bug in dateutil < 2.7.0 when parsing old dates: Period('0001-01-07', 'D')",
-    strict=False,
-)
 def test_small_year_parsing():
     per1 = Period("0001-01-07", "D")
     assert per1.year == 1
diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
index 7baeb8f5673bc..eb22b715f9f4d 100644
--- a/pandas/tests/scalar/timedelta/test_arithmetic.py
+++ b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -417,6 +417,7 @@ def test_td_div_numeric_scalar(self):
             np.float64("NaN"),
             marks=pytest.mark.xfail(
                 _is_numpy_dev,
+                raises=RuntimeWarning,
                 reason="https://github.com/pandas-dev/pandas/issues/31992",
                 strict=False,
             ),
diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py
index f2969e15fad8a..558f10d967df6 100644
--- a/pandas/tests/series/indexing/test_alter_index.py
+++ b/pandas/tests/series/indexing/test_alter_index.py
@@ -283,7 +283,8 @@ def test_reindex_datetimeindexes_tz_naive_and_aware():
     idx = date_range("20131101", tz="America/Chicago", periods=7)
     newidx = date_range("20131103", periods=10, freq="H")
     s = Series(range(7), index=idx)
-    with pytest.raises(TypeError):
+    msg = "Cannot compare tz-naive and tz-aware timestamps"
+    with pytest.raises(TypeError, match=msg):
         s.reindex(newidx, method="ffill")
diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py
index 8878a4a6526af..e2b71b1f2f412 100644
--- a/pandas/tests/series/indexing/test_boolean.py
+++ b/pandas/tests/series/indexing/test_boolean.py
@@ -28,11 +28,6 @@ def test_getitem_boolean_empty():
     # GH5877
     # indexing with empty series
 
-    s = Series(["A", "B"])
-    expected = Series(np.nan, index=["C"], dtype=object)
-    result = s[Series(["C"], dtype=object)]
-    tm.assert_series_equal(result, expected)
-
     s = Series(["A", "B"])
     expected = Series(dtype=object, index=Index([], dtype="int64"))
     result = s[Series([], dtype=object)]
diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py
index 2922f3c741320..9ce31f5f6decf 100644
--- a/pandas/tests/series/indexing/test_getitem.py
+++ b/pandas/tests/series/indexing/test_getitem.py
@@ -78,6 +78,18 @@ def test_getitem_median_slice_bug(self):
 
 
 class TestSeriesGetitemListLike:
+    @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series])
+    def test_getitem_no_matches(self, box):
+        # GH#33462 we expect the same behavior for list/ndarray/Index/Series
+        ser = Series(["A", "B"])
+
+        key = Series(["C"], dtype=object)
+        key = box(key)
+
+        msg = r"None of \[Index\(\['C'\], dtype='object'\)\] are in the \[index\]"
+        with pytest.raises(KeyError, match=msg):
+            ser[key]
+
     def test_getitem_intlist_intindex_periodvalues(self):
         ser = Series(period_range("2000-01-01", periods=10, freq="D"))
diff --git a/pandas/tests/series/indexing/test_loc.py b/pandas/tests/series/indexing/test_loc.py
index 7d6b6c78cc492..368adcfb32215 100644
--- a/pandas/tests/series/indexing/test_loc.py
+++ b/pandas/tests/series/indexing/test_loc.py
@@ -131,8 +131,8 @@ def test_basic_setitem_with_labels(datetime_series):
     inds_notfound = [0, 4, 5, 6]
     arr_inds_notfound = np.array([0, 4, 5, 6])
 
-    msg = r"\[5\] not contained in the index"
-    with pytest.raises(ValueError, match=msg):
+    msg = r"\[5\] not in index"
+    with pytest.raises(KeyError, match=msg):
         s[inds_notfound] = 0
         with pytest.raises(Exception, match=msg):
             s[arr_inds_notfound] = 0
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index 16163ee76ba63..c7a04843b8296 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -378,7 +378,7 @@ def test_ser_cmp_result_names(self, names, op):
 
         # datetime64tz dtype
         dti = dti.tz_localize("US/Central")
-        dti._set_freq("infer")  # freq not preserved by tz_localize
+        dti = pd.DatetimeIndex(dti, freq="infer")  # freq not preserved by tz_localize
         ser = Series(dti).rename(names[1])
         result = op(ser, dti)
         assert result.name == names[2]
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index ad7028702ec8c..5f904241da485 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -31,7 +31,6 @@
     compat,
 )
 import pandas._testing as tm
-from pandas.conftest import BYTES_DTYPES, STRING_DTYPES
 import pandas.core.algorithms as algos
 from pandas.core.arrays import DatetimeArray
 import pandas.core.common as com
@@ -362,7 +361,7 @@ def test_on_index_object(self):
 
     def test_dtype_preservation(self, any_numpy_dtype):
         # GH 15442
-        if any_numpy_dtype in (BYTES_DTYPES + STRING_DTYPES):
+        if any_numpy_dtype in (tm.BYTES_DTYPES + tm.STRING_DTYPES):
             pytest.skip("skip string dtype")
         elif is_integer_dtype(any_numpy_dtype):
             data = [1, 2, 2]
diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py
index 1a3fe865d2a7a..aee47a085eb9c 100644
--- a/pandas/tests/window/test_base_indexer.py
+++ b/pandas/tests/window/test_base_indexer.py
@@ -141,6 +141,12 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
             ],
             {"ddof": 1},
         ),
+        (
+            "median",
+            np.median,
+            [1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 7.0, 8.0, 8.5, np.nan],
+            {},
+        ),
     ],
 )
 def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs):
@@ -162,7 +168,19 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs)
 
     rolling = constructor(values).rolling(window=indexer, min_periods=2)
     result = getattr(rolling, func)()
+
+    # Check that the function output matches the explicitly provided array
     expected = constructor(expected)
     tm.assert_equal(result, expected)
+
+    # Check that the rolling function output matches applying an alternative
+    # function to the rolling window object
     expected2 = constructor(rolling.apply(lambda x: np_func(x, **np_kwargs)))
     tm.assert_equal(result, expected2)
+
+    # Check that the function output matches applying an alternative function
+    # if min_periods isn't specified
+    rolling3 = constructor(values).rolling(window=indexer)
+    result3 = getattr(rolling3, func)()
+    expected3 = constructor(rolling3.apply(lambda x: np_func(x, **np_kwargs)))
+    tm.assert_equal(result3, expected3)
diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index cc8aef1779b46..8ecf64b171df4 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -5,6 +5,7 @@
 
 from pandas import Series
 import pandas._testing as tm
+from pandas.core.util.numba_ import NUMBA_FUNC_CACHE
 
 
 @td.skip_if_no("numba", "0.46.0")
@@ -59,7 +60,7 @@ def func_2(x):
         tm.assert_series_equal(result, expected)
 
         # func_1 should be in the cache now
-        assert func_1 in roll._numba_func_cache
+        assert (func_1, "rolling_apply") in NUMBA_FUNC_CACHE
 
         result = roll.apply(
             func_2, engine="numba", engine_kwargs=engine_kwargs, raw=True
diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py
index 682575cc9ed48..53a25eb321b73 100644
--- a/pandas/util/_validators.py
+++ b/pandas/util/_validators.py
@@ -257,7 +257,7 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name):
     # like out = {'index': foo, 'columns': bar}
 
     # Start by validating for consistency
-    if "axis" in kwargs and any(x in kwargs for x in data._AXIS_NUMBERS):
+    if "axis" in kwargs and any(x in kwargs for x in data._AXIS_TO_AXIS_NUMBER):
         msg = "Cannot specify both 'axis' and any of 'index' or 'columns'."
         raise TypeError(msg)
 
@@ -302,8 +302,8 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name):
             "a 'TypeError'."
        )
         warnings.warn(msg.format(method_name=method_name), FutureWarning, stacklevel=4)
-        out[data._AXIS_NAMES[0]] = args[0]
-        out[data._AXIS_NAMES[1]] = args[1]
+        out[data._get_axis_name(0)] = args[0]
+        out[data._get_axis_name(1)] = args[1]
     else:
         msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'."
raise TypeError(msg) diff --git a/setup.cfg b/setup.cfg index 6c42b27c7b015..f7370b6cef8d6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -119,7 +119,7 @@ combine_as_imports = True line_length = 88 force_sort_within_sections = True skip_glob = env, -skip = pandas/__init__.py,pandas/core/api.py +skip = pandas/__init__.py [mypy] ignore_missing_imports=True From a036604646eafbdb8e707345d2e658cc48905949 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 22 Apr 2020 09:44:48 -0700 Subject: [PATCH 3/4] wrap better --- pandas/core/arrays/datetimelike.py | 34 ------------ pandas/core/indexes/datetimelike.py | 80 ++++++++++++++++++++++------- 2 files changed, 61 insertions(+), 53 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4c360e8e17c5b..3440df4b09c06 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1394,40 +1394,6 @@ def _time_shift(self, periods, freq=None): # to be passed explicitly. return self._generate_range(start=start, end=end, periods=None, freq=self.freq) - @staticmethod - def _get_addsub_freq(self, other): - """ - Find the freq we expect the result of an addition operation to have. - """ - if is_period_dtype(self.dtype): - # Only used for ops that stay PeriodDtype - return self.freq - elif self.freq is None: - return None - elif lib.is_scalar(other) and isna(other): - return None - - elif isinstance(other, (Tick, timedelta, np.timedelta64)): - new_freq = None - if isinstance(self.freq, Tick): - new_freq = self.freq - return new_freq - - elif isinstance(other, DateOffset): - # otherwise just DatetimeArray - return None # TODO: Should we infer if it matches self.freq * n? - elif isinstance(other, (datetime, np.datetime64)): - return self.freq - - elif is_timedelta64_dtype(other): - return None # TODO: shouldnt we be able to do self.freq + other.freq? - elif is_object_dtype(other): - return None # TODO: is this quite right? sometimes we unpack singletons - elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): - return None # TODO: shouldnt we be able to do self.freq + other.freq? - else: - raise NotImplementedError - @unpack_zerodim_and_defer("__add__") def __add__(self, other): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 295bcc7287c65..84fdb89c3d841 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -1,7 +1,7 @@ """ Base and utility classes for tseries type pandas objects. 
""" -from datetime import datetime +from datetime import datetime, timedelta from typing import Any, List, Optional, Union import numpy as np @@ -17,14 +17,18 @@ ensure_int64, ensure_platform_int, is_bool_dtype, + is_datetime64_any_dtype, is_dtype_equal, is_integer, is_list_like, + is_object_dtype, is_period_dtype, is_scalar, + is_timedelta64_dtype, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries +from pandas.core.dtypes.missing import isna from pandas.core import algorithms from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray @@ -42,6 +46,7 @@ from pandas.core.tools.timedeltas import to_timedelta from pandas.tseries.frequencies import DateOffset, to_offset +from pandas.tseries.offsets import Tick _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -72,6 +77,26 @@ def wrapper(left, right): return wrapper +def _make_wrapped_arith_op_with_freq(opname: str): + """ + Dispatch the operation to the underlying ExtensionArray, and infer + the appropriate frequency for the result. + """ + meth = make_wrapped_arith_op(opname) + + def wrapped(self, other): + result = meth(self, other) + if result is NotImplemented: + return NotImplemented + + new_freq = self._get_addsub_freq(other) + result._freq = new_freq + return result + + wrapped.__name__ = opname + return wrapped + + @inherit_names( ["inferred_freq", "_isnan", "_resolution", "resolution"], DatetimeLikeArrayMixin, @@ -446,27 +471,44 @@ def get_indexer_non_unique(self, target): return ensure_platform_int(indexer), missing # -------------------------------------------------------------------- + # Arithmetic Methods - def __add__(self, other): - add = make_wrapped_arith_op("__add__") - result = add(self, other) - if result is NotImplemented: - return NotImplemented - - new_freq = type(self._data)._get_addsub_freq(self, other) - result._freq = new_freq - return result - - def __sub__(self, other): - sub = make_wrapped_arith_op("__sub__") - result = sub(self, other) - if result is NotImplemented: - return NotImplemented + def _get_addsub_freq(self, other) -> Optional[DateOffset]: + """ + Find the freq we expect the result of an addition/subtraction operation + to have. + """ + if is_period_dtype(self.dtype): + # Only used for ops that stay PeriodDtype + return self.freq + elif self.freq is None: + return None + elif lib.is_scalar(other) and isna(other): + return None - new_freq = type(self._data)._get_addsub_freq(self, other) - result._freq = new_freq - return result + elif isinstance(other, (Tick, timedelta, np.timedelta64)): + new_freq = None + if isinstance(self.freq, Tick): + new_freq = self.freq + return new_freq + + elif isinstance(other, DateOffset): + # otherwise just DatetimeArray + return None # TODO: Should we infer if it matches self.freq * n? + elif isinstance(other, (datetime, np.datetime64)): + return self.freq + + elif is_timedelta64_dtype(other): + return None # TODO: shouldnt we be able to do self.freq + other.freq? + elif is_object_dtype(other): + return None # TODO: is this quite right? sometimes we unpack singletons + elif is_datetime64_any_dtype(other): + return None # TODO: shouldnt we be able to do self.freq + other.freq? 
+        else:
+            raise NotImplementedError
 
+    __add__ = _make_wrapped_arith_op_with_freq("__add__")
+    __sub__ = _make_wrapped_arith_op_with_freq("__sub__")
     __radd__ = make_wrapped_arith_op("__radd__")
     __rsub__ = make_wrapped_arith_op("__rsub__")
     __pow__ = make_wrapped_arith_op("__pow__")

From 4117f1891b3db40995a1d0106aa2f79766fd65d2 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 25 Apr 2020 10:48:37 -0700
Subject: [PATCH 4/4] update usage

---
 pandas/tests/arithmetic/test_datetime64.py          | 4 ++--
 pandas/tests/indexes/common.py                      | 4 ++--
 pandas/tests/indexes/datetimelike.py                | 2 +-
 pandas/tests/indexes/datetimes/test_constructors.py | 2 +-
 pandas/tests/indexes/datetimes/test_ops.py          | 2 +-
 pandas/tests/io/test_parquet.py                     | 6 +++---
 pandas/tests/test_multilevel.py                     | 2 +-
 7 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
index 79fcb5e9478c3..912ce2a953e0a 100644
--- a/pandas/tests/arithmetic/test_datetime64.py
+++ b/pandas/tests/arithmetic/test_datetime64.py
@@ -2052,7 +2052,7 @@ def test_dti_add_tdi(self, tz_naive_fixture):
         dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
         tdi = pd.timedelta_range("0 days", periods=10)
         expected = pd.date_range("2017-01-01", periods=10, tz=tz)
-        expected._set_freq(None)
+        expected = expected._with_freq(None)
 
         # add with TimdeltaIndex
         result = dti + tdi
@@ -2074,7 +2074,7 @@ def test_dti_iadd_tdi(self, tz_naive_fixture):
         dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
         tdi = pd.timedelta_range("0 days", periods=10)
         expected = pd.date_range("2017-01-01", periods=10, tz=tz)
-        expected._set_freq(None)
+        expected = expected._with_freq(None)
 
         # iadd with TimdeltaIndex
         result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index 957ca138498d9..52b82b36d13be 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -267,7 +267,7 @@ def test_ensure_copied_data(self, indices):
         if is_datetime64tz_dtype(indices.dtype):
             result = result.tz_localize("UTC").tz_convert(indices.tz)
         if isinstance(indices, (DatetimeIndex, TimedeltaIndex)):
-            indices._set_freq(None)
+            indices = indices._with_freq(None)
 
         tm.assert_index_equal(indices, result)
 
@@ -397,7 +397,7 @@ def test_where(self, klass):
         i = self.create_index()
         if isinstance(i, (pd.DatetimeIndex, pd.TimedeltaIndex)):
             # where does not preserve freq
-            i._set_freq(None)
+            i = i._with_freq(None)
 
         cond = [True] * len(i)
         result = i.where(klass(cond))
diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py
index 944358b1540b0..dfefdc0f211b1 100644
--- a/pandas/tests/indexes/datetimelike.py
+++ b/pandas/tests/indexes/datetimelike.py
@@ -82,7 +82,7 @@ def test_map_dictlike(self, mapper):
 
         # don't compare the freqs
         if isinstance(expected, (pd.DatetimeIndex, pd.TimedeltaIndex)):
-            expected._set_freq(None)
+            expected = expected._with_freq(None)
 
         result = index.map(mapper(expected, index))
         tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
index a8e08bbe9a2e9..691f542fc2084 100644
--- a/pandas/tests/indexes/datetimes/test_constructors.py
+++ b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -131,7 +131,7 @@ def test_construction_with_alt(self, kwargs, tz_aware_fixture):
     def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
         tz = tz_aware_fixture
         i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
-        i._set_freq(None)
+        i = i._with_freq(None)
         kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
 
         if "tz" in kwargs:
diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
index 4d2e4b6a44cdb..603a0a452391c 100644
--- a/pandas/tests/indexes/datetimes/test_ops.py
+++ b/pandas/tests/indexes/datetimes/test_ops.py
@@ -134,7 +134,7 @@ def test_value_counts_unique(self, tz_naive_fixture):
 
         exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
         expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
-        expected.index._set_freq(None)
+        expected.index = expected.index._with_freq(None)
 
         for obj in [idx, Series(idx)]:
 
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index e70a06cc5f582..280424c68297f 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -386,7 +386,7 @@ def test_write_index(self, engine):
         for index in indexes:
             df.index = index
             if isinstance(index, pd.DatetimeIndex):
-                index._set_freq(None)  # freq doesnt round-trip
+                df.index = df.index._with_freq(None)  # freq doesnt round-trip
             check_round_trip(df, engine, check_names=check_names)
 
         # index with meta-data
@@ -465,7 +465,7 @@ def test_basic(self, pa, df_full):
 
         # additional supported types for pyarrow
         dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels")
-        dti._set_freq(None)  # freq doesnt round-trip
+        dti = dti._with_freq(None)  # freq doesnt round-trip
         df["datetime_tz"] = dti
         df["bool_with_none"] = [True, None, True]
 
@@ -634,7 +634,7 @@ def test_basic(self, fp, df_full):
         df = df_full
 
         dti = pd.date_range("20130101", periods=3, tz="US/Eastern")
-        dti._set_freq(None)  # freq doesnt round-trip
+        dti = dti._with_freq(None)  # freq doesnt round-trip
         df["datetime_tz"] = dti
         df["timedelta"] = pd.timedelta_range("1 day", periods=3)
         check_round_trip(df, fp)
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 43461d465b9e7..1ba73292dc0b4 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -1505,7 +1505,7 @@ def test_set_index_datetime(self):
             tz="US/Eastern",
         )
         idx3 = pd.date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo")
-        idx3._set_freq(None)
+        idx3 = idx3._with_freq(None)
 
         df = df.set_index(idx1)
         df = df.set_index(idx2, append=True)
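
To summarize the intended post-patch semantics, here is a minimal sketch.
This is an editor's illustration, not part of the patch series: `_with_freq`
and the freq re-attachment in the wrapped `__add__`/`__sub__` are private
internals, and the particular index values below are assumptions.

    import pandas as pd

    dti = pd.date_range("2016-01-01", periods=3, freq="D")

    # Previously, dti._set_freq(None) mutated the shared DatetimeArray in
    # place, so any other index viewing the same data lost its freq too
    # (the bug in PATCH 1's subject). _with_freq returns a modified copy:
    dti2 = dti._with_freq(None)
    assert dti2.freq is None
    assert dti.freq is not None  # the original index is left untouched

    # "infer" re-derives the freq from the values:
    assert dti2._with_freq("infer").freq == dti.freq

    # The wrapped __add__/__sub__ from PATCH 3 re-attach the expected freq:
    assert (dti + pd.Timedelta("1H")).freq == dti.freq   # Tick offset: kept
    # Non-Tick offsets drop the freq (may emit a PerformanceWarning):
    assert (dti + pd.DateOffset(months=1)).freq is None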