diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 64ee843f1d946..238f1382890c9 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -996,7 +996,7 @@ def series_generator(self): # GH#35462 re-pin mgr in case setitem changed it ser._mgr = mgr mgr.set_values(arr) - ser.name = name + object.__setattr__(ser, "_name", name) yield ser @property diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index a9366f122bb5d..e0eef5d48e625 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -92,7 +92,7 @@ class BaseMaskedDtype(ExtensionDtype): """ - Base class for dtypes for BasedMaskedArray subclasses. + Base class for dtypes for BaseMaskedArray subclasses. """ name: str diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index b79e915fa6c94..af1756470a9c0 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -319,8 +319,6 @@ def __init__(self, values, copy=False): super().__init__(values, copy=copy) if not isinstance(values, type(self)): self._validate() - # error: Incompatible types in assignment (expression has type "StringDtype", - # variable has type "PandasDtype") NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python")) def _validate(self): diff --git a/pandas/core/construction.py b/pandas/core/construction.py index a0db2c2157f8f..10637db555dd8 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -561,6 +561,10 @@ def sanitize_array( # it is lossy. dtype = cast(np.dtype, dtype) return np.array(data, dtype=dtype, copy=copy) + + # We ignore the dtype arg and return floating values, + # e.g. test_constructor_floating_data_int_dtype + # TODO: where is the discussion that documents the reason for this? subarr = np.array(data, copy=copy) else: # we will try to copy by-definition here @@ -591,18 +595,21 @@ def sanitize_array( try: subarr = _try_cast(data, dtype, copy, raise_cast_failure) except ValueError: - casted = np.array(data, copy=False) - if casted.dtype.kind == "f" and is_integer_dtype(dtype): - # GH#40110 match the behavior we have if we passed - # a ndarray[float] to begin with - return sanitize_array( - casted, - index, - dtype, - copy=False, - raise_cast_failure=raise_cast_failure, - allow_2d=allow_2d, - ) + if is_integer_dtype(dtype): + casted = np.array(data, copy=False) + if casted.dtype.kind == "f": + # GH#40110 match the behavior we have if we passed + # a ndarray[float] to begin with + return sanitize_array( + casted, + index, + dtype, + copy=False, + raise_cast_failure=raise_cast_failure, + allow_2d=allow_2d, + ) + else: + raise else: raise else: @@ -762,7 +769,8 @@ def _try_cast( # data differently; _from_sequence treats naive as wall times, # while maybe_cast_to_datetime treats it as UTC # see test_maybe_promote_any_numpy_dtype_with_datetimetz - + # TODO(2.0): with deprecations enforced, should be able to remove + # special case. return maybe_cast_to_datetime(arr, dtype) # TODO: copy? diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 33dc377d7120f..249759274603d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1447,8 +1447,10 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj: """ new_dtype: DtypeObj - if left.dtype.kind in ["i", "u", "c"] and ( - lib.is_integer(right) or lib.is_float(right) + if ( + isinstance(left, np.ndarray) + and left.dtype.kind in ["i", "u", "c"] + and (lib.is_integer(right) or lib.is_float(right)) ): # e.g. with int8 dtype and right=512, we want to end up with # np.int16, whereas infer_dtype_from(512) gives np.int64, @@ -1456,14 +1458,7 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj: if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f": right = int(right) - # Argument 1 to "result_type" has incompatible type "Union[ExtensionArray, - # ndarray[Any, Any]]"; expected "Union[Union[_SupportsArray[dtype[Any]], - # _NestedSequence[_SupportsArray[dtype[Any]]], bool, int, float, complex, - # str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], - # Union[dtype[Any], None, Type[Any], _SupportsDType[dtype[Any]], str, - # Union[Tuple[Any, int], Tuple[Any, Union[SupportsIndex, - # Sequence[SupportsIndex]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]" - new_dtype = np.result_type(left, right) # type:ignore[arg-type] + new_dtype = np.result_type(left, right) else: dtype, _ = infer_dtype_from(right, pandas_dtype=True) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 22f34418b76fd..d08c59338af9e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -148,6 +148,7 @@ from pandas.core.arrays import ( DatetimeArray, ExtensionArray, + PeriodArray, TimedeltaArray, ) from pandas.core.arrays.sparse import SparseFrameAccessor @@ -900,7 +901,7 @@ def _can_fast_transpose(self) -> bool: @property def _values( # type: ignore[override] self, - ) -> np.ndarray | DatetimeArray | TimedeltaArray: + ) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray: """ Analogue to ._values that may return a 2D ExtensionArray. """ @@ -925,7 +926,7 @@ def _values( # type: ignore[override] return self.values # more generally, whatever we allow in NDArrayBackedExtensionBlock - arr = cast("np.ndarray | DatetimeArray | TimedeltaArray", arr) + arr = cast("np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray", arr) return arr.T # ---------------------------------------------------------------------- diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 29073cc23b133..0e8a7a5b7d9a9 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -705,8 +705,7 @@ def get_iterator( """ splitter = self._get_splitter(data, axis=axis) keys = self.group_keys_seq - for key, group in zip(keys, splitter): - yield key, group.__finalize__(data, method="groupby") + yield from zip(keys, splitter) @final def _get_splitter(self, data: NDFrame, axis: int = 0) -> DataSplitter: @@ -714,8 +713,6 @@ def _get_splitter(self, data: NDFrame, axis: int = 0) -> DataSplitter: Returns ------- Generator yielding subsetted objects - - __finalize__ has not been called for the subsetted objects returned. """ ids, _, ngroups = self.group_info return get_splitter(data, ids, ngroups, axis=axis) @@ -753,7 +750,6 @@ def apply( zipped = zip(group_keys, splitter) for key, group in zipped: - group = group.__finalize__(data, method="groupby") object.__setattr__(group, "name", key) # group might be modified @@ -1001,7 +997,6 @@ def _aggregate_series_pure_python( splitter = get_splitter(obj, ids, ngroups, axis=0) for i, group in enumerate(splitter): - group = group.__finalize__(obj, method="groupby") res = func(group) res = libreduction.extract_result(res) @@ -1244,8 +1239,8 @@ class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: # fastpath equivalent to `sdata.iloc[slice_obj]` mgr = sdata._mgr.get_slice(slice_obj) - # __finalize__ not called here, must be applied by caller if applicable - return sdata._constructor(mgr, name=sdata.name, fastpath=True) + ser = sdata._constructor(mgr, name=sdata.name, fastpath=True) + return ser.__finalize__(sdata, method="groupby") class FrameSplitter(DataSplitter): @@ -1256,8 +1251,8 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # else: # return sdata.iloc[:, slice_obj] mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) - # __finalize__ not called here, must be applied by caller if applicable - return sdata._constructor(mgr) + df = sdata._constructor(mgr) + return df.__finalize__(sdata, method="groupby") def get_splitter( diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index ffd511b92da43..e4526af235a0e 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -272,6 +272,8 @@ def _should_fallback_to_positional(self) -> bool: @doc(Index._convert_slice_indexer) def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False): + # TODO(2.0): once #45324 deprecation is enforced we should be able + # to simplify this. if is_float_dtype(self.dtype): assert kind in ["loc", "getitem"] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 13acc3ef10228..44d32e0cef66f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -805,12 +805,8 @@ def _is_nested_tuple_indexer(self, tup: tuple) -> bool: @final def _convert_tuple(self, key: tuple) -> tuple: # Note: we assume _tupleize_axis_indexer has been called, if necessary. - keyidx = [] self._validate_key_length(key) - for i, k in enumerate(key): - idx = self._convert_to_indexer(k, axis=i) - keyidx.append(idx) - + keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)] return tuple(keyidx) @final diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 3a39713f18d65..c010addd10404 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2148,8 +2148,6 @@ def _factorize_keys( rk = ensure_int64(rk.codes) elif isinstance(lk, ExtensionArray) and is_dtype_equal(lk.dtype, rk.dtype): - # error: Incompatible types in assignment (expression has type "ndarray", - # variable has type "ExtensionArray") lk, _ = lk._values_for_factorize() # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute diff --git a/pandas/core/series.py b/pandas/core/series.py index 59441dd5352ab..14a2881d7b291 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1162,12 +1162,11 @@ def _set_with_engine(self, key, value) -> None: self._mgr.setitem_inplace(loc, value) def _set_with(self, key, value): - # other: fancy integer or otherwise + # We got here via exception-handling off of InvalidIndexError, so + # key should always be listlike at this point. assert not isinstance(key, tuple) - if is_scalar(key): - key = [key] - elif is_iterator(key): + if is_iterator(key): # Without this, the call to infer_dtype will consume the generator key = list(key) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 7813182222d67..ac306b1687381 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -354,7 +354,7 @@ def nargsort( ascending: bool = True, na_position: str = "last", key: Callable | None = None, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> npt.NDArray[np.intp]: """ Intended to be a drop-in replacement for np.argsort which handles NaNs. @@ -369,7 +369,7 @@ def nargsort( ascending : bool, default True na_position : {'first', 'last'}, default 'last' key : Optional[Callable], default None - mask : Optional[np.ndarray], default None + mask : Optional[np.ndarray[bool]], default None Passed when called by ExtensionArray.argsort. Returns diff --git a/pandas/tests/io/pytables/__init__.py b/pandas/tests/io/pytables/__init__.py index cbf848a401dc4..c94db30adb61d 100644 --- a/pandas/tests/io/pytables/__init__.py +++ b/pandas/tests/io/pytables/__init__.py @@ -7,9 +7,9 @@ ), pytest.mark.filterwarnings(r"ignore:tostring\(\) is deprecated:DeprecationWarning"), pytest.mark.filterwarnings( - r"ignore:`np\.object` is a deprecated alias:DeprecationWarning" + r"ignore:`np\.object` is a deprecated alias.*:DeprecationWarning" ), pytest.mark.filterwarnings( - r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning" + r"ignore:`np\.bool` is a deprecated alias.*:DeprecationWarning" ), ] diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 22c487cc338db..f375915b620ec 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -61,14 +61,14 @@ def test_asfreq(series_and_frame, freq, create_index): def test_asfreq_fill_value(series, create_index): # test for fill value during resampling, issue 3715 - s = series + ser = series - result = s.resample("1H").asfreq() - new_index = create_index(s.index[0], s.index[-1], freq="1H") - expected = s.reindex(new_index) + result = ser.resample("1H").asfreq() + new_index = create_index(ser.index[0], ser.index[-1], freq="1H") + expected = ser.reindex(new_index) tm.assert_series_equal(result, expected) - frame = s.to_frame("value") + frame = ser.to_frame("value") frame.iloc[1] = None result = frame.resample("1H").asfreq(fill_value=4.0) new_index = create_index(frame.index[0], frame.index[-1], freq="1H") @@ -104,11 +104,11 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method): if resample_method == "ohlc": pytest.skip("need to test for ohlc from GH13083") - s = empty_series_dti - result = getattr(s.resample(freq), resample_method)() + ser = empty_series_dti + result = getattr(ser.resample(freq), resample_method)() - expected = s.copy() - expected.index = _asfreq_compat(s.index, freq) + expected = ser.copy() + expected.index = _asfreq_compat(ser.index, freq) tm.assert_index_equal(result.index, expected.index) assert result.index.freq == expected.index.freq @@ -123,17 +123,18 @@ def test_resample_nat_index_series(request, freq, series, resample_method): if freq == "M": request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails")) - s = series.copy() - s.index = PeriodIndex([NaT] * len(s), freq=freq) - result = getattr(s.resample(freq), resample_method)() + ser = series.copy() + ser.index = PeriodIndex([NaT] * len(ser), freq=freq) + rs = ser.resample(freq) + result = getattr(rs, resample_method)() if resample_method == "ohlc": expected = DataFrame( - [], index=s.index[:0].copy(), columns=["open", "high", "low", "close"] + [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"] ) tm.assert_frame_equal(result, expected, check_dtype=False) else: - expected = s[:0].copy() + expected = ser[:0].copy() tm.assert_series_equal(result, expected, check_dtype=False) tm.assert_index_equal(result.index, expected.index) assert result.index.freq == expected.index.freq @@ -226,9 +227,9 @@ def test_resample_empty_dtypes(index, dtype, resample_method): @pytest.mark.parametrize("freq", ["M", "D", "H"]) def test_apply_to_empty_series(empty_series_dti, freq): # GH 14313 - s = empty_series_dti - result = s.resample(freq).apply(lambda x: 1) - expected = s.resample(freq).apply(np.sum) + ser = empty_series_dti + result = ser.resample(freq).apply(lambda x: 1) + expected = ser.resample(freq).apply(np.sum) tm.assert_series_equal(result, expected, check_dtype=False) @@ -248,9 +249,9 @@ def test_resampler_is_iterable(series): @all_ts def test_resample_quantile(series): # GH 15023 - s = series + ser = series q = 0.75 freq = "H" - result = s.resample(freq).quantile(q) - expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name) + result = ser.resample(freq).quantile(q) + expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 2411b47e9dd7e..f9376da02c510 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -754,6 +754,53 @@ def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype) with pytest.raises(OverflowError, match=msg): Series([-1], dtype=any_unsigned_int_numpy_dtype) + def test_constructor_floating_data_int_dtype(self, frame_or_series): + # GH#40110 + arr = np.random.randn(2) + + if frame_or_series is Series: + # Long-standing behavior has been to ignore the dtype on these; + # not clear if this is what we want long-term + expected = frame_or_series(arr) + + res = frame_or_series(arr, dtype="i8") + tm.assert_equal(res, expected) + + res = frame_or_series(list(arr), dtype="i8") + tm.assert_equal(res, expected) + + else: + msg = "passing float-dtype values and an integer dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + # DataFrame will behave like Series + frame_or_series(arr, dtype="i8") + with tm.assert_produces_warning(FutureWarning, match=msg): + # DataFrame will behave like Series + frame_or_series(list(arr), dtype="i8") + + # When we have NaNs, we silently ignore the integer dtype + arr[0] = np.nan + expected = frame_or_series(arr) + msg = "passing float-dtype values containing NaN and an integer dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + obj = frame_or_series(arr, dtype="i8") + tm.assert_equal(obj, expected) + + with tm.assert_produces_warning(FutureWarning, match=msg): + # same behavior if we pass list instead of the ndarray + obj = frame_or_series(list(arr), dtype="i8") + tm.assert_equal(obj, expected) + + # float array that can be losslessly cast to integers + arr = np.array([1.0, 2.0], dtype="float64") + expected = frame_or_series(arr.astype("i8")) + + obj = frame_or_series(arr, dtype="i8") + tm.assert_equal(obj, expected) + + obj = frame_or_series(list(arr), dtype="i8") + tm.assert_equal(obj, expected) + @td.skip_if_no("dask") def test_construct_dask_float_array_int_dtype_match_ndarray(self): # GH#40110 make sure we treat a float-dtype dask array with the same diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index f1db4a2fc22cb..cf5ba9a9fc112 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -14,6 +14,11 @@ from pandas import DataFrame import pandas._testing as tm +# geopandas, xarray, fsspec, fastparquet all produce these +pytestmark = pytest.mark.filterwarnings( + "ignore:distutils Version classes are deprecated.*:DeprecationWarning" +) + def import_module(name): # we *only* want to skip if the module is truly not available