Skip to content

CLN: assorted follow-ups #45402

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
11 commits merged on Jan 17, 2022
2 changes: 1 addition & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,7 @@ def series_generator(self):
# GH#35462 re-pin mgr in case setitem changed it
ser._mgr = mgr
mgr.set_values(arr)
ser.name = name
object.__setattr__(ser, "_name", name)
yield ser

@property
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@

class BaseMaskedDtype(ExtensionDtype):
"""
Base class for dtypes for BasedMaskedArray subclasses.
Base class for dtypes for BaseMaskedArray subclasses.
"""

name: str
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,6 @@ def __init__(self, values, copy=False):
super().__init__(values, copy=copy)
if not isinstance(values, type(self)):
self._validate()
# error: Incompatible types in assignment (expression has type "StringDtype",
# variable has type "PandasDtype")
NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python"))

def _validate(self):
Expand Down
34 changes: 21 additions & 13 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,10 @@ def sanitize_array(
# it is lossy.
dtype = cast(np.dtype, dtype)
return np.array(data, dtype=dtype, copy=copy)

# We ignore the dtype arg and return floating values,
# e.g. test_constructor_floating_data_int_dtype
# TODO: where is the discussion that documents the reason for this?
subarr = np.array(data, copy=copy)
else:
# we will try to copy by-definition here
Expand Down Expand Up @@ -591,18 +595,21 @@ def sanitize_array(
try:
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
except ValueError:
casted = np.array(data, copy=False)
if casted.dtype.kind == "f" and is_integer_dtype(dtype):
# GH#40110 match the behavior we have if we passed
# a ndarray[float] to begin with
return sanitize_array(
casted,
index,
dtype,
copy=False,
raise_cast_failure=raise_cast_failure,
allow_2d=allow_2d,
)
if is_integer_dtype(dtype):
casted = np.array(data, copy=False)
if casted.dtype.kind == "f":
# GH#40110 match the behavior we have if we passed
# a ndarray[float] to begin with
return sanitize_array(
casted,
index,
dtype,
copy=False,
raise_cast_failure=raise_cast_failure,
allow_2d=allow_2d,
)
else:
raise
else:
raise
else:
Expand Down Expand Up @@ -762,7 +769,8 @@ def _try_cast(
# data differently; _from_sequence treats naive as wall times,
# while maybe_cast_to_datetime treats it as UTC
# see test_maybe_promote_any_numpy_dtype_with_datetimetz

# TODO(2.0): with deprecations enforced, should be able to remove
# special case.
return maybe_cast_to_datetime(arr, dtype)
# TODO: copy?

Expand Down
15 changes: 5 additions & 10 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1447,23 +1447,18 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
"""
new_dtype: DtypeObj

if left.dtype.kind in ["i", "u", "c"] and (
lib.is_integer(right) or lib.is_float(right)
if (
isinstance(left, np.ndarray)
and left.dtype.kind in ["i", "u", "c"]
and (lib.is_integer(right) or lib.is_float(right))
):
# e.g. with int8 dtype and right=512, we want to end up with
# np.int16, whereas infer_dtype_from(512) gives np.int64,
# which will make us upcast too far.
if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f":
right = int(right)

# Argument 1 to "result_type" has incompatible type "Union[ExtensionArray,
# ndarray[Any, Any]]"; expected "Union[Union[_SupportsArray[dtype[Any]],
# _NestedSequence[_SupportsArray[dtype[Any]]], bool, int, float, complex,
# str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]],
# Union[dtype[Any], None, Type[Any], _SupportsDType[dtype[Any]], str,
# Union[Tuple[Any, int], Tuple[Any, Union[SupportsIndex,
# Sequence[SupportsIndex]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]"
new_dtype = np.result_type(left, right) # type:ignore[arg-type]
new_dtype = np.result_type(left, right)

else:
dtype, _ = infer_dtype_from(right, pandas_dtype=True)
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays.sparse import SparseFrameAccessor
Expand Down Expand Up @@ -900,7 +901,7 @@ def _can_fast_transpose(self) -> bool:
@property
def _values( # type: ignore[override]
self,
) -> np.ndarray | DatetimeArray | TimedeltaArray:
) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray:
"""
Analogue to ._values that may return a 2D ExtensionArray.
"""
Expand All @@ -925,7 +926,7 @@ def _values( # type: ignore[override]
return self.values

# more generally, whatever we allow in NDArrayBackedExtensionBlock
arr = cast("np.ndarray | DatetimeArray | TimedeltaArray", arr)
arr = cast("np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray", arr)
return arr.T

# ----------------------------------------------------------------------
Expand Down
15 changes: 5 additions & 10 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,17 +705,14 @@ def get_iterator(
"""
splitter = self._get_splitter(data, axis=axis)
keys = self.group_keys_seq
for key, group in zip(keys, splitter):
yield key, group.__finalize__(data, method="groupby")
yield from zip(keys, splitter)

@final
def _get_splitter(self, data: NDFrame, axis: int = 0) -> DataSplitter:
"""
Returns
-------
Generator yielding subsetted objects

__finalize__ has not been called for the subsetted objects returned.
"""
ids, _, ngroups = self.group_info
return get_splitter(data, ids, ngroups, axis=axis)
Expand Down Expand Up @@ -753,7 +750,6 @@ def apply(
zipped = zip(group_keys, splitter)

for key, group in zipped:
group = group.__finalize__(data, method="groupby")
object.__setattr__(group, "name", key)

# group might be modified
Expand Down Expand Up @@ -1001,7 +997,6 @@ def _aggregate_series_pure_python(
splitter = get_splitter(obj, ids, ngroups, axis=0)

for i, group in enumerate(splitter):
group = group.__finalize__(obj, method="groupby")
res = func(group)
res = libreduction.extract_result(res)

Expand Down Expand Up @@ -1244,8 +1239,8 @@ class SeriesSplitter(DataSplitter):
def _chop(self, sdata: Series, slice_obj: slice) -> Series:
# fastpath equivalent to `sdata.iloc[slice_obj]`
mgr = sdata._mgr.get_slice(slice_obj)
# __finalize__ not called here, must be applied by caller if applicable
return sdata._constructor(mgr, name=sdata.name, fastpath=True)
ser = sdata._constructor(mgr, name=sdata.name, fastpath=True)
return ser.__finalize__(sdata, method="groupby")


class FrameSplitter(DataSplitter):
Expand All @@ -1256,8 +1251,8 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
# else:
# return sdata.iloc[:, slice_obj]
mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis)
# __finalize__ not called here, must be applied by caller if applicable
return sdata._constructor(mgr)
df = sdata._constructor(mgr)
return df.__finalize__(sdata, method="groupby")


def get_splitter(
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ def _should_fallback_to_positional(self) -> bool:

@doc(Index._convert_slice_indexer)
def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
# TODO(2.0): once #45324 deprecation is enforced we should be able
# to simplify this.
if is_float_dtype(self.dtype):
assert kind in ["loc", "getitem"]

Expand Down
6 changes: 1 addition & 5 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,12 +805,8 @@ def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
@final
def _convert_tuple(self, key: tuple) -> tuple:
# Note: we assume _tupleize_axis_indexer has been called, if necessary.
keyidx = []
self._validate_key_length(key)
for i, k in enumerate(key):
idx = self._convert_to_indexer(k, axis=i)
keyidx.append(idx)

keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
return tuple(keyidx)

@final
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2148,8 +2148,6 @@ def _factorize_keys(
rk = ensure_int64(rk.codes)

elif isinstance(lk, ExtensionArray) and is_dtype_equal(lk.dtype, rk.dtype):
# error: Incompatible types in assignment (expression has type "ndarray",
# variable has type "ExtensionArray")
lk, _ = lk._values_for_factorize()

# error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
Expand Down
7 changes: 3 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,12 +1162,11 @@ def _set_with_engine(self, key, value) -> None:
self._mgr.setitem_inplace(loc, value)

def _set_with(self, key, value):
# other: fancy integer or otherwise
# We got here via exception-handling off of InvalidIndexError, so
# key should always be listlike at this point.
assert not isinstance(key, tuple)

if is_scalar(key):
key = [key]
elif is_iterator(key):
if is_iterator(key):
# Without this, the call to infer_dtype will consume the generator
key = list(key)

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def nargsort(
ascending: bool = True,
na_position: str = "last",
key: Callable | None = None,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> npt.NDArray[np.intp]:
"""
Intended to be a drop-in replacement for np.argsort which handles NaNs.
Expand All @@ -369,7 +369,7 @@ def nargsort(
ascending : bool, default True
na_position : {'first', 'last'}, default 'last'
key : Optional[Callable], default None
mask : Optional[np.ndarray], default None
mask : Optional[np.ndarray[bool]], default None
Passed when called by ExtensionArray.argsort.

Returns
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/pytables/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
),
pytest.mark.filterwarnings(r"ignore:tostring\(\) is deprecated:DeprecationWarning"),
pytest.mark.filterwarnings(
r"ignore:`np\.object` is a deprecated alias:DeprecationWarning"
r"ignore:`np\.object` is a deprecated alias.*:DeprecationWarning"
),
pytest.mark.filterwarnings(
r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning"
r"ignore:`np\.bool` is a deprecated alias.*:DeprecationWarning"
),
]
41 changes: 21 additions & 20 deletions pandas/tests/resample/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,14 @@ def test_asfreq(series_and_frame, freq, create_index):
def test_asfreq_fill_value(series, create_index):
# test for fill value during resampling, issue 3715

s = series
ser = series

result = s.resample("1H").asfreq()
new_index = create_index(s.index[0], s.index[-1], freq="1H")
expected = s.reindex(new_index)
result = ser.resample("1H").asfreq()
new_index = create_index(ser.index[0], ser.index[-1], freq="1H")
expected = ser.reindex(new_index)
tm.assert_series_equal(result, expected)

frame = s.to_frame("value")
frame = ser.to_frame("value")
frame.iloc[1] = None
result = frame.resample("1H").asfreq(fill_value=4.0)
new_index = create_index(frame.index[0], frame.index[-1], freq="1H")
Expand Down Expand Up @@ -104,11 +104,11 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method):
if resample_method == "ohlc":
pytest.skip("need to test for ohlc from GH13083")

s = empty_series_dti
result = getattr(s.resample(freq), resample_method)()
ser = empty_series_dti
result = getattr(ser.resample(freq), resample_method)()

expected = s.copy()
expected.index = _asfreq_compat(s.index, freq)
expected = ser.copy()
expected.index = _asfreq_compat(ser.index, freq)

tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
Expand All @@ -123,17 +123,18 @@ def test_resample_nat_index_series(request, freq, series, resample_method):
if freq == "M":
request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails"))

s = series.copy()
s.index = PeriodIndex([NaT] * len(s), freq=freq)
result = getattr(s.resample(freq), resample_method)()
ser = series.copy()
ser.index = PeriodIndex([NaT] * len(ser), freq=freq)
rs = ser.resample(freq)
result = getattr(rs, resample_method)()

if resample_method == "ohlc":
expected = DataFrame(
[], index=s.index[:0].copy(), columns=["open", "high", "low", "close"]
[], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
)
tm.assert_frame_equal(result, expected, check_dtype=False)
else:
expected = s[:0].copy()
expected = ser[:0].copy()
tm.assert_series_equal(result, expected, check_dtype=False)
tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
Expand Down Expand Up @@ -226,9 +227,9 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_apply_to_empty_series(empty_series_dti, freq):
# GH 14313
s = empty_series_dti
result = s.resample(freq).apply(lambda x: 1)
expected = s.resample(freq).apply(np.sum)
ser = empty_series_dti
result = ser.resample(freq).apply(lambda x: 1)
expected = ser.resample(freq).apply(np.sum)

tm.assert_series_equal(result, expected, check_dtype=False)

Expand All @@ -248,9 +249,9 @@ def test_resampler_is_iterable(series):
@all_ts
def test_resample_quantile(series):
# GH 15023
s = series
ser = series
q = 0.75
freq = "H"
result = s.resample(freq).quantile(q)
expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name)
result = ser.resample(freq).quantile(q)
expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name)
tm.assert_series_equal(result, expected)
Loading