Commit 56498ae

CLN: assorted follow-ups (#45402)
1 parent c2fc924

File tree

16 files changed: +119 -74 lines changed

pandas/core/apply.py (+1 -1)

@@ -996,7 +996,7 @@ def series_generator(self):
                 # GH#35462 re-pin mgr in case setitem changed it
                 ser._mgr = mgr
                 mgr.set_values(arr)
-                ser.name = name
+                object.__setattr__(ser, "_name", name)
                 yield ser

     @property

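Note on the apply.py change: the Series.name property setter validates the new name on every assignment, so writing the underlying attribute with object.__setattr__ skips that per-row overhead inside the generator loop. A minimal sketch of the pattern in plain Python (illustrative class, not pandas internals):

    class Labeled:
        @property
        def name(self):
            return self._name

        @name.setter
        def name(self, value):
            hash(value)  # stand-in for the validation a public setter performs
            object.__setattr__(self, "_name", value)

    obj = Labeled()
    obj.name = "a"                          # goes through the property setter
    object.__setattr__(obj, "_name", "b")   # writes the attribute directly
    print(obj.name)                         # prints: b
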
pandas/core/arrays/masked.py (+1 -1)

@@ -93,7 +93,7 @@
 
 class BaseMaskedDtype(ExtensionDtype):
     """
-    Base class for dtypes for BasedMaskedArray subclasses.
+    Base class for dtypes for BaseMaskedArray subclasses.
     """

     name: str

pandas/core/arrays/string_.py (-2)

@@ -319,8 +319,6 @@ def __init__(self, values, copy=False):
         super().__init__(values, copy=copy)
         if not isinstance(values, type(self)):
             self._validate()
-        # error: Incompatible types in assignment (expression has type "StringDtype",
-        # variable has type "PandasDtype")
         NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python"))

     def _validate(self):

pandas/core/construction.py (+21 -13)

@@ -561,6 +561,10 @@ def sanitize_array(
                     # it is lossy.
                     dtype = cast(np.dtype, dtype)
                     return np.array(data, dtype=dtype, copy=copy)
+
+                # We ignore the dtype arg and return floating values,
+                # e.g. test_constructor_floating_data_int_dtype
+                # TODO: where is the discussion that documents the reason for this?
                 subarr = np.array(data, copy=copy)
         else:
             # we will try to copy by-definition here
@@ -591,18 +595,21 @@
             try:
                 subarr = _try_cast(data, dtype, copy, raise_cast_failure)
             except ValueError:
-                casted = np.array(data, copy=False)
-                if casted.dtype.kind == "f" and is_integer_dtype(dtype):
-                    # GH#40110 match the behavior we have if we passed
-                    # a ndarray[float] to begin with
-                    return sanitize_array(
-                        casted,
-                        index,
-                        dtype,
-                        copy=False,
-                        raise_cast_failure=raise_cast_failure,
-                        allow_2d=allow_2d,
-                    )
+                if is_integer_dtype(dtype):
+                    casted = np.array(data, copy=False)
+                    if casted.dtype.kind == "f":
+                        # GH#40110 match the behavior we have if we passed
+                        # a ndarray[float] to begin with
+                        return sanitize_array(
+                            casted,
+                            index,
+                            dtype,
+                            copy=False,
+                            raise_cast_failure=raise_cast_failure,
+                            allow_2d=allow_2d,
+                        )
+                    else:
+                        raise
                 else:
                     raise
         else:
@@ -762,7 +769,8 @@ def _try_cast(
         # data differently; _from_sequence treats naive as wall times,
         # while maybe_cast_to_datetime treats it as UTC
         # see test_maybe_promote_any_numpy_dtype_with_datetimetz
-
+        # TODO(2.0): with deprecations enforced, should be able to remove
+        #  special case.
         return maybe_cast_to_datetime(arr, dtype)
         # TODO: copy?
 

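Note on the sanitize_array change: the fallback now fires only when an integer dtype was requested and the raw values turn out to be floats; any other ValueError from _try_cast propagates unchanged. A standalone sketch of the restructured control flow, using hypothetical helpers rather than the pandas functions:

    import numpy as np

    def strict_cast(data, dtype):
        # stand-in for _try_cast: raise ValueError when the cast would be lossy
        arr = np.asarray(data)
        out = arr.astype(dtype)
        if not np.array_equal(out.astype(arr.dtype), arr):
            raise ValueError("lossy cast")
        return out

    def coerce(data, dtype):
        try:
            return strict_cast(data, dtype)
        except ValueError:
            if np.issubdtype(dtype, np.integer):
                casted = np.asarray(data)
                if casted.dtype.kind == "f":
                    # GH#40110-style outcome: keep the floating values
                    return casted
            raise

    print(coerce([1.0, 2.0], np.int64).dtype)  # int64 (lossless cast succeeds)
    print(coerce([1.5, 2.5], np.int64).dtype)  # float64 (lossy cast avoided)
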
pandas/core/dtypes/cast.py (+5 -10)

@@ -1447,23 +1447,18 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
     """
     new_dtype: DtypeObj

-    if left.dtype.kind in ["i", "u", "c"] and (
-        lib.is_integer(right) or lib.is_float(right)
+    if (
+        isinstance(left, np.ndarray)
+        and left.dtype.kind in ["i", "u", "c"]
+        and (lib.is_integer(right) or lib.is_float(right))
     ):
         # e.g. with int8 dtype and right=512, we want to end up with
         # np.int16, whereas infer_dtype_from(512) gives np.int64,
         # which will make us upcast too far.
         if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f":
             right = int(right)

-        # Argument 1 to "result_type" has incompatible type "Union[ExtensionArray,
-        # ndarray[Any, Any]]"; expected "Union[Union[_SupportsArray[dtype[Any]],
-        # _NestedSequence[_SupportsArray[dtype[Any]]], bool, int, float, complex,
-        # str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]],
-        # Union[dtype[Any], None, Type[Any], _SupportsDType[dtype[Any]], str,
-        # Union[Tuple[Any, int], Tuple[Any, Union[SupportsIndex,
-        # Sequence[SupportsIndex]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]"
-        new_dtype = np.result_type(left, right)  # type:ignore[arg-type]
+        new_dtype = np.result_type(left, right)

     else:
         dtype, _ = infer_dtype_from(right, pandas_dtype=True)

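Note on the find_result_type change: guarding on isinstance(left, np.ndarray) means np.result_type only ever receives an ndarray plus a Python scalar, which is also why the long type-ignore comment could be dropped. Roughly what that promotion does; the int16 result below reflects the value-based promotion of NumPy releases current at the time of this commit (NEP 50 changes scalar promotion in later NumPy):

    import numpy as np

    left = np.array([1, 2], dtype=np.int8)

    # a scalar that does not fit int8 promotes only as far as needed
    print(np.result_type(left, 512))  # int16 under value-based promotion
    print(np.result_type(left, 4))    # int8

    # an integral float such as 4.0 is converted to int first so it does not
    # drag the result up to a float dtype
    right = 4.0
    if isinstance(right, float) and right.is_integer():
        right = int(right)
    print(np.result_type(left, right))  # int8
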
pandas/core/frame.py (+3 -2)

@@ -148,6 +148,7 @@
 from pandas.core.arrays import (
     DatetimeArray,
     ExtensionArray,
+    PeriodArray,
     TimedeltaArray,
 )
 from pandas.core.arrays.sparse import SparseFrameAccessor
@@ -900,7 +901,7 @@ def _can_fast_transpose(self) -> bool:
     @property
     def _values(  # type: ignore[override]
         self,
-    ) -> np.ndarray | DatetimeArray | TimedeltaArray:
+    ) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray:
         """
         Analogue to ._values that may return a 2D ExtensionArray.
         """
@@ -925,7 +926,7 @@ def _values(  # type: ignore[override]
             return self.values

         # more generally, whatever we allow in NDArrayBackedExtensionBlock
-        arr = cast("np.ndarray | DatetimeArray | TimedeltaArray", arr)
+        arr = cast("np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray", arr)
         return arr.T

     # ----------------------------------------------------------------------

pandas/core/groupby/ops.py (+5 -10)

@@ -705,17 +705,14 @@ def get_iterator(
         """
         splitter = self._get_splitter(data, axis=axis)
         keys = self.group_keys_seq
-        for key, group in zip(keys, splitter):
-            yield key, group.__finalize__(data, method="groupby")
+        yield from zip(keys, splitter)

     @final
     def _get_splitter(self, data: NDFrame, axis: int = 0) -> DataSplitter:
         """
         Returns
         -------
         Generator yielding subsetted objects
-
-        __finalize__ has not been called for the subsetted objects returned.
         """
         ids, _, ngroups = self.group_info
         return get_splitter(data, ids, ngroups, axis=axis)
@@ -753,7 +750,6 @@ def apply(
         zipped = zip(group_keys, splitter)

         for key, group in zipped:
-            group = group.__finalize__(data, method="groupby")
             object.__setattr__(group, "name", key)

             # group might be modified
@@ -1001,7 +997,6 @@ def _aggregate_series_pure_python(
         splitter = get_splitter(obj, ids, ngroups, axis=0)

         for i, group in enumerate(splitter):
-            group = group.__finalize__(obj, method="groupby")
             res = func(group)
             res = libreduction.extract_result(res)
 
@@ -1244,8 +1239,8 @@ class SeriesSplitter(DataSplitter):
     def _chop(self, sdata: Series, slice_obj: slice) -> Series:
         # fastpath equivalent to `sdata.iloc[slice_obj]`
         mgr = sdata._mgr.get_slice(slice_obj)
-        # __finalize__ not called here, must be applied by caller if applicable
-        return sdata._constructor(mgr, name=sdata.name, fastpath=True)
+        ser = sdata._constructor(mgr, name=sdata.name, fastpath=True)
+        return ser.__finalize__(sdata, method="groupby")


 class FrameSplitter(DataSplitter):
@@ -1256,8 +1251,8 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
         # else:
         #     return sdata.iloc[:, slice_obj]
         mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis)
-        # __finalize__ not called here, must be applied by caller if applicable
-        return sdata._constructor(mgr)
+        df = sdata._constructor(mgr)
+        return df.__finalize__(sdata, method="groupby")


 def get_splitter(

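Note on the groupby/ops.py change: moving __finalize__ into DataSplitter._chop means every subsetted object handed out by the splitter already carries the parent's metadata, so get_iterator, apply and _aggregate_series_pure_python no longer need to call it themselves. A small illustration of the kind of metadata propagation __finalize__ provides (attrs handling has varied across pandas versions, so treat the printed output as indicative):

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
    df.attrs["source"] = "sensor-1"

    # __finalize__ is the hook that copies metadata such as .attrs from the
    # parent onto each per-group object yielded during iteration
    for key, group in df.groupby("key"):
        print(key, group.attrs)
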
pandas/core/indexes/numeric.py (+2)

@@ -272,6 +272,8 @@ def _should_fallback_to_positional(self) -> bool:

     @doc(Index._convert_slice_indexer)
     def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
+        # TODO(2.0): once #45324 deprecation is enforced we should be able
+        #  to simplify this.
         if is_float_dtype(self.dtype):
             assert kind in ["loc", "getitem"]

pandas/core/indexing.py (+1 -5)

@@ -805,12 +805,8 @@ def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
     @final
     def _convert_tuple(self, key: tuple) -> tuple:
         # Note: we assume _tupleize_axis_indexer has been called, if necessary.
-        keyidx = []
         self._validate_key_length(key)
-        for i, k in enumerate(key):
-            idx = self._convert_to_indexer(k, axis=i)
-            keyidx.append(idx)
-
+        keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
         return tuple(keyidx)

     @final

pandas/core/reshape/merge.py (-2)

@@ -2148,8 +2148,6 @@ def _factorize_keys(
         rk = ensure_int64(rk.codes)

     elif isinstance(lk, ExtensionArray) and is_dtype_equal(lk.dtype, rk.dtype):
-        # error: Incompatible types in assignment (expression has type "ndarray",
-        # variable has type "ExtensionArray")
         lk, _ = lk._values_for_factorize()

         # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute

pandas/core/series.py (+3 -4)

@@ -1162,12 +1162,11 @@ def _set_with_engine(self, key, value) -> None:
         self._mgr.setitem_inplace(loc, value)

     def _set_with(self, key, value):
-        # other: fancy integer or otherwise
+        # We got here via exception-handling off of InvalidIndexError, so
+        #  key should always be listlike at this point.
         assert not isinstance(key, tuple)

-        if is_scalar(key):
-            key = [key]
-        elif is_iterator(key):
+        if is_iterator(key):
             # Without this, the call to infer_dtype will consume the generator
             key = list(key)
 

pandas/core/sorting.py (+2 -2)

@@ -354,7 +354,7 @@ def nargsort(
     ascending: bool = True,
     na_position: str = "last",
     key: Callable | None = None,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> npt.NDArray[np.intp]:
     """
     Intended to be a drop-in replacement for np.argsort which handles NaNs.
@@ -369,7 +369,7 @@
     ascending : bool, default True
     na_position : {'first', 'last'}, default 'last'
     key : Optional[Callable], default None
-    mask : Optional[np.ndarray], default None
+    mask : Optional[np.ndarray[bool]], default None
         Passed when called by ExtensionArray.argsort.

     Returns

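Note on the sorting.py change: npt.NDArray[np.bool_] parametrizes the annotation by dtype, so a type checker can tell the boolean mask argument apart from the intp positions nargsort returns. A self-contained illustration of the annotation style (not the pandas implementation):

    from __future__ import annotations

    import numpy as np
    import numpy.typing as npt

    def argsort_with_mask(
        values: npt.NDArray[np.float64],
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> npt.NDArray[np.intp]:
        if mask is None:
            mask = np.isnan(values)
        # push masked (NaN) positions to the end, mirroring na_position="last"
        order = np.argsort(np.where(mask, np.inf, values), kind="stable")
        return order.astype(np.intp)

    print(argsort_with_mask(np.array([3.0, np.nan, 1.0])))  # [2 0 1]
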
pandas/tests/io/pytables/__init__.py (+2 -2)

@@ -7,9 +7,9 @@
     ),
     pytest.mark.filterwarnings(r"ignore:tostring\(\) is deprecated:DeprecationWarning"),
     pytest.mark.filterwarnings(
-        r"ignore:`np\.object` is a deprecated alias:DeprecationWarning"
+        r"ignore:`np\.object` is a deprecated alias.*:DeprecationWarning"
     ),
     pytest.mark.filterwarnings(
-        r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning"
+        r"ignore:`np\.bool` is a deprecated alias.*:DeprecationWarning"
     ),
 ]

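Note on the pytables test change: a pytest.mark.filterwarnings entry uses the action:message-regex:category form, and the middle field is a regular expression matched against the warning message, so module-level patterns like the ones above silence matching DeprecationWarnings for the whole test module. A hedged, illustrative example (not part of the pandas suite):

    import warnings

    import pytest

    pytestmark = [
        pytest.mark.filterwarnings(
            r"ignore:`np\.object` is a deprecated alias.*:DeprecationWarning"
        ),
    ]

    def test_matching_warning_is_suppressed():
        # the message matches the regex above, so the filter swallows it
        warnings.warn(
            "`np.object` is a deprecated alias for the builtin `object`.",
            DeprecationWarning,
        )
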
pandas/tests/resample/test_base.py (+21 -20)

@@ -61,14 +61,14 @@ def test_asfreq(series_and_frame, freq, create_index):
 def test_asfreq_fill_value(series, create_index):
     # test for fill value during resampling, issue 3715

-    s = series
+    ser = series

-    result = s.resample("1H").asfreq()
-    new_index = create_index(s.index[0], s.index[-1], freq="1H")
-    expected = s.reindex(new_index)
+    result = ser.resample("1H").asfreq()
+    new_index = create_index(ser.index[0], ser.index[-1], freq="1H")
+    expected = ser.reindex(new_index)
     tm.assert_series_equal(result, expected)

-    frame = s.to_frame("value")
+    frame = ser.to_frame("value")
     frame.iloc[1] = None
     result = frame.resample("1H").asfreq(fill_value=4.0)
     new_index = create_index(frame.index[0], frame.index[-1], freq="1H")
@@ -104,11 +104,11 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method):
     if resample_method == "ohlc":
         pytest.skip("need to test for ohlc from GH13083")

-    s = empty_series_dti
-    result = getattr(s.resample(freq), resample_method)()
+    ser = empty_series_dti
+    result = getattr(ser.resample(freq), resample_method)()

-    expected = s.copy()
-    expected.index = _asfreq_compat(s.index, freq)
+    expected = ser.copy()
+    expected.index = _asfreq_compat(ser.index, freq)

     tm.assert_index_equal(result.index, expected.index)
     assert result.index.freq == expected.index.freq
@@ -123,17 +123,18 @@ def test_resample_nat_index_series(request, freq, series, resample_method):
     if freq == "M":
         request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails"))

-    s = series.copy()
-    s.index = PeriodIndex([NaT] * len(s), freq=freq)
-    result = getattr(s.resample(freq), resample_method)()
+    ser = series.copy()
+    ser.index = PeriodIndex([NaT] * len(ser), freq=freq)
+    rs = ser.resample(freq)
+    result = getattr(rs, resample_method)()

     if resample_method == "ohlc":
         expected = DataFrame(
-            [], index=s.index[:0].copy(), columns=["open", "high", "low", "close"]
+            [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
         )
         tm.assert_frame_equal(result, expected, check_dtype=False)
     else:
-        expected = s[:0].copy()
+        expected = ser[:0].copy()
         tm.assert_series_equal(result, expected, check_dtype=False)
     tm.assert_index_equal(result.index, expected.index)
     assert result.index.freq == expected.index.freq
@@ -226,9 +227,9 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
 @pytest.mark.parametrize("freq", ["M", "D", "H"])
 def test_apply_to_empty_series(empty_series_dti, freq):
     # GH 14313
-    s = empty_series_dti
-    result = s.resample(freq).apply(lambda x: 1)
-    expected = s.resample(freq).apply(np.sum)
+    ser = empty_series_dti
+    result = ser.resample(freq).apply(lambda x: 1)
+    expected = ser.resample(freq).apply(np.sum)

     tm.assert_series_equal(result, expected, check_dtype=False)

@@ -248,9 +249,9 @@ def test_resampler_is_iterable(series):
 @all_ts
 def test_resample_quantile(series):
     # GH 15023
-    s = series
+    ser = series
     q = 0.75
     freq = "H"
-    result = s.resample(freq).quantile(q)
-    expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name)
+    result = ser.resample(freq).quantile(q)
+    expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name)
     tm.assert_series_equal(result, expected)
