Skip to content

Commit ee84ef2

Browse files
authored
CLN: assorted (#51318)
* CLN: assorted
* lint fixup
* pylint fixup
1 parent 49cbae4 commit ee84ef2

File tree

25 files changed

+165
-173
lines changed

25 files changed

+165
-173
lines changed

pandas/_libs/src/parser/io.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
6767

6868
func = PyObject_GetAttrString(src->obj, "read");
6969

70-
/* TODO: does this release the GIL? */
70+
/* Note: PyObject_CallObject requires the GIL */
7171
result = PyObject_CallObject(func, args);
7272
Py_XDECREF(args);
7373
Py_XDECREF(func);

pandas/_libs/tslibs/parsing.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,7 @@ cdef datetime dateutil_parse(
651651
try:
652652
res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst)
653653
except InvalidOperation:
654+
# GH#51157 dateutil can raise decimal.InvalidOperation
654655
res = None
655656

656657
if res is None:

pandas/_testing/__init__.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
ContextManager,
1515
Counter,
1616
Iterable,
17+
cast,
1718
)
1819

1920
import numpy as np
@@ -121,6 +122,7 @@
121122
PeriodIndex,
122123
TimedeltaIndex,
123124
)
125+
from pandas.core.arrays import ArrowExtensionArray
124126

125127
_N = 30
126128
_K = 4
@@ -1019,11 +1021,11 @@ def shares_memory(left, right) -> bool:
10191021

10201022
if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]":
10211023
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
1024+
left = cast("ArrowExtensionArray", left)
10221025
if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]":
1023-
# error: "ExtensionArray" has no attribute "_data"
1024-
left_pa_data = left._data # type: ignore[attr-defined]
1025-
# error: "ExtensionArray" has no attribute "_data"
1026-
right_pa_data = right._data # type: ignore[attr-defined]
1026+
right = cast("ArrowExtensionArray", right)
1027+
left_pa_data = left._data
1028+
right_pa_data = right._data
10271029
left_buf1 = left_pa_data.chunk(0).buffers()[1]
10281030
right_buf1 = right_pa_data.chunk(0).buffers()[1]
10291031
return left_buf1 == right_buf1

pandas/compat/numpy/function.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from typing import (
2121
Any,
2222
TypeVar,
23+
cast,
2324
overload,
2425
)
2526

@@ -159,8 +160,8 @@ def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs)
159160
ascending = True
160161

161162
validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
162-
# error: Incompatible return value type (got "int", expected "bool")
163-
return ascending # type: ignore[return-value]
163+
ascending = cast(bool, ascending)
164+
return ascending
164165

165166

166167
CLIP_DEFAULTS: dict[str, Any] = {"out": None}

pandas/core/apply.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -597,13 +597,11 @@ class NDFrameApply(Apply):
597597
not GroupByApply or ResamplerWindowApply
598598
"""
599599

600+
obj: DataFrame | Series
601+
600602
@property
601603
def index(self) -> Index:
602-
# error: Argument 1 to "__get__" of "AxisProperty" has incompatible type
603-
# "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy,
604-
# DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
605-
# Series]"
606-
return self.obj.index # type:ignore[arg-type]
604+
return self.obj.index
607605

608606
@property
609607
def agg_axis(self) -> Index:

pandas/core/arrays/categorical.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@
8888
from pandas.core.algorithms import (
8989
factorize,
9090
take_nd,
91-
unique1d,
9291
)
9392
from pandas.core.arrays._mixins import (
9493
NDArrayBackedExtensionArray,
@@ -2096,8 +2095,8 @@ def unique(self):
20962095
['b', 'a']
20972096
Categories (3, object): ['a' < 'b' < 'c']
20982097
"""
2099-
unique_codes = unique1d(self.codes)
2100-
return self._from_backing_data(unique_codes)
2098+
# pylint: disable=useless-parent-delegation
2099+
return super().unique()
21012100

21022101
def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
21032102
# make sure we have correct itemsize for resulting codes

pandas/core/frame.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -3811,6 +3811,8 @@ def _getitem_multilevel(self, key):
38113811
# string in the key. If the result is a Series, exclude the
38123812
# implied empty string from its name.
38133813
if len(result.columns) == 1:
3814+
# e.g. test_frame_getitem_multicolumn_empty_level,
3815+
# test_frame_mixed_depth_get, test_loc_setitem_single_column_slice
38143816
top = result.columns[0]
38153817
if isinstance(top, tuple):
38163818
top = top[0]
@@ -7822,13 +7824,13 @@ def combine(
78227824
result = {}
78237825
for col in new_columns:
78247826
series = this[col]
7825-
otherSeries = other[col]
7827+
other_series = other[col]
78267828

78277829
this_dtype = series.dtype
7828-
other_dtype = otherSeries.dtype
7830+
other_dtype = other_series.dtype
78297831

78307832
this_mask = isna(series)
7831-
other_mask = isna(otherSeries)
7833+
other_mask = isna(other_series)
78327834

78337835
# don't overwrite columns unnecessarily
78347836
# DO propagate if this column is not in the intersection
@@ -7838,9 +7840,9 @@ def combine(
78387840

78397841
if do_fill:
78407842
series = series.copy()
7841-
otherSeries = otherSeries.copy()
7843+
other_series = other_series.copy()
78427844
series[this_mask] = fill_value
7843-
otherSeries[other_mask] = fill_value
7845+
other_series[other_mask] = fill_value
78447846

78457847
if col not in self.columns:
78467848
# If self DataFrame does not have col in other DataFrame,
@@ -7855,9 +7857,9 @@ def combine(
78557857
# if we have different dtypes, possibly promote
78567858
new_dtype = find_common_type([this_dtype, other_dtype])
78577859
series = series.astype(new_dtype, copy=False)
7858-
otherSeries = otherSeries.astype(new_dtype, copy=False)
7860+
other_series = other_series.astype(new_dtype, copy=False)
78597861

7860-
arr = func(series, otherSeries)
7862+
arr = func(series, other_series)
78617863
if isinstance(new_dtype, np.dtype):
78627864
# if new_dtype is an EA Dtype, then `func` is expected to return
78637865
# the correct dtype without any additional casting
@@ -9919,7 +9921,7 @@ def _dict_round(df: DataFrame, decimals):
99199921
except KeyError:
99209922
yield vals
99219923

9922-
def _series_round(ser: Series, decimals: int):
9924+
def _series_round(ser: Series, decimals: int) -> Series:
99239925
if is_integer_dtype(ser.dtype) or is_float_dtype(ser.dtype):
99249926
return ser.round(decimals)
99259927
return ser

pandas/core/groupby/generic.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -1265,9 +1265,10 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
12651265
result = op.agg()
12661266
if not is_dict_like(func) and result is not None:
12671267
return result
1268-
elif relabeling and result is not None:
1268+
elif relabeling:
12691269
# this should be the only (non-raising) case with relabeling
12701270
# used reordered index of columns
1271+
result = cast(DataFrame, result)
12711272
result = result.iloc[:, order]
12721273
result = cast(DataFrame, result)
12731274
# error: Incompatible types in assignment (expression has type
@@ -1336,6 +1337,9 @@ def _iterate_slices(self) -> Iterable[Series]:
13361337
else:
13371338
for label, values in obj.items():
13381339
if label in self.exclusions:
1340+
# Note: if we tried to just iterate over _obj_with_exclusions,
1341+
# we would break test_wrap_agg_out by yielding a column
1342+
# that is skipped here but not dropped from obj_with_exclusions
13391343
continue
13401344

13411345
yield values
@@ -1379,6 +1383,7 @@ def _wrap_applied_output(
13791383
return result
13801384

13811385
# GH12824
1386+
# using values[0] here breaks test_groupby_apply_none_first
13821387
first_not_none = next(com.not_none(*values), None)
13831388

13841389
if first_not_none is None:
@@ -1817,7 +1822,7 @@ def _indexed_output_to_ndframe(
18171822
def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
18181823
return self.obj._constructor(mgr)
18191824

1820-
def _iterate_column_groupbys(self, obj: DataFrame | Series):
1825+
def _iterate_column_groupbys(self, obj: DataFrame):
18211826
for i, colname in enumerate(obj.columns):
18221827
yield colname, SeriesGroupBy(
18231828
obj.iloc[:, i],

pandas/core/groupby/groupby.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ class providing the base-class of operations.
9090

9191
from pandas.core import (
9292
algorithms,
93-
nanops,
9493
sample,
9594
)
9695
from pandas.core._numba import executor
@@ -1345,10 +1344,6 @@ def f(g):
13451344
with np.errstate(all="ignore"):
13461345
return func(g, *args, **kwargs)
13471346

1348-
elif hasattr(nanops, f"nan{func}"):
1349-
# TODO: should we wrap this in to e.g. _is_builtin_func?
1350-
f = getattr(nanops, f"nan{func}")
1351-
13521347
else:
13531348
raise ValueError(
13541349
"func must be a callable if args or kwargs are supplied"
@@ -1420,6 +1415,8 @@ def _python_apply_general(
14201415
is_transform,
14211416
)
14221417

1418+
# TODO: I (jbrockmendel) think this should be equivalent to doing grouped_reduce
1419+
# on _agg_py_fallback, but trying that here fails a bunch of tests 2023-02-07.
14231420
@final
14241421
def _python_agg_general(self, func, *args, **kwargs):
14251422
func = com.is_builtin_func(func)
@@ -2905,10 +2902,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
29052902
out[i, :] = algorithms.take_nd(value_element, indexer)
29062903
return out
29072904

2908-
obj = self._obj_with_exclusions
2909-
if self.axis == 1:
2910-
obj = obj.T
2911-
mgr = obj._mgr
2905+
mgr = self._get_data_to_aggregate()
29122906
res_mgr = mgr.apply(blk_func)
29132907

29142908
new_obj = self._wrap_agged_manager(res_mgr)

pandas/core/groupby/ops.py

+1
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray:
207207
if how in ["var", "mean"] or (
208208
self.kind == "transform" and self.has_dropped_na
209209
):
210+
# has_dropped_na check need for test_null_group_str_transformer
210211
# result may still include NaN, so we have to cast
211212
values = ensure_float64(values)
212213

pandas/core/internals/blocks.py

-4
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,6 @@ def _replace_regex(
630630
to_replace,
631631
value,
632632
inplace: bool = False,
633-
convert: bool = True,
634633
mask=None,
635634
) -> list[Block]:
636635
"""
@@ -644,8 +643,6 @@ def _replace_regex(
644643
Replacement object.
645644
inplace : bool, default False
646645
Perform inplace modification.
647-
convert : bool, default True
648-
If true, try to coerce any object types to better types.
649646
mask : array-like of bool, optional
650647
True indicate corresponding element is ignored.
651648
@@ -788,7 +785,6 @@ def _replace_coerce(
788785
to_replace,
789786
value,
790787
inplace=inplace,
791-
convert=False,
792788
mask=mask,
793789
)
794790
else:

pandas/core/nanops.py

+4
Original file line numberDiff line numberDiff line change
@@ -1512,6 +1512,10 @@ def _maybe_null_out(
15121512
Dtype
15131513
The product of all elements on a given axis. ( NaNs are treated as 1)
15141514
"""
1515+
if mask is None and min_count == 0:
1516+
# nothing to check; short-circuit
1517+
return result
1518+
15151519
if axis is not None and isinstance(result, np.ndarray):
15161520
if mask is not None:
15171521
null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0

pandas/io/pytables.py

+3
Original file line numberDiff line numberDiff line change
@@ -4045,6 +4045,9 @@ def get_blk_items(mgr):
40454045
blocks = list(mgr.blocks)
40464046
blk_items = get_blk_items(mgr)
40474047
for c in data_columns:
4048+
# This reindex would raise ValueError if we had a duplicate
4049+
# index, so we can infer that (as long as axis==1) we
4050+
# get a single column back, so a single block.
40484051
mgr = frame.reindex([c], axis=axis)._mgr
40494052
mgr = cast(BlockManager, mgr)
40504053
blocks.extend(mgr.blocks)

pandas/tests/arithmetic/test_period.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1574,7 +1574,7 @@ def test_pi_sub_period(self):
15741574
assert result.freq == exp.freq
15751575

15761576
def test_pi_sub_pdnat(self):
1577-
# GH#13071
1577+
# GH#13071, GH#19389
15781578
idx = PeriodIndex(
15791579
["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx"
15801580
)

pandas/tests/dtypes/test_common.py

-1
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,6 @@ def test_is_datetime_or_timedelta_dtype():
496496
assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2]))
497497
assert not com.is_datetime_or_timedelta_dtype(np.array(["a", "b"]))
498498

499-
# TODO(jreback), this is slightly suspect
500499
assert not com.is_datetime_or_timedelta_dtype(DatetimeTZDtype("ns", "US/Eastern"))
501500

502501
assert com.is_datetime_or_timedelta_dtype(np.datetime64)

pandas/tests/dtypes/test_missing.py

+2
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,7 @@ def test_array_equivalent_nested(strict_nan):
568568
assert not array_equivalent(left, right, strict_nan=strict_nan)
569569

570570

571+
@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning")
571572
@pytest.mark.parametrize(
572573
"strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
573574
)
@@ -610,6 +611,7 @@ def test_array_equivalent_nested_list(strict_nan):
610611
assert not array_equivalent(left, right, strict_nan=strict_nan)
611612

612613

614+
@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning")
613615
@pytest.mark.xfail(reason="failing")
614616
@pytest.mark.parametrize("strict_nan", [True, False])
615617
def test_array_equivalent_nested_mixed_list(strict_nan):

pandas/tests/groupby/test_allowlist.py

-2
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,6 @@ def test_all_methods_categorized(mframe):
314314

315315
# removed a public method?
316316
all_categorized = reduction_kernels | transformation_kernels | groupby_other_methods
317-
print(names)
318-
print(all_categorized)
319317
if names != all_categorized:
320318
msg = f"""
321319
Some methods which are supposed to be on the Grouper class

pandas/tests/groupby/test_bin_groupby.py

-4
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,3 @@ def test_generate_bins(binner, closed, expected):
6363
values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
6464
result = lib.generate_bins_dt64(values, binner, closed=closed)
6565
tm.assert_numpy_array_equal(result, expected)
66-
67-
68-
class TestMoments:
69-
pass

pandas/tests/groupby/test_filters.py

+5-10
Original file line numberDiff line numberDiff line change
@@ -369,8 +369,7 @@ def test_filter_and_transform_with_non_unique_int_index():
369369
tm.assert_series_equal(actual, expected)
370370

371371
actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
372-
NA = np.nan
373-
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
372+
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
374373
# ^ made manually because this can get confusing!
375374
tm.assert_series_equal(actual, expected)
376375

@@ -412,8 +411,7 @@ def test_filter_and_transform_with_multiple_non_unique_int_index():
412411
tm.assert_series_equal(actual, expected)
413412

414413
actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
415-
NA = np.nan
416-
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
414+
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
417415
# ^ made manually because this can get confusing!
418416
tm.assert_series_equal(actual, expected)
419417

@@ -455,8 +453,7 @@ def test_filter_and_transform_with_non_unique_float_index():
455453
tm.assert_series_equal(actual, expected)
456454

457455
actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
458-
NA = np.nan
459-
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
456+
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
460457
# ^ made manually because this can get confusing!
461458
tm.assert_series_equal(actual, expected)
462459

@@ -501,8 +498,7 @@ def test_filter_and_transform_with_non_unique_timestamp_index():
501498
tm.assert_series_equal(actual, expected)
502499

503500
actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
504-
NA = np.nan
505-
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
501+
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
506502
# ^ made manually because this can get confusing!
507503
tm.assert_series_equal(actual, expected)
508504

@@ -544,8 +540,7 @@ def test_filter_and_transform_with_non_unique_string_index():
544540
tm.assert_series_equal(actual, expected)
545541

546542
actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
547-
NA = np.nan
548-
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
543+
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
549544
# ^ made manually because this can get confusing!
550545
tm.assert_series_equal(actual, expected)
551546

0 commit comments

Comments (0)