CLN: assorted #51318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged · 4 commits · Feb 13, 2023
2 changes: 1 addition & 1 deletion pandas/_libs/src/parser/io.c
@@ -67,7 +67,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,

func = PyObject_GetAttrString(src->obj, "read");

/* TODO: does this release the GIL? */
/* Note: PyObject_CallObject requires the GIL */
result = PyObject_CallObject(func, args);
Py_XDECREF(args);
Py_XDECREF(func);
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/parsing.pyx
@@ -651,6 +651,7 @@ cdef datetime dateutil_parse(
try:
res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst)
except InvalidOperation:
# GH#51157 dateutil can raise decimal.InvalidOperation
res = None

if res is None:
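The new comment documents a real failure mode: for some malformed inputs, dateutil's internal parser can surface decimal.InvalidOperation rather than an ordinary parse error (GH#51157). A minimal standalone sketch of the guard — it calls the same dateutil-private _parse the hunk uses, so treat it as illustrative rather than public API:

    from decimal import InvalidOperation

    from dateutil.parser import parser

    DEFAULTPARSER = parser()

    def try_parse(timestr, dayfirst=False, yearfirst=False):
        # Mirror the guard above: decimal.InvalidOperation is handled the
        # same way as a failed parse, leaving res as None.
        try:
            res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst)
        except InvalidOperation:
            res = None
        return res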
10 changes: 6 additions & 4 deletions pandas/_testing/__init__.py
@@ -14,6 +14,7 @@
ContextManager,
Counter,
Iterable,
cast,
)

import numpy as np
@@ -121,6 +122,7 @@
PeriodIndex,
TimedeltaIndex,
)
from pandas.core.arrays import ArrowExtensionArray

_N = 30
_K = 4
@@ -1019,11 +1021,11 @@ def shares_memory(left, right) -> bool:

if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]":
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
left = cast("ArrowExtensionArray", left)
if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]":
# error: "ExtensionArray" has no attribute "_data"
left_pa_data = left._data # type: ignore[attr-defined]
# error: "ExtensionArray" has no attribute "_data"
right_pa_data = right._data # type: ignore[attr-defined]
right = cast("ArrowExtensionArray", right)
left_pa_data = left._data
right_pa_data = right._data
left_buf1 = left_pa_data.chunk(0).buffers()[1]
right_buf1 = right_pa_data.chunk(0).buffers()[1]
return left_buf1 == right_buf1
5 changes: 3 additions & 2 deletions pandas/compat/numpy/function.py
@@ -20,6 +20,7 @@
from typing import (
Any,
TypeVar,
cast,
overload,
)

@@ -159,8 +160,8 @@ def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs)
ascending = True

validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
# error: Incompatible return value type (got "int", expected "bool")
return ascending # type: ignore[return-value]
ascending = cast(bool, ascending)
return ascending


CLIP_DEFAULTS: dict[str, Any] = {"out": None}
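Both the _testing and compat/numpy hunks above replace "# type: ignore" comments with typing.cast, which does nothing at runtime and only narrows the type the checker sees. A minimal sketch of the pattern, with hypothetical Payload classes standing in for the pandas types:

    from typing import cast

    class Payload:
        pass

    class ArrowPayload(Payload):
        @property
        def _data(self) -> str:
            return "pyarrow-backed buffer"

    def read_data(obj: Payload) -> str:
        obj = cast(ArrowPayload, obj)  # runtime no-op; narrows the static type
        return obj._data  # attribute access now type-checks without an ignore

    print(read_data(ArrowPayload()))  # pyarrow-backed buffer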
8 changes: 3 additions & 5 deletions pandas/core/apply.py
@@ -588,13 +588,11 @@ class NDFrameApply(Apply):
not GroupByApply or ResamplerWindowApply
"""

obj: DataFrame | Series

@property
def index(self) -> Index:
# error: Argument 1 to "__get__" of "AxisProperty" has incompatible type
# "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy,
# DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
# Series]"
return self.obj.index # type:ignore[arg-type]
return self.obj.index

@property
def agg_axis(self) -> Index:
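The apply.py hunk takes a different route to the same end: instead of casting at each use, the subclass re-declares obj with a narrower annotation, so self.obj.index type-checks without the old arg-type ignore. A sketch of the idea — class names and the base annotation are simplified here:

    import pandas as pd

    class ApplySketch:
        obj: object  # broad annotation on the base class

    class NDFrameApplySketch(ApplySketch):
        # Narrower re-declaration: every self.obj access is now checked
        # against DataFrame | Series instead of the base class's type.
        obj: "pd.DataFrame | pd.Series"

        @property
        def index(self):
            return self.obj.index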
5 changes: 2 additions & 3 deletions pandas/core/arrays/categorical.py
@@ -88,7 +88,6 @@
from pandas.core.algorithms import (
factorize,
take_nd,
unique1d,
)
from pandas.core.arrays._mixins import (
NDArrayBackedExtensionArray,
@@ -2096,8 +2095,8 @@ def unique(self):
['b', 'a']
Categories (3, object): ['a' < 'b' < 'c']
"""
unique_codes = unique1d(self.codes)
return self._from_backing_data(unique_codes)
# pylint: disable=useless-parent-delegation
return super().unique()

def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
# make sure we have correct itemsize for resulting codes
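The docstring output shown in the hunk is unchanged by delegating to super().unique(); for reference, an input that reproduces it (assuming pandas >= 1.3 behavior, where unique() keeps unused categories):

    import pandas as pd

    cat = pd.Categorical(list("baab"), categories=list("abc"), ordered=True)
    print(cat.unique())
    # ['b', 'a']
    # Categories (3, object): ['a' < 'b' < 'c']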
18 changes: 10 additions & 8 deletions pandas/core/frame.py
@@ -3811,6 +3811,8 @@ def _getitem_multilevel(self, key):
# string in the key. If the result is a Series, exclude the
# implied empty string from its name.
if len(result.columns) == 1:
# e.g. test_frame_getitem_multicolumn_empty_level,
# test_frame_mixed_depth_get, test_loc_setitem_single_column_slice
top = result.columns[0]
if isinstance(top, tuple):
top = top[0]
@@ -7822,13 +7824,13 @@ def combine(
result = {}
for col in new_columns:
series = this[col]
otherSeries = other[col]
other_series = other[col]

this_dtype = series.dtype
other_dtype = otherSeries.dtype
other_dtype = other_series.dtype

this_mask = isna(series)
other_mask = isna(otherSeries)
other_mask = isna(other_series)

# don't overwrite columns unnecessarily
# DO propagate if this column is not in the intersection
@@ -7838,9 +7840,9 @@

if do_fill:
series = series.copy()
otherSeries = otherSeries.copy()
other_series = other_series.copy()
series[this_mask] = fill_value
otherSeries[other_mask] = fill_value
other_series[other_mask] = fill_value

if col not in self.columns:
# If self DataFrame does not have col in other DataFrame,
@@ -7855,9 +7857,9 @@
# if we have different dtypes, possibly promote
new_dtype = find_common_type([this_dtype, other_dtype])
series = series.astype(new_dtype, copy=False)
otherSeries = otherSeries.astype(new_dtype, copy=False)
other_series = other_series.astype(new_dtype, copy=False)

arr = func(series, otherSeries)
arr = func(series, other_series)
if isinstance(new_dtype, np.dtype):
# if new_dtype is an EA Dtype, then `func` is expected to return
# the correct dtype without any additional casting
@@ -9919,7 +9921,7 @@ def _dict_round(df: DataFrame, decimals):
except KeyError:
yield vals

def _series_round(ser: Series, decimals: int):
def _series_round(ser: Series, decimals: int) -> Series:
if is_integer_dtype(ser.dtype) or is_float_dtype(ser.dtype):
return ser.round(decimals)
return ser
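The frame.py changes are a rename (otherSeries -> other_series), two clarifying comments, and a return annotation; DataFrame.combine itself behaves as before. A quick usage check:

    import pandas as pd

    df1 = pd.DataFrame({"A": [0, 0], "B": [4, 4]})
    df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]})

    # func receives one column from each frame -- the series/other_series
    # pair in the loop above -- and returns the combined column.
    out = df1.combine(df2, lambda s1, s2: s1.where(s1 > s2, s2))
    print(out)
    #    A  B
    # 0  1  4
    # 1  1  4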
9 changes: 7 additions & 2 deletions pandas/core/groupby/generic.py
@@ -1265,9 +1265,10 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
result = op.agg()
if not is_dict_like(func) and result is not None:
return result
elif relabeling and result is not None:
elif relabeling:
# this should be the only (non-raising) case with relabeling
# used reordered index of columns
result = cast(DataFrame, result)
result = result.iloc[:, order]
result = cast(DataFrame, result)
# error: Incompatible types in assignment (expression has type
@@ -1336,6 +1337,9 @@ def _iterate_slices(self) -> Iterable[Series]:
else:
for label, values in obj.items():
if label in self.exclusions:
# Note: if we tried to just iterate over _obj_with_exclusions,
# we would break test_wrap_agg_out by yielding a column
# that is skipped here but not dropped from obj_with_exclusions
continue

yield values
@@ -1379,6 +1383,7 @@ def _wrap_applied_output(
return result

# GH12824
# using values[0] here breaks test_groupby_apply_none_first
first_not_none = next(com.not_none(*values), None)

if first_not_none is None:
@@ -1817,7 +1822,7 @@ def _indexed_output_to_ndframe(
def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
return self.obj._constructor(mgr)

def _iterate_column_groupbys(self, obj: DataFrame | Series):
def _iterate_column_groupbys(self, obj: DataFrame):
for i, colname in enumerate(obj.columns):
yield colname, SeriesGroupBy(
obj.iloc[:, i],
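The relabeling branch in aggregate() above is reached through named aggregation, where results are reordered and relabeled to match the keyword order; for example:

    import pandas as pd

    df = pd.DataFrame({"kind": ["a", "a", "b"], "height": [1.0, 2.0, 3.0]})

    # Named aggregation exercises the relabeling path.
    out = df.groupby("kind").agg(max_height=("height", "max"))
    print(out)
    #       max_height
    # kind
    # a            2.0
    # b            3.0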
12 changes: 3 additions & 9 deletions pandas/core/groupby/groupby.py
@@ -89,7 +89,6 @@ class providing the base-class of operations.

from pandas.core import (
algorithms,
nanops,
sample,
)
from pandas.core._numba import executor
@@ -1342,10 +1341,6 @@ def f(g):
with np.errstate(all="ignore"):
return func(g, *args, **kwargs)

elif hasattr(nanops, f"nan{func}"):
# TODO: should we wrap this in to e.g. _is_builtin_func?
f = getattr(nanops, f"nan{func}")

else:
raise ValueError(
"func must be a callable if args or kwargs are supplied"
@@ -1417,6 +1412,8 @@ def _python_apply_general(
is_transform,
)

# TODO: I (jbrockmendel) think this should be equivalent to doing grouped_reduce
# on _agg_py_fallback, but trying that here fails a bunch of tests 2023-02-07.
@final
def _python_agg_general(self, func, *args, **kwargs):
func = com.is_builtin_func(func)
@@ -2902,10 +2899,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
out[i, :] = algorithms.take_nd(value_element, indexer)
return out

obj = self._obj_with_exclusions
if self.axis == 1:
obj = obj.T
mgr = obj._mgr
mgr = self._get_data_to_aggregate()
res_mgr = mgr.apply(blk_func)

new_obj = self._wrap_agged_manager(res_mgr)
1 change: 1 addition & 0 deletions pandas/core/groupby/ops.py
@@ -207,6 +207,7 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray:
if how in ["var", "mean"] or (
self.kind == "transform" and self.has_dropped_na
):
# has_dropped_na check needed for test_null_group_str_transformer
# result may still include NaN, so we have to cast
values = ensure_float64(values)

4 changes: 0 additions & 4 deletions pandas/core/internals/blocks.py
@@ -630,7 +630,6 @@ def _replace_regex(
to_replace,
value,
inplace: bool = False,
convert: bool = True,
mask=None,
) -> list[Block]:
"""
@@ -644,8 +643,6 @@
Replacement object.
inplace : bool, default False
Perform inplace modification.
convert : bool, default True
If true, try to coerce any object types to better types.
mask : array-like of bool, optional
True indicate corresponding element is ignored.

@@ -788,7 +785,6 @@ def _replace_coerce(
to_replace,
value,
inplace=inplace,
convert=False,
mask=mask,
)
else:
4 changes: 4 additions & 0 deletions pandas/core/nanops.py
@@ -1512,6 +1512,10 @@ def _maybe_null_out(
Dtype
The product of all elements on a given axis. ( NaNs are treated as 1)
"""
if mask is None and min_count == 0:
# nothing to check; short-circuit
return result

if axis is not None and isinstance(result, np.ndarray):
if mask is not None:
null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
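The early return added to _maybe_null_out short-circuits the default case (no mask, min_count == 0). min_count itself is user-visible through reductions; a small demonstration of the semantics the function implements:

    import pandas as pd

    s = pd.Series([pd.NA], dtype="Float64")
    print(s.sum(min_count=0))  # 0.0 -- an empty sum is allowed
    print(s.sum(min_count=1))  # <NA> -- fewer than 1 valid value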
3 changes: 3 additions & 0 deletions pandas/io/pytables.py
@@ -4045,6 +4045,9 @@ def get_blk_items(mgr):
blocks = list(mgr.blocks)
blk_items = get_blk_items(mgr)
for c in data_columns:
# This reindex would raise ValueError if we had a duplicate
# index, so we can infer that (as long as axis==1) we
# get a single column back, so a single block.
mgr = frame.reindex([c], axis=axis)._mgr
mgr = cast(BlockManager, mgr)
blocks.extend(mgr.blocks)
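The inference in the new pytables comment can be checked directly: reindex raises on a duplicated axis, so when it succeeds with a single label and axis=1, exactly one column (hence one block) comes back:

    import pandas as pd

    df = pd.DataFrame([[1, 2]], columns=["a", "a"])
    try:
        df.reindex(["a"], axis=1)
    except ValueError as err:
        print(err)  # e.g. "cannot reindex on an axis with duplicate labels"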
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_period.py
@@ -1574,7 +1574,7 @@ def test_pi_sub_period(self):
assert result.freq == exp.freq

def test_pi_sub_pdnat(self):
# GH#13071
# GH#13071, GH#19389
idx = PeriodIndex(
["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx"
)
1 change: 0 additions & 1 deletion pandas/tests/dtypes/test_common.py
@@ -496,7 +496,6 @@ def test_is_datetime_or_timedelta_dtype():
assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2]))
assert not com.is_datetime_or_timedelta_dtype(np.array(["a", "b"]))

# TODO(jreback), this is slightly suspect
assert not com.is_datetime_or_timedelta_dtype(DatetimeTZDtype("ns", "US/Eastern"))

assert com.is_datetime_or_timedelta_dtype(np.datetime64)
2 changes: 2 additions & 0 deletions pandas/tests/dtypes/test_missing.py
@@ -568,6 +568,7 @@ def test_array_equivalent_nested(strict_nan):
assert not array_equivalent(left, right, strict_nan=strict_nan)


@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning")
@pytest.mark.parametrize(
"strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
@@ -610,6 +611,7 @@ def test_array_equivalent_nested_list(strict_nan):
assert not array_equivalent(left, right, strict_nan=strict_nan)


@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning")
@pytest.mark.xfail(reason="failing")
@pytest.mark.parametrize("strict_nan", [True, False])
def test_array_equivalent_nested_mixed_list(strict_nan):
2 changes: 0 additions & 2 deletions pandas/tests/groupby/test_allowlist.py
@@ -314,8 +314,6 @@ def test_all_methods_categorized(mframe):

# removed a public method?
all_categorized = reduction_kernels | transformation_kernels | groupby_other_methods
print(names)
print(all_categorized)
if names != all_categorized:
msg = f"""
Some methods which are supposed to be on the Grouper class
4 changes: 0 additions & 4 deletions pandas/tests/groupby/test_bin_groupby.py
@@ -63,7 +63,3 @@ def test_generate_bins(binner, closed, expected):
values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
result = lib.generate_bins_dt64(values, binner, closed=closed)
tm.assert_numpy_array_equal(result, expected)


class TestMoments:
pass
15 changes: 5 additions & 10 deletions pandas/tests/groupby/test_filters.py
@@ -369,8 +369,7 @@ def test_filter_and_transform_with_non_unique_int_index():
tm.assert_series_equal(actual, expected)

actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
NA = np.nan
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
# ^ made manually because this can get confusing!
tm.assert_series_equal(actual, expected)

@@ -412,8 +411,7 @@ def test_filter_and_transform_with_multiple_non_unique_int_index():
tm.assert_series_equal(actual, expected)

actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
NA = np.nan
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
# ^ made manually because this can get confusing!
tm.assert_series_equal(actual, expected)

@@ -455,8 +453,7 @@ def test_filter_and_transform_with_non_unique_float_index():
tm.assert_series_equal(actual, expected)

actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
NA = np.nan
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
# ^ made manually because this can get confusing!
tm.assert_series_equal(actual, expected)

@@ -501,8 +498,7 @@ def test_filter_and_transform_with_non_unique_timestamp_index():
tm.assert_series_equal(actual, expected)

actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
NA = np.nan
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
# ^ made manually because this can get confusing!
tm.assert_series_equal(actual, expected)

@@ -544,8 +540,7 @@ def test_filter_and_transform_with_non_unique_string_index():
tm.assert_series_equal(actual, expected)

actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
NA = np.nan
expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid")
expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid")
# ^ made manually because this can get confusing!
tm.assert_series_equal(actual, expected)
