-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Continue de-nesting core.ops #19448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Continue de-nesting core.ops #19448
Changes from all commits
b048b47
fb6f534
5105786
82fe7a1
399fcd5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,8 +39,7 @@ | |
ABCSeries, | ||
ABCDataFrame, | ||
ABCIndex, | ||
ABCPeriodIndex, | ||
ABCSparseSeries) | ||
ABCSparseSeries, ABCSparseArray) | ||
|
||
|
||
def _gen_eval_kwargs(name): | ||
|
@@ -445,17 +444,22 @@ def names(x): | |
return new_methods | ||
|
||
|
||
def add_methods(cls, new_methods, force): | ||
def add_methods(cls, new_methods): | ||
for name, method in new_methods.items(): | ||
# For most methods, if we find that the class already has a method | ||
# of the same name, it is OK to over-write it. The exception is | ||
# inplace methods (__iadd__, __isub__, ...) for SparseArray, which | ||
# retain the np.ndarray versions. | ||
force = not (issubclass(cls, ABCSparseArray) and | ||
name.startswith('__i')) | ||
if force or name not in cls.__dict__: | ||
bind_method(cls, name, method) | ||
|
||
|
||
# ---------------------------------------------------------------------- | ||
# Arithmetic | ||
def add_special_arithmetic_methods(cls, arith_method=None, | ||
comp_method=None, bool_method=None, | ||
force=False): | ||
comp_method=None, bool_method=None): | ||
""" | ||
Adds the full suite of special arithmetic methods (``__add__``, | ||
``__sub__``, etc.) to the class. | ||
|
@@ -469,9 +473,6 @@ def add_special_arithmetic_methods(cls, arith_method=None, | |
factory for rich comparison - signature: f(op, name, str_rep) | ||
bool_method : function (optional) | ||
factory for boolean methods - signature: f(op, name, str_rep) | ||
force : bool, default False | ||
if False, checks whether function is defined **on ``cls.__dict__``** | ||
before defining if True, always defines functions on class base | ||
""" | ||
new_methods = _create_methods(cls, arith_method, comp_method, bool_method, | ||
special=True) | ||
|
@@ -512,12 +513,11 @@ def f(self, other): | |
__ior__=_wrap_inplace_method(new_methods["__or__"]), | ||
__ixor__=_wrap_inplace_method(new_methods["__xor__"]))) | ||
|
||
add_methods(cls, new_methods=new_methods, force=force) | ||
add_methods(cls, new_methods=new_methods) | ||
|
||
|
||
def add_flex_arithmetic_methods(cls, flex_arith_method, | ||
flex_comp_method=None, flex_bool_method=None, | ||
force=False): | ||
flex_comp_method=None, flex_bool_method=None): | ||
""" | ||
Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) | ||
to the class. | ||
|
@@ -529,9 +529,6 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, | |
f(op, name, str_rep) | ||
flex_comp_method : function, optional, | ||
factory for rich comparison - signature: f(op, name, str_rep) | ||
force : bool, default False | ||
if False, checks whether function is defined **on ``cls.__dict__``** | ||
before defining if True, always defines functions on class base | ||
""" | ||
new_methods = _create_methods(cls, flex_arith_method, | ||
flex_comp_method, flex_bool_method, | ||
|
@@ -544,7 +541,7 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, | |
if k in new_methods: | ||
new_methods.pop(k) | ||
|
||
add_methods(cls, new_methods=new_methods, force=force) | ||
add_methods(cls, new_methods=new_methods) | ||
|
||
|
||
# ----------------------------------------------------------------------------- | ||
|
@@ -614,14 +611,11 @@ def na_op(x, y): | |
result = np.empty(x.size, dtype=dtype) | ||
mask = notna(x) & notna(y) | ||
result[mask] = op(x[mask], com._values_from_object(y[mask])) | ||
elif isinstance(x, np.ndarray): | ||
else: | ||
assert isinstance(x, np.ndarray) | ||
result = np.empty(len(x), dtype=x.dtype) | ||
mask = notna(x) | ||
result[mask] = op(x[mask], y) | ||
else: | ||
raise TypeError("{typ} cannot perform the operation " | ||
"{op}".format(typ=type(x).__name__, | ||
op=str_rep)) | ||
|
||
result, changed = maybe_upcast_putmask(result, ~mask, np.nan) | ||
|
||
|
@@ -658,6 +652,10 @@ def wrapper(left, right, name=name, na_op=na_op): | |
index=left.index, name=res_name, | ||
dtype=result.dtype) | ||
|
||
elif is_categorical_dtype(left): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests that hit this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the request for comments in the code for what tests hit this path? Or confirmation here that such tests exist? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests.categorical.test_operators.TestCategoricalOps.test_numeric_like_ops hits this path. The change here is catching this case early (and explicitly) instead of in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. my requests are about things that are being added. e.g. if its just refactoing then existing tests are ok, but some things look like they are catching additional cases, so should have tests for these. a coverage analysis can test you (compare before and after) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is very nearly pure refactoring to catch things earlier, more explicitly, and with fewer levels of indentation. I'll go through the diff and annotate any exceptions to this rule. The |
||
raise TypeError("{typ} cannot perform the operation " | ||
"{op}".format(typ=type(left).__name__, op=str_rep)) | ||
|
||
lvalues = left.values | ||
rvalues = right | ||
if isinstance(rvalues, ABCSeries): | ||
|
@@ -745,24 +743,19 @@ def na_op(x, y): | |
elif is_categorical_dtype(y) and not is_scalar(y): | ||
return op(y, x) | ||
|
||
if is_object_dtype(x.dtype): | ||
elif is_object_dtype(x.dtype): | ||
result = _comp_method_OBJECT_ARRAY(op, x, y) | ||
|
||
elif is_datetimelike_v_numeric(x, y): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests that hit this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests.series.test_operators.TestSeriesComparisons.test_comparison_invalid. This is just moved up a few lines and down one level of indentation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
raise TypeError("invalid type comparison") | ||
|
||
else: | ||
|
||
# we want to compare like types | ||
# we only want to convert to integer like if | ||
# we are not NotImplemented, otherwise | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment no longer relevant? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment may be more verbose than it needs to be, but is still relevant. Once #19301 goes in we can dispatch to DTI and TDI and the needs_i8_conversion block below can be simplified quite a bit. |
||
# we would allow datetime64 (but viewed as i8) against | ||
# integer comparisons | ||
if is_datetimelike_v_numeric(x, y): | ||
raise TypeError("invalid type comparison") | ||
|
||
# numpy does not like comparisons vs None | ||
if is_scalar(y) and isna(y): | ||
if name == '__ne__': | ||
return np.ones(len(x), dtype=bool) | ||
else: | ||
return np.zeros(len(x), dtype=bool) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The is_datetimelike_v_numeric and is_scalar cases are moved to earlier in the checking process, not removed. |
||
|
||
# we have a datetime/timedelta and may need to convert | ||
mask = None | ||
|
@@ -795,39 +788,44 @@ def wrapper(self, other, axis=None): | |
if axis is not None: | ||
self._get_axis_number(axis) | ||
|
||
if isinstance(other, ABCSeries): | ||
if isinstance(other, ABCDataFrame): # pragma: no cover | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a comment here about early failing |
||
# Defer to DataFrame implementation; fail early | ||
return NotImplemented | ||
|
||
elif isinstance(other, ABCSeries): | ||
name = com._maybe_match_name(self, other) | ||
if not self._indexed_same(other): | ||
msg = 'Can only compare identically-labeled Series objects' | ||
raise ValueError(msg) | ||
return self._constructor(na_op(self.values, other.values), | ||
index=self.index, name=name) | ||
elif isinstance(other, ABCDataFrame): # pragma: no cover | ||
return NotImplemented | ||
res_values = na_op(self.values, other.values) | ||
return self._constructor(res_values, index=self.index, name=name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Everything in this part of the diff (788-799) is pure refactor, moving the ABCDataFrame check to the top spot for consistency |
||
|
||
elif isinstance(other, (np.ndarray, pd.Index)): | ||
# do not check length of zerodim array | ||
# as it will broadcast | ||
if (not is_scalar(lib.item_from_zerodim(other)) and | ||
len(self) != len(other)): | ||
raise ValueError('Lengths must match to compare') | ||
|
||
if isinstance(other, ABCPeriodIndex): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. test that hit this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests.series.test_operators.TestSeriesComparisons.test_nat_comparisons (2 parametrized cases) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think removing this check is the main piece of code actually removed instead of just rearranged. The comment mentions a temp workaround and it is no longer necessary. |
||
# temp workaround until fixing GH 13637 | ||
# tested in test_nat_comparisons | ||
# (pandas.tests.series.test_operators.TestSeriesOperators) | ||
return self._constructor(na_op(self.values, | ||
other.astype(object).values), | ||
index=self.index) | ||
|
||
return self._constructor(na_op(self.values, np.asarray(other)), | ||
res_values = na_op(self.values, np.asarray(other)) | ||
return self._constructor(res_values, | ||
index=self.index).__finalize__(self) | ||
|
||
elif isinstance(other, pd.Categorical): | ||
if not is_categorical_dtype(self): | ||
msg = ("Cannot compare a Categorical for op {op} with Series " | ||
"of dtype {typ}.\nIf you want to compare values, use " | ||
"'series <op> np.asarray(other)'.") | ||
raise TypeError(msg.format(op=op, typ=self.dtype)) | ||
elif (isinstance(other, pd.Categorical) and | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is catching pd.Categorical specifically (as opposed to CategoricalIndex or Series[Categorical]) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that's totally not obvious, is this tested or needed? that seems oddly specific There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well the two branches preceeding this handle ABCSeries and pd.Index cases, and the (existing) error message specifically refers to a Categorical There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, pls revist at some point, this should prob be less specific and more about a non-ndarray like (e.g. an ExtensionArray check) |
||
not is_categorical_dtype(self)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests that hit this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests.categorical.test_operators.TestCategoricalOpsWithFactor.test_comparisons |
||
raise TypeError("Cannot compare a Categorical for op {op} with " | ||
"Series of dtype {typ}.\nIf you want to compare " | ||
"values, use 'series <op> np.asarray(other)'." | ||
.format(op=op, typ=self.dtype)) | ||
|
||
elif is_scalar(other) and isna(other): | ||
# numpy does not like comparisons vs None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. test that hit this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests.series.test_operators.TestSeriesComparisons.test_nat_comparisons_scalar, tests.series.test_operators.TestSeriesComparisons.test_more_na_comparisons, tests.series.test_arithmetic.TestTimestampSeriesComparison.test_timestamp_equality, tests.series.test_arithmetic.TestTimestampSeriesComparison.test_timestamp_compare_series total of 10 cases between these with parametrization There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (This is moved down from |
||
if op is operator.ne: | ||
res_values = np.ones(len(self), dtype=bool) | ||
else: | ||
res_values = np.zeros(len(self), dtype=bool) | ||
return self._constructor(res_values, index=self.index, | ||
name=self.name, dtype='bool') | ||
|
||
if is_categorical_dtype(self): | ||
# cats are a special case as get_values() would return an ndarray, | ||
|
@@ -877,11 +875,10 @@ def na_op(x, y): | |
y = _ensure_object(y) | ||
result = lib.vec_binop(x, y, op) | ||
else: | ||
# let null fall thru | ||
if not isna(y): | ||
y = bool(y) | ||
try: | ||
|
||
# let null fall thru | ||
if not isna(y): | ||
y = bool(y) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just moving the |
||
result = lib.scalar_binop(x, y, op) | ||
except: | ||
msg = ("cannot compare a dtyped [{dtype}] array " | ||
|
@@ -899,26 +896,31 @@ def wrapper(self, other): | |
|
||
self, other = _align_method_SERIES(self, other, align_asobject=True) | ||
|
||
if isinstance(other, ABCSeries): | ||
if isinstance(other, ABCDataFrame): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment here |
||
# Defer to DataFrame implementation; fail early | ||
return NotImplemented | ||
|
||
elif isinstance(other, ABCSeries): | ||
name = com._maybe_match_name(self, other) | ||
is_other_int_dtype = is_integer_dtype(other.dtype) | ||
other = fill_int(other) if is_other_int_dtype else fill_bool(other) | ||
|
||
filler = (fill_int if is_self_int_dtype and is_other_int_dtype | ||
else fill_bool) | ||
return filler(self._constructor(na_op(self.values, other.values), | ||
index=self.index, name=name)) | ||
|
||
elif isinstance(other, ABCDataFrame): | ||
return NotImplemented | ||
res_values = na_op(self.values, other.values) | ||
unfilled = self._constructor(res_values, | ||
index=self.index, name=name) | ||
return filler(unfilled) | ||
|
||
else: | ||
# scalars, list, tuple, np.array | ||
filler = (fill_int if is_self_int_dtype and | ||
is_integer_dtype(np.asarray(other)) else fill_bool) | ||
return filler(self._constructor( | ||
na_op(self.values, other), | ||
index=self.index)).__finalize__(self) | ||
|
||
res_values = na_op(self.values, other) | ||
unfilled = self._constructor(res_values, index=self.index) | ||
return filler(unfilled).__finalize__(self) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All the edits to this function (895-918) are cleanup/refactor. |
||
|
||
return wrapper | ||
|
||
|
@@ -1023,21 +1025,23 @@ def na_op(x, y): | |
mask = notna(xrav) & notna(yrav) | ||
xrav = xrav[mask] | ||
|
||
# we may need to manually | ||
# broadcast a 1 element array | ||
if yrav.shape != mask.shape: | ||
yrav = np.empty(mask.shape, dtype=yrav.dtype) | ||
yrav.fill(yrav.item()) | ||
# FIXME: GH#5284, GH#5035, GH#19448 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests that hit this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See #19421. Only one test hits this, and only in py3:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a change and not just a refactor. As per the comments, it is horribly broken, and better to raise intentionally than accidentally have the |
||
# Without specifically raising here we get mismatched | ||
# errors in Py3 (TypeError) vs Py2 (ValueError) | ||
raise ValueError('Cannot broadcast operands together.') | ||
|
||
yrav = yrav[mask] | ||
if np.prod(xrav.shape) and np.prod(yrav.shape): | ||
if xrav.size: | ||
with np.errstate(all='ignore'): | ||
result[mask] = op(xrav, yrav) | ||
elif hasattr(x, 'size'): | ||
|
||
elif isinstance(x, np.ndarray): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a comment here |
||
# mask is only meaningful for x | ||
result = np.empty(x.size, dtype=x.dtype) | ||
mask = notna(xrav) | ||
xrav = xrav[mask] | ||
if np.prod(xrav.shape): | ||
if xrav.size: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using |
||
with np.errstate(all='ignore'): | ||
result[mask] = op(xrav, y) | ||
else: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The cases currently caught by this
else: raise TypeError
are after this PR caught in theis_categorical_dtype
check on 652.