-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TST: Isin converted floats unnecessarily to int causing rounding issues #37770
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
addfd75
63b1bc5
f2273a3
1e4d572
c0b1ab2
06aa378
d034421
c5b31ce
2d34b8f
b41b689
ef12c10
556cc52
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
construct_1d_object_array_from_listlike, | ||
infer_dtype_from_array, | ||
maybe_promote, | ||
maybe_upcast, | ||
) | ||
from pandas.core.dtypes.common import ( | ||
ensure_float64, | ||
|
@@ -431,6 +432,13 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: | |
return cast("Categorical", comps).isin(values) | ||
|
||
comps, dtype = _ensure_data(comps) | ||
if is_numeric_dtype(comps): | ||
try: | ||
# Try finding a dtype which would not change our values | ||
values, _ = maybe_upcast(values, dtype=dtype) | ||
dtype = values.dtype | ||
except (ValueError, TypeError): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what cases raise? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not a big fan of this, can we not be explict here on the dtype checks (as we are elsewhere).? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could use |
||
pass | ||
values, _ = _ensure_data(values, dtype=dtype) | ||
|
||
# faster for larger cases to use np.in1d | ||
|
@@ -445,7 +453,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: | |
f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c)) | ||
else: | ||
f = np.in1d | ||
elif is_integer_dtype(comps): | ||
elif is_integer_dtype(comps) and is_integer_dtype(values): | ||
try: | ||
values = values.astype("int64", copy=False) | ||
comps = comps.astype("int64", copy=False) | ||
|
@@ -454,7 +462,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: | |
values = values.astype(object) | ||
comps = comps.astype(object) | ||
|
||
elif is_float_dtype(comps): | ||
elif is_numeric_dtype(comps) or is_numeric_dtype(values): | ||
try: | ||
values = values.astype("float64", copy=False) | ||
comps = comps.astype("float64", copy=False) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"cast
float
unnecessarily" -> "unnecessarily castingfloat
dtypes"There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably needs to move to 1.3.0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it makes more sense removing the whatsnew entirely, since your fix went into 1.2. Would be confusing having this in 1.3