-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Dtype was not changed when float was assignt to int column #37680
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
eafd9ea
c95dcc9
3788c4d
27d9e35
23cc926
cee9fc2
97e6da8
d1bc870
7e16465
4d91e66
d608474
1d332e8
760c6a1
870e927
fa8b331
903be31
566d769
916faa4
b0699a9
33c8424
6e7ee82
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,7 +91,6 @@ | |
maybe_downcast_to_dtype, | ||
maybe_infer_to_datetimelike, | ||
maybe_upcast, | ||
validate_numeric_casting, | ||
) | ||
from pandas.core.dtypes.common import ( | ||
ensure_int64, | ||
|
@@ -3207,13 +3206,8 @@ def _set_value(self, index, col, value, takeable: bool = False): | |
return | ||
|
||
series = self._get_item_cache(col) | ||
engine = self.index._engine | ||
loc = engine.get_loc(index) | ||
validate_numeric_casting(series.dtype, value) | ||
|
||
series._values[loc] = value | ||
# Note: trying to use series._set_value breaks tests in | ||
# tests.frame.indexing.test_indexing and tests.indexing.test_partial | ||
self.index._engine.get_loc(index) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you discarding There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If get_loc raises a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why bother calling get_loc here at all if its going to be called again in series._set_value? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We want to arrive in the except clause here, when index does not exist instead of the except clause in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
can you elaborate on this? is it bc the except clause here does a takeable check whereas the one in _set_value does not? if so, could just add that check there? why is the deleted comment about "tests.frame.indexing.test_indexing and tests.indexing.test_partial" no longer relevant? |
||
series._set_value(index, value, takeable) | ||
except (KeyError, TypeError): | ||
# set using a non-recursive method & reset the cache | ||
if takeable: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,7 @@ | |
|
||
from pandas.core.dtypes.cast import ( | ||
convert_dtypes, | ||
infer_dtype_from_scalar, | ||
maybe_cast_to_extension_array, | ||
validate_numeric_casting, | ||
) | ||
|
@@ -50,10 +51,12 @@ | |
is_bool, | ||
is_categorical_dtype, | ||
is_dict_like, | ||
is_dtype_equal, | ||
is_extension_array_dtype, | ||
is_integer, | ||
is_iterator, | ||
is_list_like, | ||
is_numeric_dtype, | ||
is_object_dtype, | ||
is_scalar, | ||
validate_all_hashable, | ||
|
@@ -1043,7 +1046,15 @@ def _set_with_engine(self, key, value): | |
# fails with AttributeError for IntervalIndex | ||
loc = self.index._engine.get_loc(key) | ||
validate_numeric_casting(self.dtype, value) | ||
self._values[loc] = value | ||
dtype, _ = infer_dtype_from_scalar(value) | ||
if is_dtype_equal(self.dtype, dtype): | ||
self._values[loc] = value | ||
else: | ||
# This only raises when index contains tuples | ||
try: | ||
self.loc[key] = value | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a very strange change, why is it needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, that is pretty ugly, but a Example:
This runs in the loc case and loc raises
Tuples are interpreted as components of MultiIndex There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. _set_with_engine is a fastpath, really shouldn't be going through self.loc the whole infer_dtype_from_scalar stuff seems like it should be handled as part of validate_numeric_casting the issue this PR is addressing is only for when we are indexing an entire column, right? |
||
except KeyError: | ||
self._values[loc] = value | ||
|
||
def _set_with(self, key, value): | ||
# other: fancy integer or otherwise | ||
|
@@ -1105,11 +1116,17 @@ def _set_value(self, label, value, takeable: bool = False): | |
takeable : interpret the index as indexers, default False | ||
""" | ||
try: | ||
if takeable: | ||
dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True) | ||
if takeable and is_dtype_equal(self.dtype, dtype): | ||
self._values[label] = value | ||
elif takeable: | ||
self.iloc[label] = value | ||
else: | ||
loc = self.index.get_loc(label) | ||
validate_numeric_casting(self.dtype, value) | ||
if not is_dtype_equal(self.dtype, dtype) and is_numeric_dtype(dtype): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do you need to check if is numeric here? isn't just not dtype equal enough? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have a test calling this function where scalar is a string value, which is expected to raise an ValueError, hence this check there. If the test is somehow erroneous, we could delete it. https://dev.azure.com/pandas-dev/pandas/_build/results?buildId=47080&view=logs&j=ba13898e-1dfb-5ace-9966-8b7af3677790 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you show the test There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Raising lines are There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you integrate this with the higher level if/else, these nested if/else are harder to read, even if it means duplicating some code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
self.loc[label] = value | ||
return | ||
self._values[loc] = value | ||
except KeyError: | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -114,31 +114,17 @@ def test_setitem_series_int64(self, val, exp_dtype, request): | |
obj = pd.Series([1, 2, 3, 4]) | ||
assert obj.dtype == np.int64 | ||
|
||
if exp_dtype is np.float64: | ||
exp = pd.Series([1, 1, 3, 4]) | ||
self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64) | ||
mark = pytest.mark.xfail(reason="GH12747 The result must be float") | ||
request.node.add_marker(mark) | ||
|
||
exp = pd.Series([1, val, 3, 4]) | ||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) | ||
|
||
@pytest.mark.parametrize( | ||
"val,exp_dtype", [(np.int32(1), np.int8), (np.int16(2 ** 9), np.int16)] | ||
"val,exp_dtype", [(np.int32(1), np.int32), (np.int16(2 ** 9), np.int16)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this was downcasting before? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes |
||
) | ||
def test_setitem_series_int8(self, val, exp_dtype, request): | ||
obj = pd.Series([1, 2, 3, 4], dtype=np.int8) | ||
assert obj.dtype == np.int8 | ||
|
||
if exp_dtype is np.int16: | ||
exp = pd.Series([1, 0, 3, 4], dtype=np.int8) | ||
self._assert_setitem_series_conversion(obj, val, exp, np.int8) | ||
mark = pytest.mark.xfail( | ||
reason="BUG: it must be pd.Series([1, 1, 3, 4], dtype=np.int16" | ||
) | ||
request.node.add_marker(mark) | ||
|
||
exp = pd.Series([1, val, 3, 4], dtype=np.int8) | ||
exp = pd.Series([1, val, 3, 4], dtype=exp_dtype) | ||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) | ||
|
||
@pytest.mark.parametrize( | ||
|
@@ -171,33 +157,17 @@ def test_setitem_series_complex128(self, val, exp_dtype): | |
@pytest.mark.parametrize( | ||
"val,exp_dtype", | ||
[ | ||
(1, np.int64), | ||
(3, np.int64), | ||
(1.1, np.float64), | ||
(1 + 1j, np.complex128), | ||
(1, "object"), | ||
(3, "object"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so these were doing an inferred cast before? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. numpy inferred cast, yes |
||
(1.1, "object"), | ||
(1 + 1j, "object"), | ||
(True, np.bool_), | ||
], | ||
) | ||
def test_setitem_series_bool(self, val, exp_dtype, request): | ||
obj = pd.Series([True, False, True, False]) | ||
assert obj.dtype == np.bool_ | ||
|
||
mark = None | ||
if exp_dtype is np.int64: | ||
exp = pd.Series([True, True, True, False]) | ||
self._assert_setitem_series_conversion(obj, val, exp, np.bool_) | ||
mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be int") | ||
elif exp_dtype is np.float64: | ||
exp = pd.Series([True, True, True, False]) | ||
self._assert_setitem_series_conversion(obj, val, exp, np.bool_) | ||
mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be float") | ||
elif exp_dtype is np.complex128: | ||
exp = pd.Series([True, True, True, False]) | ||
self._assert_setitem_series_conversion(obj, val, exp, np.bool_) | ||
mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be complex") | ||
if mark is not None: | ||
request.node.add_marker(mark) | ||
|
||
exp = pd.Series([True, val, True, False]) | ||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,7 +47,6 @@ def test_partial_setting(self): | |
with pytest.raises(IndexError, match=msg): | ||
s.iloc[3] = 5.0 | ||
|
||
msg = "index 3 is out of bounds for axis 0 with size 3" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is the message now There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Messages for |
||
with pytest.raises(IndexError, match=msg): | ||
s.iat[3] = 5.0 | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
did not adjust -> not adjusting