-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Bug in loc did not change dtype when complete column was assigned #37749
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 32 commits
6450a2c
1599c5c
4d39612
f9f37cb
5cf355b
8d203f9
e35e009
4c391da
71fbf9f
babcd38
caa6046
8b95236
3b98ee0
f9b8a59
4bef38e
27ea3e2
f94277b
279e812
d5f6150
706dc6a
66d4b4e
fa25075
3c06ba6
a33659c
0f556c4
181e62a
b759ac9
a353930
d28e1e1
1aa8522
1bc0d46
61aab16
14fe5a8
26b5d6f
913ffea
e6e22f3
23f6f3b
99b87c9
f97a252
700ce6c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -289,6 +289,27 @@ def test_setitem_periodindex(self): | |
assert isinstance(rs.index, PeriodIndex) | ||
tm.assert_index_equal(rs.index, rng) | ||
|
||
@pytest.mark.parametrize("klass", [list, np.array]) | ||
def test_iloc_setitem_bool_indexer(self, klass): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The test name is good, but it belongs in tests.indexing.test_iloc. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
# GH: 36741 | ||
df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) | ||
indexer = klass([True, False, False]) | ||
df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 | ||
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) | ||
tm.assert_frame_equal(df, expected) | ||
|
||
def test_setitem_scalar_dtype_change(self): | ||
# GH#27583 | ||
df = DataFrame({"a": [0.0], "b": [0.0]}) | ||
df[["a", "b"]] = 0 | ||
expected = DataFrame({"a": [0], "b": [0]}) | ||
tm.assert_frame_equal(df, expected) | ||
|
||
df = DataFrame({"a": [0.0], "b": [0.0]}) | ||
df["b"] = 0 | ||
expected = DataFrame({"a": [0.0], "b": [0]}) | ||
tm.assert_frame_equal(df, expected) | ||
|
||
|
||
class TestDataFrameSetItemSlicing: | ||
def test_setitem_slice_position(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
concat, | ||
date_range, | ||
isna, | ||
to_datetime, | ||
) | ||
import pandas._testing as tm | ||
from pandas.api.types import is_scalar | ||
|
@@ -834,6 +835,32 @@ def test_iloc_setitem_dictionary_value(self): | |
expected = DataFrame({"x": [1, 9], "y": [2, 99]}) | ||
tm.assert_frame_equal(df, expected) | ||
|
||
def test_iloc_setitem_conversion_to_datetime(self): | ||
# GH#20511 | ||
df = DataFrame( | ||
[["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]], | ||
columns=["date0", "date1"], | ||
) | ||
df.iloc[:, [0]] = df.iloc[:, [0]].apply( | ||
lambda x: to_datetime(x, errors="coerce") | ||
) | ||
expected = DataFrame( | ||
{ | ||
"date0": [to_datetime("2015-01-01"), to_datetime("2016-01-01")], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you use Timestamp instead of to_datetime? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
"date1": ["2016-01-01", "2015-01-01"], | ||
} | ||
) | ||
tm.assert_frame_equal(df, expected) | ||
|
||
def test_iloc_conversion_to_float_32_for_columns_list(self): | ||
# GH#33198 | ||
arr = np.random.randn(10 ** 2).reshape(5, 20).astype(np.float64) | ||
df = DataFrame(arr) | ||
df.iloc[:, 10:] = df.iloc[:, 10:].astype(np.float32) | ||
result = df.dtypes.value_counts() | ||
expected = Series([10, 10], index=[np.dtype("float32"), np.dtype("float64")]) | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
||
class TestILocErrors: | ||
# NB: this test should work for _any_ Series we can pass as | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1127,6 +1127,23 @@ def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): | |
|
||
tm.assert_frame_equal(expected, df) | ||
|
||
def test_loc_setitem_null_slice_single_column_series_value_different_dtype(self): | ||
# GH#20635 | ||
df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) | ||
df.loc[:, "C"] = df["C"].astype("int64") | ||
expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]}) | ||
tm.assert_frame_equal(df, expected) | ||
|
||
@pytest.mark.parametrize("dtype", ["int64", "Int64"]) | ||
def test_loc_setitem_null_slice_different_dtypes(self, dtype): | ||
# GH#20635 | ||
df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) | ||
rhs = df[["B", "C"]].astype("int64").astype(dtype) | ||
df.loc[:, ["B", "C"]] = rhs | ||
expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}) | ||
expected[["B", "C"]] = expected[["B", "C"]].astype(dtype) | ||
tm.assert_frame_equal(df, expected) | ||
|
||
|
||
class TestLocWithMultiIndex: | ||
@pytest.mark.parametrize( | ||
|
@@ -2000,6 +2017,14 @@ def test_loc_setitem_dt64tz_values(self): | |
result = s2["a"] | ||
assert result == expected | ||
|
||
@pytest.mark.parametrize("dtype", ["int64", "Int64"]) | ||
def test_setitem_series_null_slice_different_dtypes(self, dtype): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. test_loc_setitem_... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks, I changed all test names and fixed the failing tests. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This one still shows as test_setitem instead of test_loc_setitem. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Weird, I was certain I had changed this. Now it shows correctly. |
||
# GH: 20635 | ||
ser = Series(["3", "4"], name="A") | ||
ser.loc[:] = ser.astype("int64").astype(dtype) | ||
expected = Series([3, 4], name="A", dtype=dtype) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is doing the opposite of #39163. Did we decide to revert part or all of that? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since yours is significantly newer, I am fine with "closing" this. I would check whether some of the issues are fixed. |
||
tm.assert_series_equal(ser, expected) | ||
|
||
|
||
@pytest.mark.parametrize("value", [1, 1.5]) | ||
def test_loc_int_in_object_index(frame_or_series, value): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be the else condition.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, value can be anything from an int or float to a NumPy array. I think this check is only necessary if we have a Series or DataFrame. Maybe with an array?