-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: pd.compare does not recognize differences when comparing values with null Int64 data type #48966
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG: pd.compare does not recognize differences when comparing values with null Int64 data type #48966
Changes from 9 commits
f027ff9
0f3fcef
9c0a19b
0b5ec5c
d72e0e3
4412e33
9b97320
ffbb131
bae0808
4838dc0
3f739dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -238,17 +238,50 @@ def test_invalid_input_result_names(result_names): | |||||
df1.compare(df2, result_names=result_names) | ||||||
|
||||||
|
||||||
def test_compare_ea_and_np_dtype(): | ||||||
# GH#44014 | ||||||
df1 = pd.DataFrame({"a": [4.0, 4], "b": [1.0, 2]}) | ||||||
df2 = pd.DataFrame({"a": pd.Series([1, pd.NA], dtype="Int64"), "b": [1.0, 2]}) | ||||||
result = df1.compare(df2, keep_shape=True) | ||||||
@pytest.mark.parametrize( | ||||||
"ea_val,np_dtype_val", | ||||||
[(4, pd.NA), (pd.NA, pd.NA), (pd.NA, 4)], | ||||||
) | ||||||
def test_compare_ea_and_np_dtype(ea_val, np_dtype_val): | ||||||
ea = [4.0, ea_val] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add GH refs. Please rename the variable; `ea` stands for extension array. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Will add GH refs. Is `arr` OK?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, works for me. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the quick review. |
||||||
np_dtype = pd.Series([1, np_dtype_val], dtype="Int64") | ||||||
|
||||||
ea_df = pd.DataFrame({"a": ea, "b": [1.0, 2]}) | ||||||
np_dtype_df = pd.DataFrame({"a": np_dtype, "b": [1.0, 2]}) | ||||||
expected = pd.DataFrame( | ||||||
{ | ||||||
("a", "self"): ea, | ||||||
("a", "other"): np_dtype, | ||||||
("b", "self"): np.nan, | ||||||
("b", "other"): np.nan, | ||||||
} | ||||||
) | ||||||
result = ea_df.compare(np_dtype_df, keep_shape=True) | ||||||
tm.assert_frame_equal(result, expected) | ||||||
|
||||||
|
||||||
@pytest.mark.parametrize( | ||||||
"df1_val,df2_val,diff_self,diff_other", | ||||||
[ | ||||||
(4, 3, 4, 3), | ||||||
(4, 4, pd.NA, pd.NA), | ||||||
(4, pd.NA, 4, pd.NA), | ||||||
(pd.NA, pd.NA, pd.NA, pd.NA), | ||||||
], | ||||||
) | ||||||
def test_compare_nullable_int64_dtype(df1_val, df2_val, diff_self, diff_other): | ||||||
|
||||||
df1 = pd.DataFrame({"a": pd.Series([df1_val, pd.NA], dtype="Int64"), "b": [1.0, 2]}) | ||||||
df2 = df1.copy() | ||||||
df2.loc[0, "a"] = df2_val | ||||||
|
||||||
expected = pd.DataFrame( | ||||||
{ | ||||||
("a", "self"): [4.0, np.nan], | ||||||
("a", "other"): pd.Series([1, pd.NA], dtype="Int64"), | ||||||
("a", "self"): pd.Series([diff_self, pd.NA], dtype="Int64"), | ||||||
("a", "other"): pd.Series([diff_other, pd.NA], dtype="Int64"), | ||||||
("b", "self"): np.nan, | ||||||
("b", "other"): np.nan, | ||||||
} | ||||||
) | ||||||
result = df1.compare(df2, keep_shape=True) | ||||||
tm.assert_frame_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These names are confusing; can you clarify?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`ea_val` and `np_dtype_val` are intended to communicate the value for the extension array and the value for the np dtype array, respectively. Is this OK?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep this is clearer