Skip to content

TST: Fix xfails for non-box maybe_promote on integer dtypes #28864

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 52 additions & 3 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,9 +408,58 @@ def maybe_promote(dtype, fill_value=np.nan):
dtype = np.object_
elif issubclass(dtype.type, np.integer):
# upcast to prevent overflow
arr = np.asarray(fill_value)
if arr != arr.astype(dtype):
dtype = arr.dtype
mst = np.min_scalar_type(fill_value)
if mst > dtype:
# np.dtype ordering considers:
# int[n] < int[2*n]
# uint[n] < uint[2*n]
# u?int[n] < object_
dtype = mst

elif np.can_cast(fill_value, dtype):
pass

elif dtype.kind == "u" and mst.kind == "i":
dtype = np.promote_types(dtype, mst)
if dtype.kind == "f":
# Case where we disagree with numpy
dtype = np.dtype(np.object_)

elif dtype.kind == "i" and mst.kind == "u":

if fill_value > np.iinfo(np.int64).max:
# object is the only way to represent fill_value and keep
# the range allowed by the given dtype
dtype = np.dtype(np.object_)

elif mst.itemsize < dtype.itemsize:
pass

elif dtype.itemsize == mst.itemsize:
# We never cast signed to unsigned because that loses
# parts of the original range, so find the smallest signed
# integer that can hold all of `mst`.
ndt = {
np.int64: np.object_,
np.int32: np.int64,
np.int16: np.int32,
np.int8: np.int16,
}[dtype.type]
dtype = np.dtype(ndt)

else:
# bump to signed integer dtype that holds all of `mst` range
# Note: we have to use itemsize because some (windows)
# builds don't satisfy e.g. np.uint32 == np.uint32
ndt = {
4: np.int64,
2: np.int32,
1: np.int16, # TODO: Test for this case
}[mst.itemsize]
dtype = np.dtype(ndt)

fill_value = dtype.type(fill_value)

elif issubclass(dtype.type, np.floating):
# check if we can cast
if _check_lossless_cast(fill_value, dtype):
Expand Down
32 changes: 11 additions & 21 deletions pandas/tests/dtypes/cast/test_promote.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,17 @@ def _assert_match(result_fill_value, expected_fill_value):
# GH#23982/25425 require the same type in addition to equality/NA-ness
res_type = type(result_fill_value)
ex_type = type(expected_fill_value)
assert res_type == ex_type
if res_type.__name__ == "uint64":
# No idea why, but these (sometimes) do not compare as equal
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is numpy/numpy#12525 and related issues

assert ex_type.__name__ == "uint64"
elif res_type.__name__ == "ulonglong":
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@h-vetinari did you find a nicer way to handle these?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I didn't have to deal with ulonglong (pretty sure that's platform-specific).

# On some builds we get this instead of np.uint64
# Note: can't check res_type.dtype.itemsize directly on numpy 1.18
assert res_type(0).itemsize == 8
assert ex_type == res_type or ex_type == np.uint64
else:
# On some builds, type comparison fails, e.g. np.int32 != np.int32
assert res_type == ex_type or res_type.__name__ == ex_type.__name__

match_value = result_fill_value == expected_fill_value

Expand Down Expand Up @@ -275,26 +285,6 @@ def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype, box):
expected_dtype = np.dtype(expected_dtype)
boxed, box_dtype = box # read from parametrized fixture

if not boxed:
if expected_dtype == object:
pytest.xfail("overflow error")
if expected_dtype == "int32":
pytest.xfail("always upcasts to platform int")
if dtype == "int8" and expected_dtype == "int16":
pytest.xfail("casts to int32 instead of int16")
if (
issubclass(dtype.type, np.unsignedinteger)
and np.iinfo(dtype).max < fill_value <= np.iinfo("int64").max
):
pytest.xfail("falsely casts to signed")
if (dtype, expected_dtype) in [
("uint8", "int16"),
("uint32", "int64"),
] and fill_value != np.iinfo("int32").min - 1:
pytest.xfail("casts to int32 instead of int8/int16")
# this following xfail is "only" a consequence of the - now strictly
# enforced - principle that maybe_promote_with_scalar always casts
pytest.xfail("wrong return type of fill_value")
if boxed:
if expected_dtype != object:
pytest.xfail("falsely casts to object")
Expand Down