From 185c24c3c7847bb6007fc2619d0dc2bf4e6a2f71 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 8 Oct 2019 19:26:12 -0700 Subject: [PATCH 1/6] passing --- pandas/core/dtypes/cast.py | 53 ++++++++++++++++++++++-- pandas/tests/dtypes/cast/test_promote.py | 23 +++++----- 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5b13e13bb20ba..e1a88b626cb3a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -408,9 +408,56 @@ def maybe_promote(dtype, fill_value=np.nan): dtype = np.object_ elif issubclass(dtype.type, np.integer): # upcast to prevent overflow - arr = np.asarray(fill_value) - if arr != arr.astype(dtype): - dtype = arr.dtype + mst = np.min_scalar_type(fill_value) + if mst > dtype: + # np.dtype ordering considers: + # int[n] < int[2*n] + # uint[n] < uint[2*n] + # u?int[n] < object_ + dtype = mst + fill_value = dtype.type(fill_value) + + elif np.can_cast(fill_value, dtype): + pass + + elif dtype.kind == "u" and mst.kind == "i": + dtype = np.promote_types(dtype, mst) + if dtype.kind == "f": + # Case where we disagree with numpy + dtype = np.dtype(np.object_) + fill_value = dtype.type(fill_value) + + elif dtype.kind == "i" and fill_value > np.iinfo(np.int64).max: + # object is the only way to represent fill_value and keep + # the range allowed by the given dtype + dtype = np.dtype(np.object_) + + elif dtype.kind == "i" and mst.kind == "u" and dtype.itemsize == mst.itemsize: + # We never cast signed to unsigned because that loses + # parts of the original range, so find the smallest signed + # integer that can hold all of `mst`. 
+ ndt = {np.int64: np.object_, np.int32: np.int64, np.int16: np.int32, np.int8: np.int16}[dtype.type] + dtype = np.dtype(ndt) + assert dtype.type(fill_value) == fill_value + #if dtype == np.int64: + # # no bigger signed integer dtypes to work with + # dtype = np.dtype(np.object_) + #elif dtype == np.int32 and mdt == np.uint32: + # dtype = np.dtype() + #else: + # sdt = "i" + str(dtype.itemsize*2) + # dtype = np.dtype(sdt) + + elif dtype.kind == "i" and mst.kind == "u": + if mst.itemsize < dtype.itemsize: + pass + elif mst == np.uint32: + dtype = np.dtype(np.int64) + else: + raise NotImplementedError(dtype, mst) + + fill_value = dtype.type(fill_value) + elif issubclass(dtype.type, np.floating): # check if we can cast if _check_lossless_cast(fill_value, dtype): diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index e4e5a22ea6ca0..a5b17f4c8b813 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -151,7 +151,11 @@ def _assert_match(result_fill_value, expected_fill_value): # GH#23982/25425 require the same type in addition to equality/NA-ness res_type = type(result_fill_value) ex_type = type(expected_fill_value) - assert res_type == ex_type + if res_type.__name__ == "uint64": + # No idea why, but these do not compare as equal + assert ex_type.__name__ == "uint64" + else: + assert res_type == ex_type match_value = result_fill_value == expected_fill_value @@ -276,25 +280,24 @@ def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype, box): boxed, box_dtype = box # read from parametrized fixture if not boxed: - if expected_dtype == object: - pytest.xfail("overflow error") if expected_dtype == "int32": pytest.xfail("always upcasts to platform int") - if dtype == "int8" and expected_dtype == "int16": + elif dtype == "int8" and expected_dtype == "int16": pytest.xfail("casts to int32 instead of int16") - if ( + elif ( issubclass(dtype.type, np.unsignedinteger) and 
np.iinfo(dtype).max < fill_value <= np.iinfo("int64").max ): pytest.xfail("falsely casts to signed") - if (dtype, expected_dtype) in [ + elif (dtype, expected_dtype) in [ ("uint8", "int16"), ("uint32", "int64"), ] and fill_value != np.iinfo("int32").min - 1: - pytest.xfail("casts to int32 instead of int8/int16") - # this following xfail is "only" a consequence of the - now strictly - # enforced - principle that maybe_promote_with_scalar always casts - pytest.xfail("wrong return type of fill_value") + pass#pytest.xfail("casts to int32 instead of int8/int16") + elif expected_dtype != object: + # this following xfail is "only" a consequence of the - now strictly + # enforced - principle that maybe_promote_with_scalar always casts + pass#pytest.xfail("wrong return type of fill_value") if boxed: if expected_dtype != object: pytest.xfail("falsely casts to object") From 1d6f37de75e774e6da35868482dca79e681d4131 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 8 Oct 2019 19:33:55 -0700 Subject: [PATCH 2/6] TST: Fix integer non-boxed xfails --- pandas/core/dtypes/cast.py | 2 ++ pandas/tests/dtypes/cast/test_promote.py | 19 ------------------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e1a88b626cb3a..179da5ec28ba5 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -453,6 +453,8 @@ def maybe_promote(dtype, fill_value=np.nan): pass elif mst == np.uint32: dtype = np.dtype(np.int64) + elif mst == np.uint16: + dtype = np.dtype(np.int32) else: raise NotImplementedError(dtype, mst) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index a5b17f4c8b813..ece5978f563a4 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -279,25 +279,6 @@ def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype, box): expected_dtype = np.dtype(expected_dtype) boxed, box_dtype = box # 
read from parametrized fixture - if not boxed: - if expected_dtype == "int32": - pytest.xfail("always upcasts to platform int") - elif dtype == "int8" and expected_dtype == "int16": - pytest.xfail("casts to int32 instead of int16") - elif ( - issubclass(dtype.type, np.unsignedinteger) - and np.iinfo(dtype).max < fill_value <= np.iinfo("int64").max - ): - pytest.xfail("falsely casts to signed") - elif (dtype, expected_dtype) in [ - ("uint8", "int16"), - ("uint32", "int64"), - ] and fill_value != np.iinfo("int32").min - 1: - pass#pytest.xfail("casts to int32 instead of int8/int16") - elif expected_dtype != object: - # this following xfail is "only" a consequence of the - now strictly - # enforced - principle that maybe_promote_with_scalar always casts - pass#pytest.xfail("wrong return type of fill_value") if boxed: if expected_dtype != object: pytest.xfail("falsely casts to object") From 0cc22ff4201fb7a5462ddd5f67e8e2c38236637b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 8 Oct 2019 20:30:32 -0700 Subject: [PATCH 3/6] streamline --- pandas/core/dtypes/cast.py | 54 ++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 179da5ec28ba5..6df54fcc8af67 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -415,7 +415,6 @@ def maybe_promote(dtype, fill_value=np.nan): # uint[n] < uint[2*n] # u?int[n] < object_ dtype = mst - fill_value = dtype.type(fill_value) elif np.can_cast(fill_value, dtype): pass @@ -425,38 +424,37 @@ def maybe_promote(dtype, fill_value=np.nan): if dtype.kind == "f": # Case where we disagree with numpy dtype = np.dtype(np.object_) - fill_value = dtype.type(fill_value) - elif dtype.kind == "i" and fill_value > np.iinfo(np.int64).max: - # object is the only way to represent fill_value and keep - # the range allowed by the given dtype - dtype = np.dtype(np.object_) + elif dtype.kind == "i" and mst.kind == "u": - elif 
dtype.kind == "i" and mst.kind == "u" and dtype.itemsize == mst.itemsize: - # We never cast signed to unsigned because that loses - # parts of the original range, so find the smallest signed - # integer that can hold all of `mst`. - ndt = {np.int64: np.object_, np.int32: np.int64, np.int16: np.int32, np.int8: np.int16}[dtype.type] - dtype = np.dtype(ndt) - assert dtype.type(fill_value) == fill_value - #if dtype == np.int64: - # # no bigger signed integer dtypes to work with - # dtype = np.dtype(np.object_) - #elif dtype == np.int32 and mdt == np.uint32: - # dtype = np.dtype() - #else: - # sdt = "i" + str(dtype.itemsize*2) - # dtype = np.dtype(sdt) + if fill_value > np.iinfo(np.int64).max: + # object is the only way to represent fill_value and keep + # the range allowed by the given dtype + dtype = np.dtype(np.object_) - elif dtype.kind == "i" and mst.kind == "u": - if mst.itemsize < dtype.itemsize: + elif mst.itemsize < dtype.itemsize: pass - elif mst == np.uint32: - dtype = np.dtype(np.int64) - elif mst == np.uint16: - dtype = np.dtype(np.int32) + + elif dtype.itemsize == mst.itemsize: + # We never cast signed to unsigned because that loses + # parts of the original range, so find the smallest signed + # integer that can hold all of `mst`. 
+ ndt = { + np.int64: np.object_, + np.int32: np.int64, + np.int16: np.int32, + np.int8: np.int16, + }[dtype.type] + dtype = np.dtype(ndt) + else: - raise NotImplementedError(dtype, mst) + # bump to signed integer dtype that holds all of `mst` range + ndt = { + np.uint32: np.int64, + np.uint16: np.int32, + np.uint8: np.int16, # TODO: Test for this case + }[mst.type] + dtype = np.dtype(ndt) fill_value = dtype.type(fill_value) From 40414b174d84620a430817f9b117463d5971548f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 9 Oct 2019 08:16:04 -0700 Subject: [PATCH 4/6] troubleshoot azure --- pandas/tests/dtypes/cast/test_promote.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index ece5978f563a4..7eaaa073b0b08 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -154,6 +154,10 @@ def _assert_match(result_fill_value, expected_fill_value): if res_type.__name__ == "uint64": # No idea why, but these do not compare as equal assert ex_type.__name__ == "uint64" + elif res_type.__name__ == "ulonglong": + # On some builds we get this instead of np.uint64 + assert res_type.dtype.itemsize == 8 + assert ex_type == res_type or ex_type == np.uint64 else: assert res_type == ex_type From e3fcf81aca1c5a06fdd9777ba64471e3eed97ebf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 9 Oct 2019 09:44:21 -0700 Subject: [PATCH 5/6] troubleshoot more --- pandas/tests/dtypes/cast/test_promote.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 7eaaa073b0b08..55b22875855f4 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -152,14 +152,15 @@ def _assert_match(result_fill_value, expected_fill_value): res_type = type(result_fill_value) ex_type = type(expected_fill_value) if 
res_type.__name__ == "uint64": - # No idea why, but these do not compare as equal + # No idea why, but these (sometimes) do not compare as equal assert ex_type.__name__ == "uint64" elif res_type.__name__ == "ulonglong": # On some builds we get this instead of np.uint64 assert res_type.dtype.itemsize == 8 assert ex_type == res_type or ex_type == np.uint64 else: - assert res_type == ex_type + # On some builds, type comparison fails, e.g. np.int32 != np.int32 + assert res_type == ex_type or res_type.__name__ == ex_type.__name__ match_value = result_fill_value == expected_fill_value From 703542b906d6b23206b3d269f11c8d9ab4cfa645 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 9 Oct 2019 13:33:43 -0700 Subject: [PATCH 6/6] troubleshoot CI --- pandas/core/dtypes/cast.py | 10 ++++++---- pandas/tests/dtypes/cast/test_promote.py | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6df54fcc8af67..a7fdd6759ba95 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -449,11 +449,13 @@ def maybe_promote(dtype, fill_value=np.nan): else: # bump to signed integer dtype that holds all of `mst` range + # Note: we have to use itemsize because some (windows) + # builds don't satisfy e.g. 
np.uint32 == np.uint32 ndt = { - np.uint32: np.int64, - np.uint16: np.int32, - np.uint8: np.int16, # TODO: Test for this case - }[mst.type] + 4: np.int64, + 2: np.int32, + 1: np.int16, # TODO: Test for this case + }[mst.itemsize] dtype = np.dtype(ndt) fill_value = dtype.type(fill_value) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 55b22875855f4..8d10ed26a80fa 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -156,7 +156,8 @@ def _assert_match(result_fill_value, expected_fill_value): assert ex_type.__name__ == "uint64" elif res_type.__name__ == "ulonglong": # On some builds we get this instead of np.uint64 - assert res_type.dtype.itemsize == 8 + # Note: can't check res_type.dtype.itemsize directly on numpy 1.18 + assert res_type(0).itemsize == 8 assert ex_type == res_type or ex_type == np.uint64 else: # On some builds, type comparison fails, e.g. np.int32 != np.int32