From d6356206ab681f6732e1a7d270a689a667c69f08 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 10:09:58 -0500 Subject: [PATCH 01/13] TST: More maybe_promote xfails --- pandas/core/dtypes/cast.py | 15 ++++++++++++--- pandas/tests/dtypes/cast/test_promote.py | 11 ----------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5801384bf8db9..1b5340468034a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -348,12 +348,21 @@ def maybe_promote(dtype, fill_value=np.nan): dtype = np.dtype(np.object_) fill_value = np.nan + if dtype == np.object_ or dtype.kind in ["U", "S"]: + # We treat string-like dtypes as object, and _always_ fill + # with np.nan + fill_value = np.nan + # returns tuple of (dtype, fill_value) if issubclass(dtype.type, np.datetime64): - try: - fill_value = tslibs.Timestamp(fill_value).to_datetime64() - except (TypeError, ValueError): + if isinstance(fill_value, datetime) and fill_value.tzinfo is not None: + # Trying to insert tzaware into tznaive, have to cast to object dtype = np.dtype(np.object_) + else: + try: + fill_value = tslibs.Timestamp(fill_value).to_datetime64() + except (TypeError, ValueError): + dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.timedelta64): try: fv = tslibs.Timedelta(fill_value) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 1b7de9b20f42f..d80004b674dbc 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -484,9 +484,6 @@ def test_maybe_promote_any_numpy_dtype_with_datetimetz( fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) boxed, box_dtype = box # read from parametrized fixture - if dtype.kind == "M" and not boxed: - pytest.xfail("Comes back as M8 instead of object") - fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] # filling any numpy dtype with datetimetz casts to object @@ -572,11 
+569,6 @@ def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype_reduced, bo fill_dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if boxed and box_dtype is None and fill_dtype.kind == "m": - pytest.xfail("wrong missing value marker") - if boxed and box_dtype is None and fill_dtype.kind == "M": - pytest.xfail("wrong missing value marker") - # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -639,9 +631,6 @@ def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype_reduced, bo fill_dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if boxed and box_dtype is None and is_datetime_or_timedelta_dtype(fill_dtype): - pytest.xfail("wrong missing value marker") - # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] From 677825b9b02c609a490631415866a929a9f2b2a4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 10:51:32 -0500 Subject: [PATCH 02/13] cleanup --- pandas/core/dtypes/cast.py | 3 +-- pandas/tests/dtypes/cast/test_promote.py | 23 +++++++++++------------ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1b5340468034a..7bce86b572f0c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -352,6 +352,7 @@ def maybe_promote(dtype, fill_value=np.nan): # We treat string-like dtypes as object, and _always_ fill # with np.nan fill_value = np.nan + dtype = np.dtype(np.object_) # returns tuple of (dtype, fill_value) if issubclass(dtype.type, np.datetime64): @@ -424,8 +425,6 @@ def maybe_promote(dtype, fill_value=np.nan): # in case we have a string that looked like a number if is_extension_array_dtype(dtype): pass - elif is_datetime64tz_dtype(dtype): - pass elif issubclass(np.dtype(dtype).type, (bytes, str)): dtype = np.object_ diff 
--git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index d80004b674dbc..1881ce295a4a2 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -332,18 +332,17 @@ def test_maybe_promote_any_with_datetime64( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if is_datetime64_dtype(dtype): - if boxed and ( - box_dtype == object - or (box_dtype is None and not is_datetime64_dtype(type(fill_value))) - ): - pytest.xfail("falsely upcasts to object") - else: - if boxed and ( - box_dtype == "dt_dtype" - or (box_dtype is None and is_datetime64_dtype(type(fill_value))) - ): - pytest.xfail("mix of lack of upcasting, resp. wrong missing value") + if boxed: + if is_datetime64_dtype(dtype): + if box_dtype == object: + pytest.xfail("falsely upcasts to object") + elif box_dtype is None and not is_datetime64_dtype(type(fill_value)): + pytest.xfail("falsely upcasts to object") + else: + if box_dtype == "dt_dtype": + pytest.xfail("mix of lack of upcasting, resp. wrong missing value") + elif box_dtype is None and is_datetime64_dtype(type(fill_value)): + pytest.xfail("mix of lack of upcasting, resp. 
wrong missing value") # special case for box_dtype box_dtype = np.dtype(datetime64_dtype) if box_dtype == "dt_dtype" else box_dtype From ac2cb8713884f12ba14e057936376d0cba23daf8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 10:52:40 -0500 Subject: [PATCH 03/13] trim xfail cases --- pandas/tests/dtypes/cast/test_promote.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 1881ce295a4a2..6a4c10fd2eaa9 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -334,14 +334,10 @@ def test_maybe_promote_any_with_datetime64( if boxed: if is_datetime64_dtype(dtype): - if box_dtype == object: - pytest.xfail("falsely upcasts to object") - elif box_dtype is None and not is_datetime64_dtype(type(fill_value)): + if box_dtype is None and not is_datetime64_dtype(type(fill_value)): pytest.xfail("falsely upcasts to object") else: - if box_dtype == "dt_dtype": - pytest.xfail("mix of lack of upcasting, resp. wrong missing value") - elif box_dtype is None and is_datetime64_dtype(type(fill_value)): + if box_dtype is None and is_datetime64_dtype(type(fill_value)): pytest.xfail("mix of lack of upcasting, resp. 
wrong missing value") # special case for box_dtype From 7137dda9c5597d4021e8c3457b7abb296705fa41 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 11:21:54 -0500 Subject: [PATCH 04/13] cleanup --- pandas/tests/dtypes/cast/test_promote.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 6a4c10fd2eaa9..66bf73758b000 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -332,12 +332,12 @@ def test_maybe_promote_any_with_datetime64( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if boxed: + if boxed and box_dtype is None: if is_datetime64_dtype(dtype): - if box_dtype is None and not is_datetime64_dtype(type(fill_value)): + if not is_datetime64_dtype(type(fill_value)): pytest.xfail("falsely upcasts to object") else: - if box_dtype is None and is_datetime64_dtype(type(fill_value)): + if is_datetime64_dtype(type(fill_value)): pytest.xfail("mix of lack of upcasting, resp. 
wrong missing value") # special case for box_dtype From 5ac4da58593421f27b1f68512dec57faf170b20e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 11:29:19 -0500 Subject: [PATCH 05/13] cleanup box xfails --- pandas/tests/dtypes/cast/test_promote.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 66bf73758b000..bd1abc716c8e8 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -524,15 +524,15 @@ def test_maybe_promote_any_with_timedelta64( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if is_timedelta64_dtype(dtype): - if boxed and ( - box_dtype == object - or (box_dtype is None and not is_timedelta64_dtype(type(fill_value))) - ): - pytest.xfail("falsely upcasts to object") - else: - if boxed and box_dtype is None and is_timedelta64_dtype(type(fill_value)): - pytest.xfail("does not upcast correctly") + if boxed: + if is_timedelta64_dtype(dtype): + if box_dtype == object: + pytest.xfail("falsely upcasts to object") + elif box_dtype is None and not is_timedelta64_dtype(type(fill_value)): + pytest.xfail("falsely upcasts to object") + else: + if box_dtype is None and is_timedelta64_dtype(type(fill_value)): + pytest.xfail("does not upcast correctly") # special case for box_dtype box_dtype = np.dtype(timedelta64_dtype) if box_dtype == "td_dtype" else box_dtype From 7fe25bc56632ecb63b4e3e730551078ce51140cc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 11:31:05 -0500 Subject: [PATCH 06/13] clean up not-boxed xfails --- pandas/tests/dtypes/cast/test_promote.py | 28 +++++++++--------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index bd1abc716c8e8..0c30cfb3d5e71 100644 --- 
a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -678,24 +678,16 @@ def test_maybe_promote_any_numpy_dtype_with_na( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if ( - dtype == bytes - and not boxed - and fill_value is not None - and fill_value is not NaT - ): - pytest.xfail("does not upcast to object") - elif dtype == "uint64" and not boxed and fill_value == iNaT: - pytest.xfail("does not upcast correctly") - # below: opinionated that iNaT should be interpreted as missing value - elif ( - not boxed - and (is_float_dtype(dtype) or is_complex_dtype(dtype)) - and fill_value == iNaT - ): - pytest.xfail("does not cast to missing value marker correctly") - elif (is_string_dtype(dtype) or dtype == bool) and not boxed and fill_value == iNaT: - pytest.xfail("does not cast to missing value marker correctly") + if not boxed: + if dtype == bytes and fill_value is not None and fill_value is not NaT: + pytest.xfail("does not upcast to object") + elif dtype == "uint64" and fill_value == iNaT: + pytest.xfail("does not upcast correctly") + # below: opinionated that iNaT should be interpreted as missing value + elif (is_float_dtype(dtype) or is_complex_dtype(dtype)) and fill_value == iNaT: + pytest.xfail("does not cast to missing value marker correctly") + elif (is_string_dtype(dtype) or dtype == bool) and fill_value == iNaT: + pytest.xfail("does not cast to missing value marker correctly") if is_integer_dtype(dtype) and dtype == "uint64" and fill_value == iNaT: # uint64 + negative int casts to object; iNaT is considered as missing From 44a616214977a8029f9e34d7e26ba59b2565f07b Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 3 Oct 2019 07:56:10 +0100 Subject: [PATCH 07/13] DEPR: Deprecate Index.set_value (#28621) --- doc/source/reference/indexing.rst | 1 - doc/source/whatsnew/v1.0.0.rst | 4 +++- pandas/core/indexes/base.py | 14 +++++++++++++- pandas/tests/indexes/test_base.py 
| 13 +++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 576f734d517aa..dd59a99b3df9e 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -166,7 +166,6 @@ Selecting Index.get_slice_bound Index.get_value Index.get_values - Index.set_value Index.isin Index.slice_indexer Index.slice_locs diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2668734031ee1..16d23d675a8bb 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -123,7 +123,9 @@ Documentation Improvements Deprecations ~~~~~~~~~~~~ -- +- ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``, + value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` + is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). - .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0b5f9fb61fce8..afa4f1a5a8c76 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -205,7 +205,9 @@ class Index(IndexOpsMixin, PandasObject): """ # tolist is not actually deprecated, just suppressed in the __dir__ - _deprecations = DirNamesMixin._deprecations | frozenset(["tolist", "dtype_str"]) + _deprecations = DirNamesMixin._deprecations | frozenset( + ["tolist", "dtype_str", "set_value"] + ) # To hand over control to subclasses _join_precedence = 1 @@ -4680,10 +4682,20 @@ def set_value(self, arr, key, value): """ Fast lookup of value from 1-dimensional ndarray. + .. deprecated:: 1.0 + Notes ----- Only use this if you know what you're doing. """ + warnings.warn( + ( + "The 'set_value' method is deprecated, and " + "will be removed in a future version." 
+ ), + FutureWarning, + stacklevel=2, + ) self._engine.set_value( com.values_from_object(arr), com.values_from_object(key), value ) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d1ed79118d2fa..82d5ddd1ac358 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1908,16 +1908,21 @@ def test_is_monotonic_incomparable(self, attr): index = Index([5, datetime.now(), 7]) assert not getattr(index, attr) - def test_get_set_value(self): + def test_set_value_deprecated(self): + # GH 28621 + idx = self.create_index() + arr = np.array([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + idx.set_value(arr, idx[1], 80) + assert arr[1] == 80 + + def test_get_value(self): # TODO: Remove function? GH 19728 values = np.random.randn(100) date = self.dateIndex[67] assert_almost_equal(self.dateIndex.get_value(values, date), values[67]) - self.dateIndex.set_value(values, date, 10) - assert values[67] == 10 - @pytest.mark.parametrize("values", [["foo", "bar", "quux"], {"foo", "bar", "quux"}]) @pytest.mark.parametrize( "index,expected", From 624dc21921e8fd88b5a90c0da86d7c8c79b4fbe5 Mon Sep 17 00:00:00 2001 From: Tirth Jain Date: Thu, 3 Oct 2019 22:41:45 +0530 Subject: [PATCH 08/13] CLN: Centralised _check_percentile (#27584) --- pandas/core/algorithms.py | 3 --- pandas/core/frame.py | 8 ++++++-- pandas/core/generic.py | 23 ++++++----------------- pandas/core/series.py | 4 ++-- pandas/util/_validators.py | 35 +++++++++++++++++++++++++++++++++++ 5 files changed, 49 insertions(+), 24 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 5a479667f0227..4073ede84c6f6 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -246,7 +246,6 @@ def _get_hashtable_algo(values): def _get_data_algo(values, func_map): - if is_categorical_dtype(values): values = values._values_for_rank() @@ -297,7 +296,6 @@ def match(to_match, values, na_sentinel=-1): result = 
table.lookup(to_match) if na_sentinel != -1: - # replace but return a numpy array # use a Series because it handles dtype conversions properly from pandas import Series @@ -1163,7 +1161,6 @@ def compute(self, method): # slow method if n >= len(self.obj): - reverse_it = self.keep == "last" or method == "nlargest" ascending = method == "nsmallest" slc = np.s_[::-1] if reverse_it else np.s_[:] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 67360122ed021..1ab62d7a9e3bf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -31,7 +31,11 @@ deprecate_kwarg, rewrite_axis_style_signature, ) -from pandas.util._validators import validate_axis_style_args, validate_bool_kwarg +from pandas.util._validators import ( + validate_axis_style_args, + validate_bool_kwarg, + validate_percentile, +) from pandas.core.dtypes.cast import ( cast_scalar_to_array, @@ -8178,7 +8182,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"): C 1 days 12:00:00 Name: 0.5, dtype: object """ - self._check_percentile(q) + validate_percentile(q) data = self._get_numeric_data() if numeric_only else self axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cb21588c8ba1a..ddbdb48ab0441 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -32,7 +32,11 @@ from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature -from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, + validate_percentile, +) from pandas.core.dtypes.common import ( ensure_int64, @@ -10168,7 +10172,7 @@ def describe(self, percentiles=None, include=None, exclude=None): percentiles = list(percentiles) # get them all to be in [0, 1] - self._check_percentile(percentiles) + 
validate_percentile(percentiles) # median should always be included if 0.5 not in percentiles: @@ -10272,21 +10276,6 @@ def describe_1d(data): d.columns = data.columns.copy() return d - def _check_percentile(self, q): - """ - Validate percentiles (used by describe and quantile). - """ - - msg = "percentiles should all be in the interval [0, 1]. Try {0} instead." - q = np.asarray(q) - if q.ndim == 0: - if not 0 <= q <= 1: - raise ValueError(msg.format(q / 100.0)) - else: - if not all(0 <= qs <= 1 for qs in q): - raise ValueError(msg.format(q / 100.0)) - return q - _shared_docs[ "pct_change" ] = """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 44a29f73c51e7..97e8a2dbac7f5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -16,7 +16,7 @@ from pandas.compat import PY36 from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, deprecate -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg, validate_percentile from pandas.core.dtypes.common import ( _is_unorderable_exception, @@ -2317,7 +2317,7 @@ def quantile(self, q=0.5, interpolation="linear"): dtype: float64 """ - self._check_percentile(q) + validate_percentile(q) # We dispatch to DataFrame so that core.internals only has to worry # about 2D cases. 
diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 8d5f9f7749682..f5a472596f58f 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -2,8 +2,11 @@ Module that contains many useful utilities for validating data or function arguments """ +from typing import Iterable, Union import warnings +import numpy as np + from pandas.core.dtypes.common import is_bool @@ -370,3 +373,35 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): raise ValueError("Cannot specify both 'value' and 'method'.") return value, method + + +def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray: + """ + Validate percentiles (used by describe and quantile). + + This function checks if the given float or iterable of floats is a valid percentile + otherwise raises a ValueError. + + Parameters + ---------- + q: float or iterable of floats + A single percentile or an iterable of percentiles. + + Returns + ------- + ndarray + An ndarray of the percentiles if valid. + + Raises + ------ + ValueError if percentiles are not in given interval([0, 1]). + """ + msg = "percentiles should all be in the interval [0, 1]. Try {0} instead." 
+ q_arr = np.asarray(q) + if q_arr.ndim == 0: + if not 0 <= q_arr <= 1: + raise ValueError(msg.format(q_arr / 100.0)) + else: + if not all(0 <= qs <= 1 for qs in q_arr): + raise ValueError(msg.format(q_arr / 100.0)) + return q_arr From b209aefb0e9c62d8bc8c87e38c9fea8523af91a0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 3 Oct 2019 12:47:45 -0500 Subject: [PATCH 09/13] remove iNaT case --- pandas/tests/dtypes/cast/test_promote.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 0c30cfb3d5e71..0df55f7090b1b 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -669,7 +669,7 @@ def test_maybe_promote_any_with_object(any_numpy_dtype_reduced, object_dtype, bo ) -@pytest.mark.parametrize("fill_value", [None, np.nan, NaT, iNaT]) +@pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) # override parametrization due to to many xfails; see GH 23982 / 25425 @pytest.mark.parametrize("box", [(False, None)]) def test_maybe_promote_any_numpy_dtype_with_na( @@ -678,17 +678,6 @@ def test_maybe_promote_any_numpy_dtype_with_na( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if not boxed: - if dtype == bytes and fill_value is not None and fill_value is not NaT: - pytest.xfail("does not upcast to object") - elif dtype == "uint64" and fill_value == iNaT: - pytest.xfail("does not upcast correctly") - # below: opinionated that iNaT should be interpreted as missing value - elif (is_float_dtype(dtype) or is_complex_dtype(dtype)) and fill_value == iNaT: - pytest.xfail("does not cast to missing value marker correctly") - elif (is_string_dtype(dtype) or dtype == bool) and fill_value == iNaT: - pytest.xfail("does not cast to missing value marker correctly") - if is_integer_dtype(dtype) and dtype == "uint64" and fill_value == iNaT: # uint64 + negative int casts 
to object; iNaT is considered as missing expected_dtype = np.dtype(object) From c718d98690d754d72986e7aea8a377d37b7a98bb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 3 Oct 2019 12:49:36 -0500 Subject: [PATCH 10/13] remove another iNaT case --- pandas/tests/dtypes/cast/test_promote.py | 26 +++++------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 0df55f7090b1b..42c019e19b8f4 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas._libs.tslibs import NaT, iNaT +from pandas._libs.tslibs import NaT from pandas.compat import is_platform_windows from pandas.core.dtypes.cast import maybe_promote @@ -19,7 +19,6 @@ is_integer_dtype, is_object_dtype, is_scalar, - is_string_dtype, is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -432,7 +431,7 @@ def test_maybe_promote_datetimetz_with_datetimetz( ) -@pytest.mark.parametrize("fill_value", [None, np.nan, NaT, iNaT]) +@pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) # override parametrization due to to many xfails; see GH 23982 / 25425 @pytest.mark.parametrize("box", [(False, None)]) def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value, box): @@ -440,14 +439,7 @@ def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value, box): dtype = DatetimeTZDtype(tz=tz_aware_fixture) boxed, box_dtype = box # read from parametrized fixture - # takes the opinion that DatetimeTZ should have single na-marker - # using iNaT would lead to errors elsewhere -> NaT - if not boxed and fill_value == iNaT: - # TODO: are we sure iNaT _should_ be cast to NaT? 
- pytest.xfail("wrong missing value marker") - expected_dtype = dtype - # DatetimeTZDtype does not use iNaT as missing value marker exp_val_for_scalar = NaT exp_val_for_array = NaT @@ -678,25 +670,17 @@ def test_maybe_promote_any_numpy_dtype_with_na( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if is_integer_dtype(dtype) and dtype == "uint64" and fill_value == iNaT: - # uint64 + negative int casts to object; iNaT is considered as missing - expected_dtype = np.dtype(object) - exp_val_for_scalar = np.nan - elif is_integer_dtype(dtype) and fill_value == iNaT: - # other integer + iNaT casts to int64 - expected_dtype = np.int64 - exp_val_for_scalar = iNaT - elif is_integer_dtype(dtype) and fill_value is not NaT: + if is_integer_dtype(dtype) and fill_value is not NaT: # integer + other missing value (np.nan / None) casts to float expected_dtype = np.float64 exp_val_for_scalar = np.nan - elif is_object_dtype(dtype) and (fill_value == iNaT or fill_value is NaT): + elif is_object_dtype(dtype) and fill_value is NaT: # inserting into object does not cast the value # but *does* cast None to np.nan expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value elif is_datetime_or_timedelta_dtype(dtype): - # datetime / timedelta cast all missing values to iNaT + # datetime / timedelta cast all missing values to dtyped NaT expected_dtype = dtype exp_val_for_scalar = dtype.type("NaT", "ns") elif fill_value is NaT: From 11603ad83432ab5ce00bcbeefba6d12608a8f409 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 3 Oct 2019 13:48:56 -0500 Subject: [PATCH 11/13] Fix float32 case --- pandas/core/dtypes/cast.py | 5 ++++- pandas/tests/dtypes/cast/test_promote.py | 3 --- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7bce86b572f0c..94a9f62f1fafe 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -385,7 +385,10 @@ def 
maybe_promote(dtype, fill_value=np.nan): if issubclass(dtype.type, np.bool_): dtype = np.object_ elif issubclass(dtype.type, np.integer): - dtype = np.float64 + dtype = np.dtype(np.float64) + if not isna(fill_value): + # TODO: use specific NAN instead of np.nan? + fill_value = dtype.type(fill_value) elif is_bool(fill_value): if not issubclass(dtype.type, np.bool_): dtype = np.object_ diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 42c019e19b8f4..fcd87a5814b72 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -174,9 +174,6 @@ def test_maybe_promote_int_with_float(any_int_dtype, float_dtype, box): fill_dtype = np.dtype(float_dtype) boxed, box_dtype = box # read from parametrized fixture - if float_dtype == "float32" and not boxed: - pytest.xfail("falsely upcasts to float64") - # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] From 947556d35944f1cf92bdc17afcf42b961dd619af Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 8 Oct 2019 09:58:22 -0700 Subject: [PATCH 12/13] revert --- pandas/core/dtypes/cast.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 94a9f62f1fafe..4435b2518e90b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -387,7 +387,6 @@ def maybe_promote(dtype, fill_value=np.nan): elif issubclass(dtype.type, np.integer): dtype = np.dtype(np.float64) if not isna(fill_value): - # TODO: use specific NAN instead of np.nan? 
fill_value = dtype.type(fill_value) elif is_bool(fill_value): if not issubclass(dtype.type, np.bool_): From b7db096b4f058bbfa8ef99a97e20cb8eebb57bdf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 8 Oct 2019 10:02:45 -0700 Subject: [PATCH 13/13] remove one xfail --- pandas/tests/dtypes/cast/test_promote.py | 37 ++++++++++++++---------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 9f0f58aeeecc6..45dbdf72209b6 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -658,13 +658,18 @@ def test_maybe_promote_any_with_datetime64( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if boxed and box_dtype is None: - if is_datetime64_dtype(dtype): - if not is_datetime64_dtype(type(fill_value)): - pytest.xfail("falsely upcasts to object") - else: - if is_datetime64_dtype(type(fill_value)): - pytest.xfail("mix of lack of upcasting, resp. wrong missing value") + if is_datetime64_dtype(dtype): + if boxed and ( + box_dtype == object + or (box_dtype is None and not is_datetime64_dtype(type(fill_value))) + ): + pytest.xfail("falsely upcasts to object") + else: + if boxed and ( + box_dtype == "dt_dtype" + or (box_dtype is None and is_datetime64_dtype(type(fill_value))) + ): + pytest.xfail("mix of lack of upcasting, resp. 
wrong missing value") # special case for box_dtype box_dtype = np.dtype(datetime64_dtype) if box_dtype == "dt_dtype" else box_dtype @@ -879,15 +884,15 @@ def test_maybe_promote_any_with_timedelta64( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if boxed: - if is_timedelta64_dtype(dtype): - if box_dtype == object: - pytest.xfail("falsely upcasts to object") - elif box_dtype is None and not is_timedelta64_dtype(type(fill_value)): - pytest.xfail("falsely upcasts to object") - else: - if box_dtype is None and is_timedelta64_dtype(type(fill_value)): - pytest.xfail("does not upcast correctly") + if is_timedelta64_dtype(dtype): + if boxed and ( + box_dtype == object + or (box_dtype is None and not is_timedelta64_dtype(type(fill_value))) + ): + pytest.xfail("falsely upcasts to object") + else: + if boxed and box_dtype is None and is_timedelta64_dtype(type(fill_value)): + pytest.xfail("does not upcast correctly") # special case for box_dtype box_dtype = np.dtype(timedelta64_dtype) if box_dtype == "td_dtype" else box_dtype