From 606b71d2c0de9478374194cec15801a060cd4653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sat, 31 Aug 2019 23:52:31 +0200 Subject: [PATCH 01/12] Adds test case and fix suggestion --- pandas/core/indexes/numeric.py | 17 ++++++++++------- pandas/tests/indexes/test_numeric.py | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 46bb8eafee3b9..9f5b285b39e3c 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -2,7 +2,7 @@ import numpy as np -from pandas._libs import index as libindex +from pandas._libs import index as libindex, lib from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.cast import astype_nansafe @@ -302,12 +302,15 @@ def _convert_scalar_indexer(self, key, kind=None): @Appender(_index_shared_docs["_convert_arr_indexer"]) def _convert_arr_indexer(self, keyarr): - # Cast the indexer to uint64 if possible so - # that the values returned from indexing are - # also uint64. - keyarr = com.asarray_tuplesafe(keyarr) - if is_integer_dtype(keyarr): - return com.asarray_tuplesafe(keyarr, dtype=np.uint64) + # Cast the indexer to uint64 if possible so that the values returned + # from indexing are also uint64. 
+ if is_integer_dtype(keyarr) or ( + lib.infer_dtype(keyarr, skipna=False) == "integer" + ): + keyarr = com.asarray_tuplesafe(keyarr, dtype=np.uint64) + else: + keyarr = com.asarray_tuplesafe(keyarr) + return keyarr @Appender(_index_shared_docs["_convert_index_indexer"]) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index e424b3601a4b2..4607205b5d862 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1187,3 +1187,22 @@ def test_range_float_union_dtype(): result = other.union(index) tm.assert_index_equal(result, expected) + + +def test_uint64_keys_in_list(): + # https://github.com/pandas-dev/pandas/issues/28023 + bug = pd.Series( + [0, 1, 2, 3, 4], + index=[ + 7606741985629028552, + 17876870360202815256, + 13106359306506049338, + 8991270399732411471, + 8991270399732411471, + ], + ) + + tm.assert_equal( + bug.loc[[7606741985629028552, 17876870360202815256]], bug.iloc[[0, 1]] + ) + From 7b0eef45c8cbed2a70f252137d0683ba0d1821ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Tue, 5 Nov 2019 21:56:35 +0100 Subject: [PATCH 02/12] Uses skipna=False default for libs.lib.infer_dtype() --- pandas/core/indexes/numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 9f5b285b39e3c..7f94899bfd9a6 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -305,7 +305,7 @@ def _convert_arr_indexer(self, keyarr): # Cast the indexer to uint64 if possible so that the values returned # from indexing are also uint64. 
if is_integer_dtype(keyarr) or ( - lib.infer_dtype(keyarr, skipna=False) == "integer" + lib.infer_dtype(keyarr) == "integer" ): keyarr = com.asarray_tuplesafe(keyarr, dtype=np.uint64) else: From 2b537c6e0a5e6f8e889a79a6ef4bfff8cdcbb507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Tue, 5 Nov 2019 22:16:18 +0100 Subject: [PATCH 03/12] Reverts last commit due to deprecation warning - Uses skipna=False default for libs.lib.infer_dtype() --- pandas/core/indexes/numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 7f94899bfd9a6..9f5b285b39e3c 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -305,7 +305,7 @@ def _convert_arr_indexer(self, keyarr): # Cast the indexer to uint64 if possible so that the values returned # from indexing are also uint64. if is_integer_dtype(keyarr) or ( - lib.infer_dtype(keyarr) == "integer" + lib.infer_dtype(keyarr, skipna=False) == "integer" ): keyarr = com.asarray_tuplesafe(keyarr, dtype=np.uint64) else: From 82cdc80820709d8f7c49e67b7bb354026be95313 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Tue, 5 Nov 2019 22:16:57 +0100 Subject: [PATCH 04/12] Add test case for #28279 --- pandas/tests/indexes/test_numeric.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 4607205b5d862..141df5284dea8 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1201,8 +1201,26 @@ def test_uint64_keys_in_list(): 8991270399732411471, ], ) - + tm.assert_equal( bug.loc[[7606741985629028552, 17876870360202815256]], bug.iloc[[0, 1]] ) + +def test_uint_index_not_converted_to_float64(): + # https://github.com/pandas-dev/pandas/issues/28279 + bug = pd.Series( + [0, 1, 2, 3, 4], + index=[ + 7606741985629028552, + 
17876870360202815256, + 13106359306506049338, + 8991270399732411471, + 8991270399732411472, + ], + ) + + assert isinstance( + bug.loc[[7606741985629028552, 17876870360202815256]].index, UInt64Index + ) + From d1818dd56de02ea7ee8489983a95e9a194b68073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Tue, 5 Nov 2019 22:27:24 +0100 Subject: [PATCH 05/12] Passes black and flake8 --- pandas/tests/indexes/test_numeric.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 141df5284dea8..5e2746408966c 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1223,4 +1223,3 @@ def test_uint_index_not_converted_to_float64(): assert isinstance( bug.loc[[7606741985629028552, 17876870360202815256]].index, UInt64Index ) - From 3e370b203a8f4fbee0b2a3ee7a673a036a777b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Wed, 6 Nov 2019 19:53:02 +0100 Subject: [PATCH 06/12] Removes unnecessary test --- pandas/tests/indexes/test_numeric.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 5e2746408966c..b788c4ca8fbd4 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1189,24 +1189,6 @@ def test_range_float_union_dtype(): tm.assert_index_equal(result, expected) -def test_uint64_keys_in_list(): - # https://github.com/pandas-dev/pandas/issues/28023 - bug = pd.Series( - [0, 1, 2, 3, 4], - index=[ - 7606741985629028552, - 17876870360202815256, - 13106359306506049338, - 8991270399732411471, - 8991270399732411471, - ], - ) - - tm.assert_equal( - bug.loc[[7606741985629028552, 17876870360202815256]], bug.iloc[[0, 1]] - ) - - def test_uint_index_not_converted_to_float64(): # https://github.com/pandas-dev/pandas/issues/28279 bug = pd.Series( From 
1722116a8db87d5eb92f65777222a77b18258361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Wed, 6 Nov 2019 19:55:16 +0100 Subject: [PATCH 07/12] Rename test --- pandas/tests/indexes/test_numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index b788c4ca8fbd4..143e39842db16 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1189,7 +1189,7 @@ def test_range_float_union_dtype(): tm.assert_index_equal(result, expected) -def test_uint_index_not_converted_to_float64(): +def test_uint_index_does_not_convert_to_float64(): # https://github.com/pandas-dev/pandas/issues/28279 bug = pd.Series( [0, 1, 2, 3, 4], From 500e73c9bbec449214490123504866b3ae14072c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 10 Nov 2019 12:48:14 +0100 Subject: [PATCH 08/12] Clarifies logic & tests explicitly --- pandas/core/indexes/numeric.py | 7 +++---- pandas/tests/indexes/test_numeric.py | 9 +++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 9f5b285b39e3c..514bdc3095aad 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -304,14 +304,13 @@ def _convert_scalar_indexer(self, key, kind=None): def _convert_arr_indexer(self, keyarr): # Cast the indexer to uint64 if possible so that the values returned # from indexing are also uint64. 
+ dtype = None if is_integer_dtype(keyarr) or ( lib.infer_dtype(keyarr, skipna=False) == "integer" ): - keyarr = com.asarray_tuplesafe(keyarr, dtype=np.uint64) - else: - keyarr = com.asarray_tuplesafe(keyarr) + dtype = np.uint64 - return keyarr + return com.asarray_tuplesafe(keyarr, dtype=dtype) @Appender(_index_shared_docs["_convert_index_indexer"]) def _convert_index_indexer(self, keyarr): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 143e39842db16..5ae1c68a9889b 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1191,7 +1191,7 @@ def test_range_float_union_dtype(): def test_uint_index_does_not_convert_to_float64(): # https://github.com/pandas-dev/pandas/issues/28279 - bug = pd.Series( + series = pd.Series( [0, 1, 2, 3, 4], index=[ 7606741985629028552, @@ -1202,6 +1202,7 @@ def test_uint_index_does_not_convert_to_float64(): ], ) - assert isinstance( - bug.loc[[7606741985629028552, 17876870360202815256]].index, UInt64Index - ) + result = series.loc[[7606741985629028552, 17876870360202815256]].index + expected = UInt64Index([7606741985629028552, 17876870360202815256], dtype='uint64') + + assert tm.assert_index_equal(result, expected) \ No newline at end of file From 813ed20e520f1b59bf35b635894efecd724c36bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 10 Nov 2019 12:52:46 +0100 Subject: [PATCH 09/12] Formatter... 
--- pandas/tests/indexes/test_numeric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 5ae1c68a9889b..f0e3246a222fb 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1203,6 +1203,6 @@ def test_uint_index_does_not_convert_to_float64(): ) result = series.loc[[7606741985629028552, 17876870360202815256]].index - expected = UInt64Index([7606741985629028552, 17876870360202815256], dtype='uint64') + expected = UInt64Index([7606741985629028552, 17876870360202815256], dtype="uint64") - assert tm.assert_index_equal(result, expected) \ No newline at end of file + assert tm.assert_index_equal(result, expected) From 78db1e83b06eb1919fff5e8ae023786416830b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 18 Nov 2019 22:27:44 +0100 Subject: [PATCH 10/12] Replay changes from #29529 & replay whatsnew in master --- doc/source/whatsnew/v1.0.0.rst | 3 +++ pandas/core/indexes/numeric.py | 13 +++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b40a64420a0be..5021628975016 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -333,6 +333,9 @@ Numeric - :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth: `DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) +- Improved error message when using `frac` > 1 and `replace` = False (:issue:`27451`) +- Bug 
in numeric indexes resulted in it being possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`) +- Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) - Conversion diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 514bdc3095aad..f5a61a232c74a 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -22,6 +22,7 @@ ABCFloat64Index, ABCInt64Index, ABCRangeIndex, + ABCSeries, ABCUInt64Index, ) from pandas.core.dtypes.missing import isna @@ -56,8 +57,16 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=None): if fastpath: return cls._simple_new(data, name=name) - # is_scalar, generators handled in coerce_to_ndarray - data = cls._coerce_to_ndarray(data) + # Coerce to ndarray if not already ndarray or Index + if not isinstance(data, (np.ndarray, Index)): + if is_scalar(data): + raise cls._scalar_data_error(data) + + # other iterable of some kind + if not isinstance(data, (ABCSeries, list, tuple)): + data = list(data) + + data = np.asarray(data, dtype=dtype) if issubclass(data.dtype.type, str): cls._string_data_error(data) From a5d20d97361bfbb86f5c18b4464ed3d52e1152cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 18 Nov 2019 22:31:59 +0100 Subject: [PATCH 11/12] Fixed test and added whatsnew entry --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/tests/indexes/test_numeric.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 5021628975016..466555e72f7b9 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -336,6 +336,7 @@ Numeric - Improved error message when using `frac` > 1 and `replace` = False (:issue:`27451`) - Bug in numeric indexes resulted in it being 
possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`) - Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) +- Bug in :meth:`NumericIndex._convert_arr_indexer` that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) - Conversion diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index f0e3246a222fb..7810fe4c1c93f 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1205,4 +1205,4 @@ def test_uint_index_does_not_convert_to_float64(): result = series.loc[[7606741985629028552, 17876870360202815256]].index expected = UInt64Index([7606741985629028552, 17876870360202815256], dtype="uint64") - assert tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) From aac01ea03dabfff28b9a67e07c99c88cfa146fb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sat, 23 Nov 2019 16:22:29 +0100 Subject: [PATCH 12/12] Addressed review --- doc/source/whatsnew/v1.0.0.rst | 4 ++-- pandas/tests/indexes/test_numeric.py | 15 +++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1da2814d20e23..950b8db373eef 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -345,8 +345,8 @@ Numeric - Improved error message when using `frac` > 1 and `replace` = False (:issue:`27451`) - Bug in numeric indexes resulted in it being possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. 
datetime-like) (:issue:`29539`) - Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) -- Bug in :meth:`NumericIndex._convert_arr_indexer` that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) -- +- Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) +- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be cast to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) Conversion ^^^^^^^^^^ diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index e49feb82b0c47..37976d89ecba4 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1213,18 +1213,25 @@ def test_range_float_union_dtype(): def test_uint_index_does_not_convert_to_float64(): # https://github.com/pandas-dev/pandas/issues/28279 + # https://github.com/pandas-dev/pandas/issues/28023 series = pd.Series( - [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4, 5], index=[ 7606741985629028552, 17876870360202815256, + 17876870360202815256, 13106359306506049338, 8991270399732411471, 8991270399732411472, ], ) - result = series.loc[[7606741985629028552, 17876870360202815256]].index - expected = UInt64Index([7606741985629028552, 17876870360202815256], dtype="uint64") + result = series.loc[[7606741985629028552, 17876870360202815256]] - tm.assert_index_equal(result, expected) + expected = UInt64Index( + [7606741985629028552, 17876870360202815256, 17876870360202815256], + dtype="uint64", + ) + tm.assert_index_equal(result.index, expected) + + tm.assert_equal(result, series[:3])