From ee0a8db044ec10ddeff4fdfefd6a99a52d5d1ff5 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 1 Oct 2021 14:55:52 -0700 Subject: [PATCH 1/3] CLN: assorted --- pandas/_libs/index.pyx | 9 +++++---- pandas/_libs/lib.pyx | 6 ++++-- pandas/core/internals/blocks.py | 7 +++++-- pandas/core/series.py | 12 +++++------- pandas/tests/apply/test_invalid_arg.py | 6 +++++- pandas/tests/arithmetic/test_numeric.py | 8 ++------ pandas/tests/extension/base/dim2.py | 9 ++------- pandas/tests/extension/base/methods.py | 11 +++++------ pandas/tests/extension/json/test_json.py | 6 ------ pandas/tests/indexes/test_engines.py | 2 +- pandas/tests/strings/test_find_replace.py | 5 +---- 11 files changed, 35 insertions(+), 46 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index f4d59962c111e..f93b4e9a019b6 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -174,7 +174,7 @@ cdef class IndexEngine: cdef inline _get_loc_duplicates(self, object val): # -> Py_ssize_t | slice | ndarray[bool] cdef: - Py_ssize_t diff + Py_ssize_t diff, left, right if self.is_monotonic_increasing: values = self.values @@ -318,8 +318,8 @@ cdef class IndexEngine: set stargets, remaining_stargets dict d = {} object val - int count = 0, count_missing = 0 - Py_ssize_t i, j, n, n_t, n_alloc + Py_ssize_t count = 0, count_missing = 0 + Py_ssize_t i, j, n, n_t, n_alloc, start, end bint d_has_nan = False, stargets_has_nan = False, need_nan_check = True values = self.values @@ -481,7 +481,8 @@ cdef class DatetimeEngine(Int64Engine): # with either a Timestamp or NaT (Timedelta or NaT for TimedeltaEngine) cdef: - int64_t loc + Py_ssize_t loc + if is_definitely_invalid_key(val): raise TypeError(f"'{val}' is an invalid key") diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4cc8d1ac2f60e..7e49c7f1952c4 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1423,7 +1423,8 @@ def infer_dtype(value: object, skipna: bool = True) -> str: # this will handle ndarray-like # e.g. categoricals dtype = value.dtype - if not isinstance(dtype, np.dtype): + if not cnp.PyArray_DescrCheck(dtype): + # i.e. not isinstance(dtype, np.dtype) inferred = _try_infer_map(value.dtype) if inferred is not None: return inferred @@ -2723,7 +2724,8 @@ cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas): """ If we have all-NaT values, cast these to the given dtype. """ - if isinstance(dtype, np.dtype): + if cnp.PyArray_DescrCheck(dtype): + # i.e. isinstance(dtype, np.dtype): if dtype == "M8[ns]": result = datetimes elif dtype == "m8[ns]": diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2279dbd283905..44806df09e199 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -959,6 +959,8 @@ def setitem(self, indexer, value): # GH#32395 if we're going to replace the values entirely, just # substitute in the new array if not self.is_object and isinstance(value, (IntegerArray, FloatingArray)): + # _can_hold_element will only allow us to get here if value + # has no NA entries. values[indexer] = value.to_numpy(value.dtype.numpy_dtype) else: values[indexer] = np.asarray(value) @@ -982,7 +984,7 @@ def setitem(self, indexer, value): if transpose: values = values.T - block = self.make_block(values) + block = type(self)(values, placement=self._mgr_locs, ndim=self.ndim) return block def putmask(self, mask, new) -> list[Block]: @@ -1469,7 +1471,8 @@ def putmask(self, mask, new) -> list[Block]: mask = mask.reshape(new_values.shape) new_values[mask] = new - return [self.make_block(values=new_values)] + nb = type(self)(new_values, placement=self._mgr_locs, ndim=self.ndim) + return [nb] @property def is_view(self) -> bool: diff --git a/pandas/core/series.py b/pandas/core/series.py index a891a7bdd8fbd..7b60f341e92ed 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3670,16 +3670,14 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> Series: mask = isna(values) if mask.any(): - result = Series(-1, index=self.index, name=self.name, dtype="int64") + result = Series(-1, index=self.index, name=self.name, dtype=np.intp) notmask = ~mask result[notmask] = np.argsort(values[notmask], kind=kind) - return self._constructor(result, index=self.index).__finalize__( - self, method="argsort" - ) else: - return self._constructor( - np.argsort(values, kind=kind), index=self.index, dtype="int64" - ).__finalize__(self, method="argsort") + result = np.argsort(values, kind=kind) + + res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp) + return res.__finalize__(self, method="argsort") def nlargest(self, n=5, keep="first") -> Series: """ diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 83a1baa9d13d6..b0faeba23a479 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -335,8 +335,12 @@ def test_transform_wont_agg_series(string_series, func): # GH 35964 # we are trying to transform with an aggregator msg = "Function did not transform" + + warn = RuntimeWarning if func[0] == "sqrt" else None + warn_msg = "invalid value encountered in sqrt" with pytest.raises(ValueError, match=msg): - string_series.transform(func) + with tm.assert_produces_warning(warn, match=warn_msg): + string_series.transform(func) @pytest.mark.parametrize( diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 1ff958640ecad..2cdc35bdf51cb 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1399,20 +1399,16 @@ def test_integer_array_add_list_like( right = box_1d_array(data) + container if Series == box_pandas_1d_array: - assert_function = tm.assert_series_equal expected = Series(expected_data, dtype="Int64") elif Series == box_1d_array: - assert_function = tm.assert_series_equal expected = Series(expected_data, dtype="object") elif Index in (box_pandas_1d_array, box_1d_array): - assert_function = tm.assert_index_equal expected = Int64Index(expected_data) else: - assert_function = tm.assert_numpy_array_equal expected = np.array(expected_data, dtype="object") - assert_function(left, expected) - assert_function(right, expected) + tm.assert_equal(left, expected) + tm.assert_equal(right, expected) def test_sub_multiindex_swapped_levels(): diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index d826a3c30bcc7..b56ec23c63569 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -183,16 +183,11 @@ def test_reductions_2d_axis0(self, data, method, request): if method in ["sum", "prod"] and data.dtype.kind in ["i", "u"]: # FIXME: kludge if data.dtype.kind == "i": - dtype = pd.Int64Dtype + dtype = pd.Int64Dtype() else: - dtype = pd.UInt64Dtype + dtype = pd.UInt64Dtype() expected = data.astype(dtype) - if type(expected) != type(data): - mark = pytest.mark.xfail( - reason="IntegerArray.astype is broken GH#38983" - ) - request.node.add_marker(mark) assert type(expected) == type(data), type(expected) assert dtype == expected.dtype diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index ca9c2acb9fd12..d390d4b5d8143 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -71,20 +71,19 @@ def test_apply_simple_series(self, data): def test_argsort(self, data_for_sorting): result = pd.Series(data_for_sorting).argsort() - expected = pd.Series(np.array([2, 0, 1], dtype=np.int64)) + # argsort result gets passed to take, so should be np.intp + expected = pd.Series(np.array([2, 0, 1], dtype=np.intp)) self.assert_series_equal(result, expected) def test_argsort_missing_array(self, data_missing_for_sorting): result = data_missing_for_sorting.argsort() - expected = np.array([2, 0, 1], dtype=np.dtype("int")) - # we don't care whether it's int32 or int64 - result = result.astype("int64", casting="safe") - expected = expected.astype("int64", casting="safe") + # argsort result gets passed to take, so should be np.intp + expected = np.array([2, 0, 1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) def test_argsort_missing(self, data_missing_for_sorting): result = pd.Series(data_missing_for_sorting).argsort() - expected = pd.Series(np.array([1, -1, 0], dtype=np.int64)) + expected = pd.Series(np.array([1, -1, 0], dtype=np.intp)) self.assert_series_equal(result, expected) def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value): diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index b5bb68e8a9a12..f090396a70724 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -227,12 +227,6 @@ def test_sort_values_frame(self): # TODO (EA.factorize): see if _values_for_factorize allows this. pass - def test_argsort(self, data_for_sorting): - super().test_argsort(data_for_sorting) - - def test_argsort_missing(self, data_missing_for_sorting): - super().test_argsort_missing(data_missing_for_sorting) - @pytest.mark.parametrize("ascending", [True, False]) def test_sort_values(self, data_for_sorting, ascending, sort_by_key): super().test_sort_values(data_for_sorting, ascending, sort_by_key) diff --git a/pandas/tests/indexes/test_engines.py b/pandas/tests/indexes/test_engines.py index 785db921dbab4..663ba7332c864 100644 --- a/pandas/tests/indexes/test_engines.py +++ b/pandas/tests/indexes/test_engines.py @@ -69,7 +69,7 @@ class TestTimedeltaEngine: pd.Timedelta(days=42).to_timedelta64(), ], ) - def test_not_contains_requires_timestamp(self, scalar): + def test_not_contains_requires_timedelta(self, scalar): tdi1 = pd.timedelta_range("42 days", freq="9h", periods=1234) tdi2 = tdi1.insert(1, pd.NaT) # non-monotonic tdi3 = tdi1.insert(3, tdi1[0]) # non-unique diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 391c71e57399a..f390cbf492202 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -878,10 +878,7 @@ def test_translate(index_or_series, any_string_dtype): expected = index_or_series( ["cdedefg", "cdee", "edddfg", "edefggg"], dtype=any_string_dtype ) - if index_or_series is Series: - tm.assert_series_equal(result, expected) - else: - tm.assert_index_equal(result, expected) + tm.assert_equal(result, expected) def test_translate_mixed_object(): From beda57833af016a9bd4002a060b4818930f096aa Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 1 Oct 2021 16:04:01 -0700 Subject: [PATCH 2/3] mypy and 32bit fixup --- pandas/core/series.py | 3 ++- pandas/tests/series/methods/test_argsort.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7b60f341e92ed..3573dadd71495 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3670,7 +3670,8 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> Series: mask = isna(values) if mask.any(): - result = Series(-1, index=self.index, name=self.name, dtype=np.intp) + result = np.empty(len(self), dtype=np.intp) + result.fill(-1) notmask = ~mask result[notmask] = np.argsort(values[notmask], kind=kind) else: diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 7a545378ef402..1fbc9ed787e11 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -38,11 +38,11 @@ def test_argsort(self, datetime_series): assert isna(shifted[4]) result = s.argsort() - expected = Series(range(5), dtype="int64") + expected = Series(range(5), dtype=np.intp) tm.assert_series_equal(result, expected) result = shifted.argsort() - expected = Series(list(range(4)) + [-1], dtype="int64") + expected = Series(list(range(4)) + [-1], dtype=np.intp) tm.assert_series_equal(result, expected) def test_argsort_stable(self): From 92413dedc9d153fd108cea0e496037864b7e17bb Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 1 Oct 2021 18:47:57 -0700 Subject: [PATCH 3/3] use np.full --- pandas/core/series.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3573dadd71495..6f48da82169b2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3670,8 +3670,7 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> Series: mask = isna(values) if mask.any(): - result = np.empty(len(self), dtype=np.intp) - result.fill(-1) + result = np.full(len(self), -1, dtype=np.intp) notmask = ~mask result[notmask] = np.argsort(values[notmask], kind=kind) else: