From 9fe57d479a398d301a479bfe09ed0ea85482fff4 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 13 Nov 2020 14:52:51 -0800 Subject: [PATCH 1/3] CLN: avoid incorrect usages of values_for_argsort --- pandas/core/indexes/extension.py | 8 ++++---- pandas/core/internals/concat.py | 2 +- pandas/core/reshape/merge.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 3103c27b35d74..4d09a97b18eed 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -231,10 +231,7 @@ def __getitem__(self, key): # --------------------------------------------------------------------- def _get_engine_target(self) -> np.ndarray: - # NB: _values_for_argsort happens to match the desired engine targets - # for all of our existing EA-backed indexes, but in general - # cannot be relied upon to exist. - return self._data._values_for_argsort() + return np.asarray(self._data) def repeat(self, repeats, axis=None): nv.validate_repeat(tuple(), dict(axis=axis)) @@ -306,6 +303,9 @@ class NDArrayBackedExtensionIndex(ExtensionIndex): _data: NDArrayBackedExtensionArray + def _get_engine_target(self) -> np.ndarray: + return self._data._ndarray + def delete(self, loc): """ Make new Index with passed location(-s) deleted diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 8efba87b14ce5..32e0a2ed0e8bb 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -227,7 +227,7 @@ def is_na(self) -> bool: return isna_all(values_flat) - def get_reindexed_values(self, empty_dtype, upcasted_na): + def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na): if upcasted_na is None: # No upcasting is necessary fill_value = self.block.fill_value diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index dd45a00155721..7aa5a289cc45a 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1947,8 +1947,8 @@ def _factorize_keys( if is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(rk.dtype): # Extract the ndarray (UTC-localized) values # Note: we dont need the dtypes to match, as these can still be compared - lk, _ = lk._values_for_factorize() - rk, _ = rk._values_for_factorize() + lk = lk._ndarray + rk = rk._ndarray elif ( is_categorical_dtype(lk.dtype) From 89abcda68b2e23668598564e121a435fe8e2387d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 13 Nov 2020 16:12:55 -0800 Subject: [PATCH 2/3] mypy fixup --- pandas/core/internals/concat.py | 3 +-- pandas/core/reshape/merge.py | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 32e0a2ed0e8bb..205af5354d333 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -248,9 +248,8 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na): empty_dtype ): if self.block is None: - array = empty_dtype.construct_array_type() # TODO(EA2D): special case unneeded with 2D EAs - return array( + return DatetimeArray( np.full(self.shape[1], fill_value.value), dtype=empty_dtype ) elif getattr(self.block, "is_categorical", False): diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 7aa5a289cc45a..984acf1bc3a37 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -6,7 +6,7 @@ import datetime from functools import partial import string -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, Optional, Tuple, cast import warnings import numpy as np @@ -50,6 +50,7 @@ if TYPE_CHECKING: from pandas import DataFrame + from pandas.core.arrays import DatetimeArray @Substitution("\nleft : DataFrame") @@ -1947,8 +1948,8 @@ def _factorize_keys( if is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(rk.dtype): # Extract the ndarray (UTC-localized) values # Note: we dont need the dtypes to match, as these can still be compared - lk = lk._ndarray - rk = rk._ndarray + lk = cast("DatetimeArray", lk)._ndarray + rk = cast("DatetimeArray", lk)._ndarray elif ( is_categorical_dtype(lk.dtype) From 26641bd182a20cd62d40a6ead2288b9b1b61213b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 13 Nov 2020 16:58:39 -0800 Subject: [PATCH 3/3] typo fixup --- pandas/core/reshape/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 984acf1bc3a37..918a894a27916 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1949,7 +1949,7 @@ def _factorize_keys( # Extract the ndarray (UTC-localized) values # Note: we dont need the dtypes to match, as these can still be compared lk = cast("DatetimeArray", lk)._ndarray - rk = cast("DatetimeArray", lk)._ndarray + rk = cast("DatetimeArray", rk)._ndarray elif ( is_categorical_dtype(lk.dtype)