From c1c235f9eddf4fdd335b9ff2f9b7b6d9c9f9d33a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 4 Jan 2020 12:51:04 -0800 Subject: [PATCH 1/2] Implement PeriodIndex.difference without object-dtype cast --- pandas/_testing.py | 5 ++++- pandas/core/indexes/period.py | 38 ++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 2ebebc5d5e10a..7f6cd69625abd 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -619,7 +619,10 @@ def _get_ilevel_values(index, level): # accept level number only unique = index.levels[level] level_codes = index.codes[level] - filled = take_1d(unique.values, level_codes, fill_value=unique._na_value) + if is_extension_array_dtype(unique): + filled = unique.take(level_codes, fill_value=unique._na_value) + else: + filled = take_1d(unique.values, level_codes, fill_value=unique._na_value) values = unique._shallow_copy(filled, name=index.names[level]) return values diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 72ef335665ee5..8e8e07a4e06e6 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -17,6 +17,7 @@ is_float_dtype, is_integer, is_integer_dtype, + is_object_dtype, pandas_dtype, ) @@ -594,13 +595,13 @@ def get_indexer_non_unique(self, target): return ensure_platform_int(indexer), missing def _get_unique_index(self, dropna=False): - """ - wrap Index._get_unique_index to handle NaT - """ - res = super()._get_unique_index(dropna=dropna) - if dropna: - res = res.dropna() - return res + if self.is_unique and not dropna: + return self + + result = self._data.unique() + if dropna and self.hasnans: + result = result[~result.isna()] + return self._shallow_copy(result) def get_loc(self, key, method=None, tolerance=None): """ @@ -818,6 +819,29 @@ def intersection(self, other, sort=False): result = self._shallow_copy(np.asarray(i8result, dtype=np.int64), name=res_name) return result + def difference(self, other, sort=None): + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + res_name = get_op_result_name(self, other) + other = ensure_index(other) + + if self.equals(other): + # pass an empty PeriodArray with the appropriate dtype + return self._shallow_copy(self._data[:0]) + + if is_object_dtype(other): + return self.astype(object).difference(other).astype(self.dtype) + + elif not is_dtype_equal(self.dtype, other.dtype): + return self + + i8self = Int64Index._simple_new(self.asi8) + i8other = Int64Index._simple_new(other.asi8) + i8result = i8self.difference(i8other, sort=sort) + + result = self._shallow_copy(np.asarray(i8result, dtype=np.int64), name=res_name) + return result + # ------------------------------------------------------------------------ def _apply_meta(self, rawarr): From b7dd4123df08a51b6940be7398cfdde428530ac8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 4 Jan 2020 15:43:27 -0800 Subject: [PATCH 2/2] use take_1d with unique._values --- pandas/_testing.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 7f6cd69625abd..e9151e6df8260 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -619,10 +619,7 @@ def _get_ilevel_values(index, level): # accept level number only unique = index.levels[level] level_codes = index.codes[level] - if is_extension_array_dtype(unique): - filled = unique.take(level_codes, fill_value=unique._na_value) - else: - filled = take_1d(unique.values, level_codes, fill_value=unique._na_value) + filled = take_1d(unique._values, level_codes, fill_value=unique._na_value) values = unique._shallow_copy(filled, name=index.names[level]) return values