From 5685cb169e5f0afd1b16c057c341163e8fd676f2 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 2 Nov 2020 11:38:06 +0000 Subject: [PATCH 01/15] refactor core indexes --- pandas/core/indexes/api.py | 5 +- pandas/core/indexes/base.py | 72 +++++++++++------------------ pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 35 +++++++------- pandas/core/indexes/datetimes.py | 16 +++---- 5 files changed, 54 insertions(+), 76 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 18981a2190552..d4f22e482af84 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -282,7 +282,4 @@ def all_indexes_same(indexes): """ itr = iter(indexes) first = next(itr) - for index in itr: - if not first.equals(index): - return False - return True + return all(first.equals(index) for index in itr) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 18e4540b9a25e..3bdb9d3c5b759 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -115,7 +115,7 @@ unique="Index", duplicated="np.ndarray", ) -_index_shared_docs = dict() +_index_shared_docs = {} str_t = str @@ -137,10 +137,9 @@ def _new_Index(cls, d): return _new_PeriodIndex(cls, **d) - if issubclass(cls, ABCMultiIndex): - if "labels" in d and "codes" not in d: - # GH#23752 "labels" kwarg has been replaced with "codes" - d["codes"] = d.pop("labels") + if issubclass(cls, ABCMultiIndex) and "labels" in d and "codes" not in d: + # GH#23752 "labels" kwarg has been replaced with "codes" + d["codes"] = d.pop("labels") return cls.__new__(cls, **d) @@ -902,9 +901,7 @@ def __repr__(self) -> str_t: if data is None: data = "" - res = f"{klass_name}({data}{prepr})" - - return res + return f"{klass_name}({data}{prepr})" def _format_space(self) -> str_t: @@ -983,7 +980,6 @@ def _format_with_header( if is_object_dtype(values.dtype): values = lib.maybe_convert_objects(values, safe=1) - if is_object_dtype(values.dtype): result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] # could have nans @@ -1584,7 +1580,7 @@ def _drop_level_numbers(self, levnums: List[int]): Drop MultiIndex levels by level _number_, not name. """ - if len(levnums) == 0: + if not levnums: return self if len(levnums) >= self.nlevels: raise ValueError( @@ -3121,7 +3117,7 @@ def get_indexer( "Reindexing only valid with uniquely valued Index objects" ) - if method == "pad" or method == "backfill": + if method in ["pad", "backfill"]: indexer = self._get_fill_indexer(target, method, limit, tolerance) elif method == "nearest": indexer = self._get_nearest_indexer(target, limit, tolerance) @@ -3234,8 +3230,7 @@ def _filter_indexer_tolerance( tolerance, ) -> np.ndarray: distance = abs(self._values[indexer] - target) - indexer = np.where(distance <= tolerance, indexer, -1) - return indexer + return np.where(distance <= tolerance, indexer, -1) # -------------------------------------------------------------------- # Indexer Conversion Methods @@ -3344,9 +3339,7 @@ def _convert_listlike_indexer(self, keyarr): keyarr : numpy.ndarray Return tuple-safe keys. """ - if isinstance(keyarr, Index): - pass - else: + if not isinstance(keyarr, Index): keyarr = self._convert_arr_indexer(keyarr) indexer = self._convert_list_indexer(keyarr) @@ -3365,8 +3358,7 @@ def _convert_arr_indexer(self, keyarr): ------- converted_keyarr : array-like """ - keyarr = com.asarray_tuplesafe(keyarr) - return keyarr + return com.asarray_tuplesafe(keyarr) def _convert_list_indexer(self, keyarr): """ @@ -3738,9 +3730,8 @@ def _join_multi(self, other, how, return_indexers=True): other, level, how=how, return_indexers=return_indexers ) - if flip_order: - if isinstance(result, tuple): - return result[0], result[2], result[1] + if flip_order and isinstance(result, tuple): + return result[0], result[2], result[1] return result @final @@ -4292,7 +4283,7 @@ def append(self, other): to_concat = [self] if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) + to_concat += list(other) else: to_concat.append(other) @@ -4781,9 +4772,7 @@ def _should_fallback_to_positional(self) -> bool: """ Should an integer key be treated as positional? """ - if self.holds_integer() or self.is_boolean(): - return False - return True + return not self.holds_integer() and not self.is_boolean() def _get_values_for_loc(self, series: "Series", loc, key): """ @@ -5176,11 +5165,7 @@ def _validate_indexer(self, form: str_t, key, kind: str_t): """ assert kind in ["getitem", "iloc"] - if key is None: - pass - elif is_integer(key): - pass - else: + if key is not None and not is_integer(key): self._invalid_indexer(form, key) def _maybe_cast_slice_bound(self, label, side: str_t, kind): @@ -5284,11 +5269,11 @@ def get_slice_bound(self, label, side: str_t, kind) -> int: slc = lib.maybe_indices_to_slice( slc.astype(np.intp, copy=False), len(self) ) - if isinstance(slc, np.ndarray): - raise KeyError( - f"Cannot get {side} slice bound for non-unique " - f"label: {repr(original_label)}" - ) + if isinstance(slc, np.ndarray): + raise KeyError( + f"Cannot get {side} slice bound for non-unique " + f"label: {repr(original_label)}" + ) if isinstance(slc, slice): if side == "left": @@ -5479,9 +5464,10 @@ def _cmp_method(self, other, op): """ Wrapper used to dispatch comparison operations. """ - if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)): - if len(self) != len(other): - raise ValueError("Lengths must match to compare") + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len( + self + ) != len(other): + raise ValueError("Lengths must match to compare") if not isinstance(other, ABCMultiIndex): other = extract_array(other, extract_numpy=True) @@ -5803,7 +5789,7 @@ def trim_front(strings: List[str]) -> List[str]: Trims zeros and decimal points. """ trimmed = strings - while len(strings) > 0 and all(x[0] == " " for x in trimmed): + while trimmed and all(x[0] == " " for x in trimmed): trimmed = [x[1:] for x in trimmed] return trimmed @@ -5873,15 +5859,11 @@ def _maybe_cast_with_dtype(data: np.ndarray, dtype: np.dtype, copy: bool) -> np. except ValueError: data = np.array(data, dtype=np.float64, copy=copy) - elif inferred == "string": - pass - else: + elif inferred != "string": data = data.astype(dtype) elif is_float_dtype(dtype): inferred = lib.infer_dtype(data, skipna=False) - if inferred == "string": - pass - else: + if inferred != "string": data = data.astype(dtype) else: data = np.array(data, dtype=dtype, copy=copy) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2f2836519d847..1315388224713 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -526,7 +526,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if self.is_unique and self.equals(target): return np.arange(len(self), dtype="intp") - if method == "pad" or method == "backfill": + if method in ["pad", "backfill"]: raise NotImplementedError( "method='pad' and method='backfill' not " "implemented yet for CategoricalIndex" diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9215fc8994d87..c0e2ce3c754cf 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -239,9 +239,8 @@ def min(self, axis=None, skipna=True, *args, **kwargs): i8 = self.asi8 try: # quick check - if len(i8) and self.is_monotonic: - if i8[0] != iNaT: - return self._data._box_func(i8[0]) + if len(i8) and self.is_monotonic and i8[0] != iNaT: + return self._data._box_func(i8[0]) if self.hasnans: if skipna: @@ -296,9 +295,8 @@ def max(self, axis=None, skipna=True, *args, **kwargs): i8 = self.asi8 try: # quick check - if len(i8) and self.is_monotonic: - if i8[-1] != iNaT: - return self._data._box_func(i8[-1]) + if len(i8) and self.is_monotonic and i8[-1] != iNaT: + return self._data._box_func(i8[-1]) if self.hasnans: if skipna: @@ -579,9 +577,12 @@ def _get_delete_freq(self, loc: int): loc = lib.maybe_indices_to_slice( np.asarray(loc, dtype=np.intp), len(self) ) - if isinstance(loc, slice) and loc.step in (1, None): - if loc.start in (0, None) or loc.stop in (len(self), None): - freq = self.freq + if ( + isinstance(loc, slice) + and loc.step in (1, None) + and (loc.start in (0, None) or loc.stop in (len(self), None)) + ): + freq = self.freq return freq def _get_insert_freq(self, loc, item): @@ -599,7 +600,7 @@ def _get_insert_freq(self, loc, item): if self.size: if item is NaT: pass - elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: + elif loc in [0, -len(self)] and item + self.freq == self[0]: freq = self.freq elif (loc == len(self)) and item - self.freq == self[-1]: freq = self.freq @@ -684,8 +685,7 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): @Appender(Index.difference.__doc__) def difference(self, other, sort=None): - new_idx = super().difference(other, sort=sort)._with_freq(None) - return new_idx + return super().difference(other, sort=sort)._with_freq(None) def intersection(self, other, sort=False): """ @@ -727,10 +727,9 @@ def intersection(self, other, sort=False): if not isinstance(other, type(self)): result = Index.intersection(self, other, sort=sort) - if isinstance(result, type(self)): - if result.freq is None: - # TODO: no tests rely on this; needed? - result = result._with_freq("infer") + if isinstance(result, type(self)) and result.freq is None: + # TODO: no tests rely on this; needed? + result = result._with_freq("infer") return result elif not self._can_fast_intersect(other): @@ -876,13 +875,13 @@ def _union(self, other, sort): elif result.freq is None: # TODO: no tests rely on this; needed? result = result._with_freq("infer") - return result else: i8self = Int64Index._simple_new(self.asi8) i8other = Int64Index._simple_new(other.asi8) i8result = i8self._union(i8other, sort=sort) result = type(self)(i8result, dtype=self.dtype, freq="infer") - return result + + return result # -------------------------------------------------------------------- # Join Methods diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index aa16dc9752565..51cf0f1d7455f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -425,12 +425,13 @@ def _maybe_utc_convert(self, other: Index) -> Tuple["DatetimeIndex", Index]: this = self if isinstance(other, DatetimeIndex): - if self.tz is not None: - if other.tz is None: - raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") - elif other.tz is not None: + if ( + self.tz is not None + and other.tz is None + or self.tz is None + and other.tz is not None + ): raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") - if not timezones.tz_compare(self.tz, other.tz): this = self.tz_convert("UTC") other = other.tz_convert("UTC") @@ -758,8 +759,7 @@ def _get_string_slice(self, key: str): freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None)) parsed, reso = parsing.parse_time_string(key, freq) reso = Resolution.from_attrname(reso) - loc = self._partial_date_slice(reso, parsed) - return loc + return self._partial_date_slice(reso, parsed) def slice_indexer(self, start=None, end=None, step=None, kind=None): """ @@ -822,7 +822,7 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # -------------------------------------------------------------------- def is_type_compatible(self, typ) -> bool: - return typ == self.inferred_type or typ == "datetime" + return typ in [self.inferred_type, "datetime"] @property def inferred_type(self) -> str: From 7695e66c5d2fafcb8d671b9c9ff0ecfe3200c733 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 2 Nov 2020 14:58:53 +0000 Subject: [PATCH 02/15] parens --- pandas/core/indexes/datetimes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 51cf0f1d7455f..0b92a9fbdc413 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -426,10 +426,10 @@ def _maybe_utc_convert(self, other: Index) -> Tuple["DatetimeIndex", Index]: if isinstance(other, DatetimeIndex): if ( - self.tz is not None - and other.tz is None - or self.tz is None - and other.tz is not None + (self.tz is not None) + and (other.tz is None) + or (self.tz is None) + and (other.tz is not None) ): raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") if not timezones.tz_compare(self.tz, other.tz): From d5d453055be61865f31e385fd0a8b5045605631d Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 2 Nov 2020 15:00:26 +0000 Subject: [PATCH 03/15] reversions --- pandas/core/indexes/base.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3bdb9d3c5b759..b1d82e9918108 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -137,9 +137,10 @@ def _new_Index(cls, d): return _new_PeriodIndex(cls, **d) - if issubclass(cls, ABCMultiIndex) and "labels" in d and "codes" not in d: - # GH#23752 "labels" kwarg has been replaced with "codes" - d["codes"] = d.pop("labels") + if issubclass(cls, ABCMultiIndex): + if "labels" in d and "codes" not in d: + # GH#23752 "labels" kwarg has been replaced with "codes" + d["codes"] = d.pop("labels") return cls.__new__(cls, **d) @@ -5269,11 +5270,11 @@ def get_slice_bound(self, label, side: str_t, kind) -> int: slc = lib.maybe_indices_to_slice( slc.astype(np.intp, copy=False), len(self) ) - if isinstance(slc, np.ndarray): - raise KeyError( - f"Cannot get {side} slice bound for non-unique " - f"label: {repr(original_label)}" - ) + if isinstance(slc, np.ndarray): + raise KeyError( + f"Cannot get {side} slice bound for non-unique " + f"label: {repr(original_label)}" + ) if isinstance(slc, slice): if side == "left": From 23be6a5f4120ee2a346d8580e4ed4240cff3206d Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 2 Nov 2020 19:31:43 +0000 Subject: [PATCH 04/15] make pythonic --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b1d82e9918108..731de13b475ed 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5790,7 +5790,7 @@ def trim_front(strings: List[str]) -> List[str]: Trims zeros and decimal points. """ trimmed = strings - while trimmed and all(x[0] == " " for x in trimmed): + while trimmed and all(x.startswith(" ") for x in trimmed): trimmed = [x[1:] for x in trimmed] return trimmed From b3d1dec54ac29a47dbcc644c98e52402078a7466 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 2 Nov 2020 19:34:07 +0000 Subject: [PATCH 05/15] try moving outside try --- pandas/core/indexes/datetimelike.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c0e2ce3c754cf..f5fe1d9b6e1bf 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -237,11 +237,11 @@ def min(self, axis=None, skipna=True, *args, **kwargs): return self._na_value i8 = self.asi8 - try: - # quick check - if len(i8) and self.is_monotonic and i8[0] != iNaT: - return self._data._box_func(i8[0]) + # quick check + if len(self) and self.is_monotonic and i8[0] != iNaT: + return self._data._box_func(i8[0]) + try: if self.hasnans: if skipna: min_stamp = self[~self._isnan].asi8.min() From e0d36eb95727de06705552be42555c5d7488a57d Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Nov 2020 19:02:22 +0000 Subject: [PATCH 06/15] move out of try-except --- pandas/core/indexes/base.py | 6 +++--- pandas/core/indexes/datetimelike.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5ae627bc24a69..c6196d1dc7b42 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5457,9 +5457,9 @@ def _cmp_method(self, other, op): """ Wrapper used to dispatch comparison operations. """ - if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len( - self - ) != len(other): + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and ( + len(self) != len(other) + ): raise ValueError("Lengths must match to compare") if not isinstance(other, ABCMultiIndex): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 15030e8b36b5d..63ce59335a332 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -239,16 +239,16 @@ def min(self, axis=None, skipna=True, *args, **kwargs): i8 = self.asi8 # quick check - if len(self) and self.is_monotonic and i8[0] != iNaT: + if self.is_monotonic and i8[0] != iNaT: return self._data._box_func(i8[0]) - try: - if self.hasnans: - if skipna: - min_stamp = self[~self._isnan].asi8.min() - else: - return self._na_value + if self.hasnans: + if skipna: + min_stamp = self[~self._isnan].asi8.min() else: - min_stamp = i8.min() + return self._na_value + else: + min_stamp = i8.min() + try: return self._data._box_func(min_stamp) except ValueError: return self._na_value From 26c419e3a9650b4d0cd4c9a5b1b084d0146b1046 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 5 Nov 2020 07:53:29 +0000 Subject: [PATCH 07/15] keep asi8.min() inside try-except --- pandas/core/indexes/datetimelike.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 63ce59335a332..a374890efc0b3 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -241,17 +241,17 @@ def min(self, axis=None, skipna=True, *args, **kwargs): # quick check if self.is_monotonic and i8[0] != iNaT: return self._data._box_func(i8[0]) - if self.hasnans: - if skipna: + try: + if self.hasnans and skipna: min_stamp = self[~self._isnan].asi8.min() + elif not self.hasnans: + min_stamp = i8.min() else: return self._na_value - else: - min_stamp = i8.min() - try: - return self._data._box_func(min_stamp) except ValueError: return self._na_value + else: + return self._data._box_func(min_stamp) def argmin(self, axis=None, skipna=True, *args, **kwargs): """ From 445f2e944c4351e7f1f6335c8b0748e2cd436618 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 8 Nov 2020 08:10:04 +0000 Subject: [PATCH 08/15] empty check --- pandas/core/indexes/datetimelike.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1a064493fc6f3..041143277f496 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -241,17 +241,13 @@ def min(self, axis=None, skipna=True, *args, **kwargs): # quick check if self.is_monotonic and i8[0] != iNaT: return self._data._box_func(i8[0]) - try: - if self.hasnans and skipna: - min_stamp = self[~self._isnan].asi8.min() - elif not self.hasnans: - min_stamp = i8.min() - else: - return self._na_value - except ValueError: - return self._na_value + if self.hasnans and skipna and not self[~self._isnan].empty: + min_stamp = self[~self._isnan].asi8.min() + elif not self.hasnans: + min_stamp = i8.min() else: - return self._data._box_func(min_stamp) + return self._na_value + return self._data._box_func(min_stamp) def argmin(self, axis=None, skipna=True, *args, **kwargs): """ From e8f117c181edad7add436dd483e7d931afaf0155 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 8 Nov 2020 08:14:57 +0000 Subject: [PATCH 09/15] similar simplification in max --- pandas/core/indexes/datetimelike.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 041143277f496..7ddbb858add93 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -289,21 +289,17 @@ def max(self, axis=None, skipna=True, *args, **kwargs): return self._na_value i8 = self.asi8 - try: - # quick check - if len(i8) and self.is_monotonic and i8[-1] != iNaT: - return self._data._box_func(i8[-1]) - - if self.hasnans: - if skipna: - max_stamp = self[~self._isnan].asi8.max() - else: - return self._na_value - else: - max_stamp = i8.max() - return self._data._box_func(max_stamp) - except ValueError: + + # quick check + if self.is_monotonic and i8[-1] != iNaT: + return self._data._box_func(i8[-1]) + if self.hasnans and skipna and not self[~self._isnan].empty: + max_stamp = self[~self._isnan].asi8.max() + elif not self.hasnans: + max_stamp = i8.max() + else: return self._na_value + return self._data._box_func(max_stamp) def argmax(self, axis=None, skipna=True, *args, **kwargs): """ From a58de562ed6b20e62851e2648538179d195e7370 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 23 Nov 2020 18:55:52 +0000 Subject: [PATCH 10/15] fix merge error --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 388f5f060dd95..e722b6be7d92b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -815,7 +815,7 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # -------------------------------------------------------------------- def is_type_compatible(self, kind: str) -> bool: - return typ in [self.inferred_type, "datetime"] + return kind in [self.inferred_type, "datetime"] @property def inferred_type(self) -> str: From be80fb7045d46c9c09243a354185cc85bc13d5a6 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Tue, 24 Nov 2020 14:31:28 +0000 Subject: [PATCH 11/15] wip --- pandas/core/common.py | 4 ++++ pandas/core/indexes/base.py | 9 +++++---- pandas/core/indexes/datetimelike.py | 5 ++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 24680fc855b0d..7b4763038cb70 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -302,6 +302,10 @@ def is_full_slice(obj, line) -> bool: ) +def is_null_slice(obj) -> bool: + return isinstance(obj, slice) and obj.start is None and obj.stop is None and obj.step is None + + def get_callable_name(obj): # typical case has name if hasattr(obj, "__name__"): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3ec54ffa32964..d3ba9eb56270d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5833,10 +5833,11 @@ def trim_front(strings: List[str]) -> List[str]: """ Trims zeros and decimal points. """ - trimmed = strings - while trimmed and all(x.startswith(" ") for x in trimmed): - trimmed = [x[1:] for x in trimmed] - return trimmed + if not strings: + return strings + while all(x[0] == ' ' for x in strings): + strings = [x[1:] for x in strings] + return strings def _validate_join_method(method: str): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 7dd188d6c4084..3aca5aa90576c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -546,10 +546,9 @@ def _get_delete_freq(self, loc: int): np.asarray(loc, dtype=np.intp), len(self) ) if ( - isinstance(loc, slice) - and loc.step in (1, None) - and (loc.start in (0, None) or loc.stop in (len(self), None)) + com.is_full_slice(loc, len(self)) or com.is_null_slice(loc) ): + breakpoint() freq = self.freq return freq From dc0b84ebb5372c6da86a9a82a203a982009c19aa Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Tue, 24 Nov 2020 15:15:19 +0000 Subject: [PATCH 12/15] early return --- pandas/core/common.py | 4 ---- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/datetimelike.py | 27 ++++++++++++++------------- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 7b4763038cb70..24680fc855b0d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -302,10 +302,6 @@ def is_full_slice(obj, line) -> bool: ) -def is_null_slice(obj) -> bool: - return isinstance(obj, slice) and obj.start is None and obj.stop is None and obj.step is None - - def get_callable_name(obj): # typical case has name if hasattr(obj, "__name__"): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d3ba9eb56270d..4576270b431a3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5835,7 +5835,7 @@ def trim_front(strings: List[str]) -> List[str]: """ if not strings: return strings - while all(x[0] == ' ' for x in strings): + while all(x[0] == " " for x in strings): strings = [x[1:] for x in strings] return strings diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3aca5aa90576c..57f6a8ea0cca5 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -243,7 +243,7 @@ def min(self, axis=None, skipna=True, *args, **kwargs): i8 = self.asi8 - if self.is_monotonic_increasing: + if len(i8) and self.is_monotonic_increasing: # quick check if i8[0] != iNaT: return self._data._box_func(i8[0]) @@ -300,7 +300,7 @@ def max(self, axis=None, skipna=True, *args, **kwargs): i8 = self.asi8 - if self.is_monotonic: + if len(i8) and self.is_monotonic: # quick check if i8[-1] != iNaT: return self._data._box_func(i8[-1]) @@ -545,11 +545,9 @@ def _get_delete_freq(self, loc: int): loc = lib.maybe_indices_to_slice( np.asarray(loc, dtype=np.intp), len(self) ) - if ( - com.is_full_slice(loc, len(self)) or com.is_null_slice(loc) - ): - breakpoint() - freq = self.freq + if isinstance(loc, slice) and loc.step in (1, None): + if loc.start in (0, None) or loc.stop in (len(self), None): + freq = self.freq return freq def _get_insert_freq(self, loc, item): @@ -567,7 +565,7 @@ def _get_insert_freq(self, loc, item): if self.size: if item is NaT: pass - elif loc in [0, -len(self)] and item + self.freq == self[0]: + elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: freq = self.freq elif (loc == len(self)) and item - self.freq == self[-1]: freq = self.freq @@ -650,7 +648,8 @@ def is_type_compatible(self, kind: str) -> bool: @Appender(Index.difference.__doc__) def difference(self, other, sort=None): - return super().difference(other, sort=sort)._with_freq(None) + new_idx = super().difference(other, sort=sort)._with_freq(None) + return new_idx def intersection(self, other, sort=False): """ @@ -692,9 +691,10 @@ def intersection(self, other, sort=False): if not isinstance(other, type(self)): result = Index.intersection(self, other, sort=sort) - if isinstance(result, type(self)) and result.freq is None: - # TODO: no tests rely on this; needed? - result = result._with_freq("infer") + if isinstance(result, type(self)): + if result.freq is None: + # TODO: no tests rely on this; needed? + result = result._with_freq("infer") return result elif not self._can_fast_intersect(other): @@ -843,6 +843,7 @@ def _union(self, other, sort): elif result.freq is None: # TODO: no tests rely on this; needed? result = result._with_freq("infer") + return result else: i8self = Int64Index._simple_new(self.asi8) i8other = Int64Index._simple_new(other.asi8) @@ -852,7 +853,7 @@ def _union(self, other, sort): result = type(self)( i8result, dtype=self.dtype, freq="infer" # type: ignore[call-arg] ) - return result + return result # -------------------------------------------------------------------- # Join Methods From 4074a888f0e2bbac54478438f8516a83314335ae Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Tue, 24 Nov 2020 15:25:07 +0000 Subject: [PATCH 13/15] parens --- pandas/core/indexes/datetimes.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e36055b2d4e2a..338cc10a90b99 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -407,11 +407,8 @@ def _maybe_utc_convert(self, other: Index) -> Tuple["DatetimeIndex", Index]: this = self if isinstance(other, DatetimeIndex): - if ( - (self.tz is not None) - and (other.tz is None) - or (self.tz is None) - and (other.tz is not None) + if (self.tz is not None and other.tz is None) or ( + self.tz is None and other.tz is not None ): raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") if not timezones.tz_compare(self.tz, other.tz): From 00d6be00faea7b9946da910ef8f841b4489caacd Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Tue, 24 Nov 2020 15:28:09 +0000 Subject: [PATCH 14/15] :art: --- pandas/core/indexes/datetimes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 338cc10a90b99..51760ce9f3074 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -411,6 +411,7 @@ def _maybe_utc_convert(self, other: Index) -> Tuple["DatetimeIndex", Index]: self.tz is None and other.tz is not None ): raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + if not timezones.tz_compare(self.tz, other.tz): this = self.tz_convert("UTC") other = other.tz_convert("UTC") From 1b254ae59bff5d4a3b7cf6bb2c7a846d871350f2 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 9 Dec 2020 18:40:19 +0000 Subject: [PATCH 15/15] coverage --- pandas/core/indexes/base.py | 14 ++++++++++++-- pandas/core/indexes/datetimes.py | 4 +--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 75361b45bf10a..1c43aabbd0e04 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3408,7 +3408,9 @@ def _convert_listlike_indexer(self, keyarr): keyarr : numpy.ndarray Return tuple-safe keys. """ - if not isinstance(keyarr, Index): + if isinstance(keyarr, Index): + pass + else: keyarr = self._convert_arr_indexer(keyarr) indexer = self._convert_list_indexer(keyarr) @@ -5925,10 +5927,18 @@ def ensure_has_len(seq): def trim_front(strings: List[str]) -> List[str]: """ Trims zeros and decimal points. + + Examples + -------- + >>> trim_front([" a", " b"]) + ['a', 'b'] + + >>> trim_front([" a", " "]) + ['a', ''] """ if not strings: return strings - while all(x[0] == " " for x in strings): + while all(strings) and all(x[0] == " " for x in strings): strings = [x[1:] for x in strings] return strings diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f8ab06d8c5ab1..156dbcbed21ab 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -408,9 +408,7 @@ def _maybe_utc_convert(self, other: Index) -> Tuple["DatetimeIndex", Index]: this = self if isinstance(other, DatetimeIndex): - if (self.tz is not None and other.tz is None) or ( - self.tz is None and other.tz is not None - ): + if (self.tz is None) ^ (other.tz is None): raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") if not timezones.tz_compare(self.tz, other.tz):