CLN refactor core indexes #37582
Changes from 23 commits
pandas/core/indexes/base.py
@@ -906,9 +906,7 @@ def __repr__(self) -> str_t:
         if data is None:
             data = ""
 
-        res = f"{klass_name}({data}{prepr})"
-
-        return res
+        return f"{klass_name}({data}{prepr})"
 
     def _format_space(self) -> str_t:
 
@@ -987,7 +985,6 @@ def _format_with_header(
         if is_object_dtype(values.dtype):
             values = lib.maybe_convert_objects(values, safe=1)
 
-        if is_object_dtype(values.dtype):
             result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
 
             # could have nans
@@ -1615,7 +1612,7 @@ def _drop_level_numbers(self, levnums: List[int]):
         Drop MultiIndex levels by level _number_, not name.
         """
 
-        if len(levnums) == 0:
+        if not levnums:
             return self
         if len(levnums) >= self.nlevels:
             raise ValueError(
@@ -3170,7 +3167,7 @@ def get_indexer(
                 "Reindexing only valid with uniquely valued Index objects"
             )
 
-        if method == "pad" or method == "backfill":
+        if method in ["pad", "backfill"]:
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
         elif method == "nearest":
             indexer = self._get_nearest_indexer(target, limit, tolerance)
@@ -3293,8 +3290,7 @@ def _filter_indexer_tolerance(
     ) -> np.ndarray:
         # error: Unsupported left operand type for - ("ExtensionArray")
         distance = abs(self._values[indexer] - target)  # type: ignore[operator]
-        indexer = np.where(distance <= tolerance, indexer, -1)
-        return indexer
+        return np.where(distance <= tolerance, indexer, -1)
 
     # --------------------------------------------------------------------
     # Indexer Conversion Methods
@@ -3403,9 +3399,7 @@ def _convert_listlike_indexer(self, keyarr):
         keyarr : numpy.ndarray
             Return tuple-safe keys.
         """
-        if isinstance(keyarr, Index):
-            pass
-        else:
+        if not isinstance(keyarr, Index):
             keyarr = self._convert_arr_indexer(keyarr)
 
         indexer = self._convert_list_indexer(keyarr)
@@ -3424,8 +3418,7 @@ def _convert_arr_indexer(self, keyarr):
         -------
         converted_keyarr : array-like
         """
-        keyarr = com.asarray_tuplesafe(keyarr)
-        return keyarr
+        return com.asarray_tuplesafe(keyarr)
 
     def _convert_list_indexer(self, keyarr):
         """
@@ -3793,9 +3786,8 @@ def _join_multi(self, other, how, return_indexers=True):
             other, level, how=how, return_indexers=return_indexers
         )
 
-        if flip_order:
-            if isinstance(result, tuple):
-                return result[0], result[2], result[1]
+        if flip_order and isinstance(result, tuple):
+            return result[0], result[2], result[1]
         return result
 
     @final
@@ -4340,7 +4332,7 @@ def append(self, other):
         to_concat = [self]
 
         if isinstance(other, (list, tuple)):
-            to_concat = to_concat + list(other)
+            to_concat += list(other)
         else:
             to_concat.append(other)
 
@@ -4835,9 +4827,7 @@ def _should_fallback_to_positional(self) -> bool:
         """
         Should an integer key be treated as positional?
         """
-        if self.holds_integer() or self.is_boolean():
-            return False
-        return True
+        return not self.holds_integer() and not self.is_boolean()
 
     def _get_values_for_loc(self, series: "Series", loc, key):
         """
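The `_should_fallback_to_positional` rewrite above is a straight De Morgan simplification: `not (a or b)` is the same as `(not a) and (not b)`. A tiny self-contained check of that equivalence:

# Exhaustive truth-table check that the boolean rewrite preserves behaviour.
for a in (False, True):
    for b in (False, True):
        assert (not (a or b)) == ((not a) and (not b))
print("equivalent for all inputs")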
@@ -5298,11 +5288,7 @@ def _validate_indexer(self, form: str_t, key, kind: str_t):
         """
         assert kind in ["getitem", "iloc"]
 
-        if key is None:
-            pass
-        elif is_integer(key):
-            pass
-        else:
+        if key is not None and not is_integer(key):
             raise self._invalid_indexer(form, key)
 
     def _maybe_cast_slice_bound(self, label, side: str_t, kind):
@@ -5607,9 +5593,10 @@ def _cmp_method(self, other, op):
         elif op in {operator.ne, operator.lt, operator.gt}:
             return np.zeros(len(self), dtype=bool)
 
-        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)):
-            if len(self) != len(other):
-                raise ValueError("Lengths must match to compare")
+        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
+            self
+        ) != len(other):
+            raise ValueError("Lengths must match to compare")
 
         if not isinstance(other, ABCMultiIndex):
             other = extract_array(other, extract_numpy=True)

(MarcoGorelli marked a review conversation on this change as resolved.)
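For reference, the behaviour both the old and the new formulation preserve: comparing an Index against an array-like of a different length raises. A quick check through the public comparison API (the private `_cmp_method` is not called directly here):

import numpy as np
import pandas as pd

idx = pd.Index([1, 2, 3])
try:
    idx == np.array([1, 2])  # lengths differ
except ValueError as err:
    print(err)  # Lengths must match to compare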
@@ -5930,10 +5917,11 @@ def trim_front(strings: List[str]) -> List[str]:
     """
     Trims zeros and decimal points.
     """
-    trimmed = strings
-    while len(strings) > 0 and all(x[0] == " " for x in trimmed):
-        trimmed = [x[1:] for x in trimmed]
-    return trimmed
+    if not strings:
+        return strings
+    while all(x[0] == " " for x in strings):
+        strings = [x[1:] for x in strings]
+    return strings
 
 
 def _validate_join_method(method: str):

Review thread on the `while all(x[0] == " " for x in strings):` line:

Comment: is this going to break when one of the strings becomes empty?

Reply: Yes, you're absolutely right, thanks! It fails on

In [1]: from pandas.core.indexes.base import trim_front

In [2]: trim_front([' ', ' a'])
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-2-1d23255f182c> in <module>
----> 1 trim_front([' ', ' a'])

~/pandas-dev/pandas/core/indexes/base.py in trim_front(strings)
   5848     """
   5849     trimmed = strings
-> 5850     while len(strings) > 0 and all(x[0] == " " for x in trimmed):
   5851         trimmed = [x[1:] for x in trimmed]
   5852     return trimmed

~/pandas-dev/pandas/core/indexes/base.py in <genexpr>(.0)
   5848     """
   5849     trimmed = strings
-> 5850     while len(strings) > 0 and all(x[0] == " " for x in trimmed):
   5851         trimmed = [x[1:] for x in trimmed]
   5852     return trimmed

IndexError: string index out of range

Reply: Looking at this again, I don't think it's an issue, because it's only ever called from [...] and [...], which in turn returns [...]. Nonetheless I can make the condition [...] and add a tiny test for which that'd be necessary.
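The condition the author offers to add is not captured above. A minimal sketch of one possible guard, assuming the goal is only to stop once any string has been fully consumed; this is illustrative and not necessarily the exact condition that landed in the PR:

from typing import List


def trim_front(strings: List[str]) -> List[str]:
    """
    Trim leading spaces evenly among all strings (sketch only).
    """
    if not strings:
        return strings
    # str.startswith(" ") is False for an empty string, so the loop stops
    # instead of raising IndexError on x[0].
    while all(x.startswith(" ") for x in strings):
        strings = [x[1:] for x in strings]
    return strings


print(trim_front([" ", " a"]))  # ['', 'a'], the case from the review comment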
@@ -6001,15 +5989,11 @@ def _maybe_cast_with_dtype(data: np.ndarray, dtype: np.dtype, copy: bool) -> np.
             except ValueError:
                 data = np.array(data, dtype=np.float64, copy=copy)
 
-        elif inferred == "string":
-            pass
-        else:
+        elif inferred != "string":
             data = data.astype(dtype)
     elif is_float_dtype(dtype):
         inferred = lib.infer_dtype(data, skipna=False)
-        if inferred == "string":
-            pass
-        else:
+        if inferred != "string":
             data = data.astype(dtype)
     else:
         data = np.array(data, dtype=dtype, copy=copy)
Second changed file (DatetimeIndex methods):
@@ -408,10 +408,9 @@ def _maybe_utc_convert(self, other: Index) -> Tuple["DatetimeIndex", Index]:
         this = self
 
         if isinstance(other, DatetimeIndex):
-            if self.tz is not None:
-                if other.tz is None:
-                    raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
-            elif other.tz is not None:
+            if (self.tz is not None and other.tz is None) or (
+                self.tz is None and other.tz is not None
+            ):
                 raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
 
             if not timezones.tz_compare(self.tz, other.tz):

Review thread on this change:

Comment: [...]

Comment: was this not viable?

Reply: as in, [...]? I don't think that would work because we don't want to raise if both [...]

Comment: Not [...]

Reply: ah sorry, I didn't actually know that was a Python command - that should work then, thanks so much!
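The reviewer's exact suggestion is missing above, but the condition under discussion is an exclusive-or on "is tz-aware": raise only when exactly one side has a timezone, never when both are naive or both are aware (even with different zones). One runnable way to express that, offered as an assumption about the kind of rewrite being discussed rather than what the PR ships:

import pandas as pd

aware = pd.date_range("2020-01-01", periods=3, tz="UTC")
naive = pd.date_range("2020-01-01", periods=3)

for left, right in [(aware, naive), (aware, aware), (naive, naive)]:
    # (left.tz is None) != (right.tz is None) is True only for the
    # mixed naive/aware case.
    if (left.tz is None) != (right.tz is None):
        print("would raise: Cannot join tz-naive with tz-aware DatetimeIndex")
    else:
        print("ok to join")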
@@ -745,8 +744,7 @@ def _get_string_slice(self, key: str):
         freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
         parsed, reso = parsing.parse_time_string(key, freq)
         reso = Resolution.from_attrname(reso)
-        loc = self._partial_date_slice(reso, parsed)
-        return loc
+        return self._partial_date_slice(reso, parsed)
 
     def slice_indexer(self, start=None, end=None, step=None, kind=None):
         """
Review comment: i think we did it this way to make coverage obvious
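The comment presumably refers to the explicit `pass` branches that this PR folds away (for example `elif inferred == "string": pass` in `_maybe_cast_with_dtype`). A small, self-contained illustration of the trade-off, assuming an ordinary line-coverage tool: with the explicit branch, the report shows directly whether the string case was ever exercised; the merged condition is shorter but hides that signal.

import numpy as np


def cast_explicit(data: np.ndarray, inferred: str) -> np.ndarray:
    # Pre-PR style: the "string" case has its own (empty) branch, so a
    # line-coverage report reveals whether tests ever reach it.
    if inferred == "string":
        pass
    else:
        data = data.astype("int64")
    return data


def cast_merged(data: np.ndarray, inferred: str) -> np.ndarray:
    # Post-PR style: equivalent behaviour, fewer lines, but the "string"
    # case no longer appears as a distinct line in coverage output.
    if inferred != "string":
        data = data.astype("int64")
    return data


arr = np.array([1.0, 2.0])
print(cast_explicit(arr, "floating"), cast_merged(arr, "string"))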