Skip to content

CLN refactor core indexes #37582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
Dec 22, 2020
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5685cb1
refactor core indexes
MarcoGorelli Nov 2, 2020
7695e66
parens
MarcoGorelli Nov 2, 2020
d5d4530
reversions
MarcoGorelli Nov 2, 2020
23be6a5
make pythonic
MarcoGorelli Nov 2, 2020
b3d1dec
try moving outside try
MarcoGorelli Nov 2, 2020
1b0d92c
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Nov 4, 2020
e0d36eb
move out of try-except
MarcoGorelli Nov 4, 2020
fb2bea0
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Nov 5, 2020
26c419e
keep asi8.min() inside try-except
MarcoGorelli Nov 5, 2020
d076431
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Nov 6, 2020
bb0fc86
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Nov 8, 2020
445f2e9
empty check
MarcoGorelli Nov 8, 2020
e8f117c
similar simplification in max
MarcoGorelli Nov 8, 2020
f1f1deb
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Nov 9, 2020
e860c45
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Nov 23, 2020
a58de56
fix merge error
MarcoGorelli Nov 23, 2020
98672b1
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Nov 24, 2020
be80fb7
wip
MarcoGorelli Nov 24, 2020
dc0b84e
early return
MarcoGorelli Nov 24, 2020
4074a88
parens
MarcoGorelli Nov 24, 2020
00d6be0
:art:
MarcoGorelli Nov 24, 2020
d073da9
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Nov 29, 2020
fdafd36
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Dec 6, 2020
01c4a06
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Dec 9, 2020
1b254ae
coverage
MarcoGorelli Dec 9, 2020
dffc440
:twisted_rightwards_arrows: Merge remote-tracking branch 'upstream/ma…
MarcoGorelli Dec 16, 2020
63a8575
Merge remote-tracking branch 'upstream/master' into refactor-core-ind…
MarcoGorelli Dec 22, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,4 @@ def all_indexes_same(indexes):
"""
itr = iter(indexes)
first = next(itr)
for index in itr:
if not first.equals(index):
return False
return True
return all(first.equals(index) for index in itr)
60 changes: 22 additions & 38 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -906,9 +906,7 @@ def __repr__(self) -> str_t:
if data is None:
data = ""

res = f"{klass_name}({data}{prepr})"

return res
return f"{klass_name}({data}{prepr})"

def _format_space(self) -> str_t:

Expand Down Expand Up @@ -987,7 +985,6 @@ def _format_with_header(
if is_object_dtype(values.dtype):
values = lib.maybe_convert_objects(values, safe=1)

if is_object_dtype(values.dtype):
result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]

# could have nans
Expand Down Expand Up @@ -1615,7 +1612,7 @@ def _drop_level_numbers(self, levnums: List[int]):
Drop MultiIndex levels by level _number_, not name.
"""

if len(levnums) == 0:
if not levnums:
return self
if len(levnums) >= self.nlevels:
raise ValueError(
Expand Down Expand Up @@ -3170,7 +3167,7 @@ def get_indexer(
"Reindexing only valid with uniquely valued Index objects"
)

if method == "pad" or method == "backfill":
if method in ["pad", "backfill"]:
indexer = self._get_fill_indexer(target, method, limit, tolerance)
elif method == "nearest":
indexer = self._get_nearest_indexer(target, limit, tolerance)
Expand Down Expand Up @@ -3293,8 +3290,7 @@ def _filter_indexer_tolerance(
) -> np.ndarray:
# error: Unsupported left operand type for - ("ExtensionArray")
distance = abs(self._values[indexer] - target) # type: ignore[operator]
indexer = np.where(distance <= tolerance, indexer, -1)
return indexer
return np.where(distance <= tolerance, indexer, -1)

# --------------------------------------------------------------------
# Indexer Conversion Methods
Expand Down Expand Up @@ -3403,9 +3399,7 @@ def _convert_listlike_indexer(self, keyarr):
keyarr : numpy.ndarray
Return tuple-safe keys.
"""
if isinstance(keyarr, Index):
pass
else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we did it this way to make coverage obvious

if not isinstance(keyarr, Index):
keyarr = self._convert_arr_indexer(keyarr)

indexer = self._convert_list_indexer(keyarr)
Expand All @@ -3424,8 +3418,7 @@ def _convert_arr_indexer(self, keyarr):
-------
converted_keyarr : array-like
"""
keyarr = com.asarray_tuplesafe(keyarr)
return keyarr
return com.asarray_tuplesafe(keyarr)

def _convert_list_indexer(self, keyarr):
"""
Expand Down Expand Up @@ -3793,9 +3786,8 @@ def _join_multi(self, other, how, return_indexers=True):
other, level, how=how, return_indexers=return_indexers
)

if flip_order:
if isinstance(result, tuple):
return result[0], result[2], result[1]
if flip_order and isinstance(result, tuple):
return result[0], result[2], result[1]
return result

@final
Expand Down Expand Up @@ -4340,7 +4332,7 @@ def append(self, other):
to_concat = [self]

if isinstance(other, (list, tuple)):
to_concat = to_concat + list(other)
to_concat += list(other)
else:
to_concat.append(other)

Expand Down Expand Up @@ -4835,9 +4827,7 @@ def _should_fallback_to_positional(self) -> bool:
"""
Should an integer key be treated as positional?
"""
if self.holds_integer() or self.is_boolean():
return False
return True
return not self.holds_integer() and not self.is_boolean()

def _get_values_for_loc(self, series: "Series", loc, key):
"""
Expand Down Expand Up @@ -5298,11 +5288,7 @@ def _validate_indexer(self, form: str_t, key, kind: str_t):
"""
assert kind in ["getitem", "iloc"]

if key is None:
pass
elif is_integer(key):
pass
else:
if key is not None and not is_integer(key):
raise self._invalid_indexer(form, key)

def _maybe_cast_slice_bound(self, label, side: str_t, kind):
Expand Down Expand Up @@ -5607,9 +5593,10 @@ def _cmp_method(self, other, op):
elif op in {operator.ne, operator.lt, operator.gt}:
return np.zeros(len(self), dtype=bool)

if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)):
if len(self) != len(other):
raise ValueError("Lengths must match to compare")
if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
self
) != len(other):
raise ValueError("Lengths must match to compare")

if not isinstance(other, ABCMultiIndex):
other = extract_array(other, extract_numpy=True)
Expand Down Expand Up @@ -5930,10 +5917,11 @@ def trim_front(strings: List[str]) -> List[str]:
"""
Trims zeros and decimal points.
"""
trimmed = strings
while len(strings) > 0 and all(x[0] == " " for x in trimmed):
trimmed = [x[1:] for x in trimmed]
return trimmed
if not strings:
return strings
while all(x[0] == " " for x in strings):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this going to break when one of the strings becomes empty?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you're absolutely right, thanks! It fails on master too, but still, worth fixing while we're modifying these lines

In [1]: from pandas.core.indexes.base import trim_front

In [2]: trim_front([' ', ' a'])
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-2-1d23255f182c> in <module>
----> 1 trim_front([' ', ' a'])

~/pandas-dev/pandas/core/indexes/base.py in trim_front(strings)
   5848     """
   5849     trimmed = strings
-> 5850     while len(strings) > 0 and all(x[0] == " " for x in trimmed):
   5851         trimmed = [x[1:] for x in trimmed]
   5852     return trimmed

~/pandas-dev/pandas/core/indexes/base.py in <genexpr>(.0)
   5848     """
   5849     trimmed = strings
-> 5850     while len(strings) > 0 and all(x[0] == " " for x in trimmed):
   5851         trimmed = [x[1:] for x in trimmed]
   5852     return trimmed

IndexError: string index out of range

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this again, I don't think it's an issue, because trim_front is only ever called with a list of strings which are all of the same length.

It's only ever called from pd.Index._format_with_header:

result = trim_front(format_array(values, None, justify="left"))

and format_array from pandas/io/formats/format.py returns

fmt_obj.get_result()

which in turn returns

_make_fixed_width(fmt_values, self.justify)

Nonetheless I can make the condition

    while all(strings) and all(x[0] == " " for x in strings):

and add a tiny test for which that'd be necessary

strings = [x[1:] for x in strings]
return strings


def _validate_join_method(method: str):
Expand Down Expand Up @@ -6001,15 +5989,11 @@ def _maybe_cast_with_dtype(data: np.ndarray, dtype: np.dtype, copy: bool) -> np.
except ValueError:
data = np.array(data, dtype=np.float64, copy=copy)

elif inferred == "string":
pass
else:
elif inferred != "string":
data = data.astype(dtype)
elif is_float_dtype(dtype):
inferred = lib.infer_dtype(data, skipna=False)
if inferred == "string":
pass
else:
if inferred != "string":
data = data.astype(dtype)
else:
data = np.array(data, dtype=dtype, copy=copy)
Expand Down
10 changes: 4 additions & 6 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,10 +408,9 @@ def _maybe_utc_convert(self, other: Index) -> Tuple["DatetimeIndex", Index]:
this = self

if isinstance(other, DatetimeIndex):
if self.tz is not None:
if other.tz is None:
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
elif other.tz is not None:
if (self.tz is not None and other.tz is None) or (
self.tz is None and other.tz is not None
):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(self.tz is None) ^ (other.tz is None)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(self.tz is None ^ other.tz is not None)? AFAICT the check is to see if one of them is None but the other one isn't - I've tried regrouping the parens to make it clearer anyway, thanks

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was this not viable?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as in,

if (self.tz is None) or (other.tz is None):
    raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

?

I don't think that would work because we don't want to raise if both self.tz and other.tz are None, just if one is but the other isn't

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not (self.tz is None) or (other.tz is None), but (self.tz is None) ^ (other.tz is None)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah sorry, I didn't actually know that was a Python operator - that should work then, thanks so much!

raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

if not timezones.tz_compare(self.tz, other.tz):
Expand Down Expand Up @@ -745,8 +744,7 @@ def _get_string_slice(self, key: str):
freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
parsed, reso = parsing.parse_time_string(key, freq)
reso = Resolution.from_attrname(reso)
loc = self._partial_date_slice(reso, parsed)
return loc
return self._partial_date_slice(reso, parsed)

def slice_indexer(self, start=None, end=None, step=None, kind=None):
"""
Expand Down