-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN: Index.append() refactoring #16236
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5029592
d5c7d77
44800a4
528295d
554ee79
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ | |
_TD_DTYPE) | ||
from pandas.core.dtypes.generic import ( | ||
ABCDatetimeIndex, ABCTimedeltaIndex, | ||
ABCPeriodIndex) | ||
ABCPeriodIndex, ABCRangeIndex) | ||
|
||
|
||
def get_dtype_kinds(l): | ||
|
@@ -41,6 +41,8 @@ def get_dtype_kinds(l): | |
typ = 'category' | ||
elif is_sparse(arr): | ||
typ = 'sparse' | ||
elif isinstance(arr, ABCRangeIndex): | ||
typ = 'range' | ||
elif is_datetimetz(arr): | ||
# if to_concat contains different tz, | ||
# the result must be object dtype | ||
|
@@ -559,3 +561,47 @@ def convert_sparse(x, axis): | |
# coerce to object if needed | ||
result = result.astype('object') | ||
return result | ||
|
||
|
||
def _concat_rangeindex_same_dtype(indexes): | ||
""" | ||
Concatenates multiple RangeIndex instances. All members of "indexes" must | ||
be of type RangeIndex; result will be RangeIndex if possible, Int64Index | ||
otherwise. E.g.: | ||
indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) | ||
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) | ||
""" | ||
|
||
start = step = next = None | ||
|
||
for obj in indexes: | ||
if not len(obj): | ||
continue | ||
|
||
if start is None: | ||
# This is set by the first non-empty index | ||
start = obj._start | ||
if step is None and len(obj) > 1: | ||
step = obj._step | ||
elif step is None: | ||
# First non-empty index had only one element | ||
if obj._start == start: | ||
return _concat_index_asobject(indexes) | ||
step = obj._start - start | ||
|
||
non_consecutive = ((step != obj._step and len(obj) > 1) or | ||
(next is not None and obj._start != next)) | ||
if non_consecutive: | ||
# Int64Index._append_same_dtype([ix.astype(int) for ix in indexes]) | ||
# would be preferred... but it currently resorts to | ||
# _concat_index_asobject anyway. | ||
return _concat_index_asobject(indexes) | ||
|
||
if step is not None: | ||
next = obj[-1] + step | ||
|
||
if start is None: | ||
start = obj._start | ||
step = obj._step | ||
stop = obj._stop if next is None else next | ||
return indexes[0].__class__(start, stop, step) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is ugly... but the only alternative I see (an import inside the function) is uglier. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. its ok here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Ah no, this wouldn't play nice with the case when no range index is returned, ignore this |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1741,18 +1741,17 @@ def append(self, other): | |
names = set([obj.name for obj in to_concat]) | ||
name = None if len(names) > 1 else self.name | ||
|
||
if self.is_categorical(): | ||
# if calling index is category, don't check dtype of others | ||
from pandas.core.indexes.category import CategoricalIndex | ||
return CategoricalIndex._append_same_dtype(self, to_concat, name) | ||
return self._concat(to_concat, name) | ||
|
||
def _concat(self, to_concat, name): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you call this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, I think it would make more sense to change There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since it is only use by append, I prefer using append in the name, but no strong feelings There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right that it's currently used only by append, but usually you expect x.append(y) to concatenate x to y or to elements of y; instead this only concatenates elements of y. So since you don't object I will go with my proposal. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
in the end it is used to concatenate both y to x, just that this is passed like that in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Sure, I don't object to that. We can agree it is a concat operation used to implement appending: the switch happens when |
||
|
||
typs = _concat.get_dtype_kinds(to_concat) | ||
|
||
if len(typs) == 1: | ||
return self._append_same_dtype(to_concat, name=name) | ||
return self._concat_same_dtype(to_concat, name=name) | ||
return _concat._concat_index_asobject(to_concat, name=name) | ||
|
||
def _append_same_dtype(self, to_concat, name): | ||
def _concat_same_dtype(self, to_concat, name): | ||
""" | ||
Concatenate to_concat which has the same class | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you explain this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See #17307
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Merged this, you can update this line in the other PR if we merge that