Skip to content

Move sort index to generic #36177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Sep 15, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
12dafcf
CLN: move sort_index to core/generic.py #8283
fangchenli Sep 6, 2020
f6ca614
Merge remote-tracking branch 'upstream/master' into move-sort_index-t…
fangchenli Sep 6, 2020
c2e2cb3
CLN: ignore typing check in core/generic.py #8283
fangchenli Sep 6, 2020
909fec6
CLN: remove unused import #8283
fangchenli Sep 7, 2020
524163a
Merge remote-tracking branch 'upstream/master' into move-sort_index-t…
fangchenli Sep 7, 2020
f0958b8
refactor
fangchenli Sep 8, 2020
1a06450
Merge remote-tracking branch 'upstream/master' into move-sort_index-t…
fangchenli Sep 8, 2020
3ebc89d
create function get indexer
fangchenli Sep 8, 2020
260fcc2
Merge remote-tracking branch 'upstream/master' into move-sort_index-t…
fangchenli Sep 8, 2020
a7fae56
Merge remote-tracking branch 'upstream/master' into move-sort_index-t…
fangchenli Sep 9, 2020
8cb2fec
move get_indexer to sorting
fangchenli Sep 9, 2020
a035fbd
remove unused import
fangchenli Sep 9, 2020
ec57ba5
Merge branch 'master' into move-sort_index-to-generic
fangchenli Sep 9, 2020
452ec68
Merge remote-tracking branch 'upstream/master' into move-sort_index-t…
fangchenli Sep 9, 2020
9691439
fix type import
fangchenli Sep 9, 2020
940b692
Merge remote-tracking branch 'upstream/master' into move-sort_index-t…
fangchenli Sep 11, 2020
c79cd81
simplify sort_index
fangchenli Sep 11, 2020
8b9732a
add doc string
fangchenli Sep 11, 2020
4461b7b
remove unused import
fangchenli Sep 11, 2020
4402679
Merge remote-tracking branch 'upstream/master' into move-sort_index-t…
fangchenli Sep 11, 2020
9c184b7
Merge remote-tracking branch 'upstream/master' into move-sort_index-t…
fangchenli Sep 13, 2020
15d7401
type get indexer
fangchenli Sep 14, 2020
58a62ee
fix type
fangchenli Sep 14, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 11 additions & 57 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@
)
from pandas.core.reshape.melt import melt
from pandas.core.series import Series
from pandas.core.sorting import ensure_key_mapped

from pandas.io.common import get_filepath_or_buffer
from pandas.io.formats import console, format as fmt
Expand Down Expand Up @@ -5466,62 +5465,17 @@ def sort_index(
C 3
d 4
"""
# TODO: this can be combined with Series.sort_index impl as
# almost identical

inplace = validate_bool_kwarg(inplace, "inplace")

axis = self._get_axis_number(axis)
labels = self._get_axis(axis)
labels = ensure_key_mapped(labels, key, levels=level)

# make sure that the axis is lexsorted to start
# if not we need to reconstruct to get the correct indexer
labels = labels._sort_levels_monotonic()
if level is not None:
new_axis, indexer = labels.sortlevel(
level, ascending=ascending, sort_remaining=sort_remaining
)

elif isinstance(labels, MultiIndex):
from pandas.core.sorting import lexsort_indexer

indexer = lexsort_indexer(
labels._get_codes_for_sorting(),
orders=ascending,
na_position=na_position,
)
else:
from pandas.core.sorting import nargsort

# Check monotonic-ness before sort an index
# GH11080
if (ascending and labels.is_monotonic_increasing) or (
not ascending and labels.is_monotonic_decreasing
):
if inplace:
return
else:
return self.copy()

indexer = nargsort(
labels, kind=kind, ascending=ascending, na_position=na_position
)

baxis = self._get_block_manager_axis(axis)
new_data = self._mgr.take(indexer, axis=baxis, verify=False)

# reconstruct axis if needed
new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic()

if ignore_index:
new_data.axes[1] = ibase.default_index(len(indexer))

result = self._constructor(new_data)
if inplace:
return self._update_inplace(result)
else:
return result.__finalize__(self, method="sort_index")
return super().sort_index(
axis,
level,
ascending,
inplace,
kind,
na_position,
sort_remaining,
ignore_index,
key,
)

def value_counts(
self,
Expand Down
95 changes: 95 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
CompressionOptions,
FilePathOrBuffer,
FrameOrSeries,
IndexKeyFunc,
JSONSerializable,
Label,
Level,
Expand Down Expand Up @@ -68,6 +69,7 @@
from pandas.core.dtypes.common import (
ensure_int64,
ensure_object,
ensure_platform_int,
ensure_str,
is_bool,
is_bool_dtype,
Expand Down Expand Up @@ -96,6 +98,7 @@
import pandas.core.common as com
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.flags import Flags
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import Index, MultiIndex, RangeIndex, ensure_index
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import Period, PeriodIndex
Expand All @@ -104,6 +107,7 @@
from pandas.core.missing import find_valid_index
from pandas.core.ops import _align_method_FRAME
from pandas.core.shared_docs import _shared_docs
from pandas.core.sorting import ensure_key_mapped
from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window

from pandas.io.formats import format as fmt
Expand Down Expand Up @@ -4418,6 +4422,97 @@ def sort_values(
"""
raise AbstractMethodError(self)

def sort_index(
    self,
    axis=0,
    level=None,
    ascending: bool_t = True,
    inplace: bool_t = False,
    kind: str = "quicksort",
    na_position: str = "last",
    sort_remaining: bool_t = True,
    ignore_index: bool_t = False,
    key: IndexKeyFunc = None,
):
    """
    Sort the object by the labels along ``axis``.

    Shared implementation that the DataFrame and Series ``sort_index``
    overrides delegate to.

    Parameters
    ----------
    axis : axis identifier, default 0
        Axis whose labels are sorted. For a non-DataFrame caller the value
        is only validated; the index is always the one that is sorted.
    level : optional
        When given, sorting is delegated to ``sortlevel`` on the labels.
    ascending : bool, default True
        Sort direction.
    inplace : bool, default False
        If True, update ``self`` and return None.
    kind : str, default "quicksort"
        Sorting algorithm forwarded to ``nargsort``; only used for
        non-MultiIndex labels when ``level`` is None.
    na_position : str, default "last"
        Forwarded to ``lexsort_indexer`` / ``nargsort``.
    sort_remaining : bool, default True
        Forwarded to ``sortlevel`` when ``level`` is given.
    ignore_index : bool, default False
        If True, the result's index is replaced by a default index.
    key : callable, optional
        Applied to the labels (via ``ensure_key_mapped``) before sorting.

    Returns
    -------
    Same type as caller, or None when ``inplace=True``.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    is_dataframe = isinstance(self, ABCDataFrame)
    if is_dataframe:
        axis = self._get_axis_number(axis)
        target = ensure_key_mapped(self._get_axis(axis), key, levels=level)
        # make sure that the axis is lexsorted to start
        # if not we need to reconstruct to get the correct indexer
        target = target._sort_levels_monotonic()
    else:
        # Called for validation only: raises on an invalid axis.
        self._get_axis_number(axis)
        target = ensure_key_mapped(self.index, key, levels=level)

    if level is not None:
        # Only the positional indexer is needed; the sorted labels are
        # rebuilt from the original (un-key-mapped) axis below.
        _, indexer = target.sortlevel(
            level, ascending=ascending, sort_remaining=sort_remaining
        )

    elif isinstance(target, MultiIndex):
        from pandas.core.sorting import lexsort_indexer

        if not is_dataframe:
            target = target._sort_levels_monotonic()

        indexer = lexsort_indexer(
            target._get_codes_for_sorting(),  # type: ignore[attr-defined]
            orders=ascending,
            na_position=na_position,
        )

    else:
        from pandas.core.sorting import nargsort

        # Check monotonicity before sorting an index
        # GH11080
        if (ascending and target.is_monotonic_increasing) or (
            not ascending and target.is_monotonic_decreasing
        ):
            # Already in the requested order: no reordering is needed, but
            # ``ignore_index`` must still be honoured, otherwise the result
            # keeps the original labels instead of 0..n-1.
            result = self if inplace else self.copy()
            if ignore_index:
                result.index = ibase.default_index(len(result))
            return None if inplace else result

        indexer = nargsort(
            target, kind=kind, ascending=ascending, na_position=na_position
        )

    if is_dataframe:
        baxis = self._get_block_manager_axis(axis)
        new_data = self._mgr.take(indexer, axis=baxis, verify=False)

        # reconstruct axis if needed
        new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic()

        if ignore_index:
            # NOTE(review): this always replaces manager axis 1 and sizes it
            # from ``indexer``, whichever ``axis`` was sorted -- confirm this
            # is intended when sorting the other axis.
            new_data.axes[1] = ibase.default_index(len(indexer))

        result = self._constructor(new_data)

    else:
        indexer = ensure_platform_int(indexer)
        new_index = self.index.take(indexer)
        new_index = new_index._sort_levels_monotonic()

        new_values = self._values.take(indexer)
        result = self._constructor(
            new_values, index=new_index  # type: ignore[call-arg]
        )

        if ignore_index:
            result.index = ibase.default_index(len(result))

    if inplace:
        return self._update_inplace(result)
    else:
        return result.__finalize__(self, method="sort_index")

@doc(
klass=_shared_doc_kwargs["klass"],
axes=_shared_doc_kwargs["axes"],
Expand Down
64 changes: 11 additions & 53 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3462,59 +3462,17 @@ def sort_index(
dtype: int64
"""

# TODO: this can be combined with DataFrame.sort_index impl as
# almost identical
inplace = validate_bool_kwarg(inplace, "inplace")
# Validate the axis parameter
self._get_axis_number(axis)
index = ensure_key_mapped(self.index, key, levels=level)

if level is not None:
new_index, indexer = index.sortlevel(
level, ascending=ascending, sort_remaining=sort_remaining
)

elif isinstance(index, MultiIndex):
from pandas.core.sorting import lexsort_indexer

labels = index._sort_levels_monotonic()

indexer = lexsort_indexer(
labels._get_codes_for_sorting(),
orders=ascending,
na_position=na_position,
)
else:
from pandas.core.sorting import nargsort

# Check monotonic-ness before sort an index
# GH11080
if (ascending and index.is_monotonic_increasing) or (
not ascending and index.is_monotonic_decreasing
):
if inplace:
return
else:
return self.copy()

indexer = nargsort(
index, kind=kind, ascending=ascending, na_position=na_position
)

indexer = ensure_platform_int(indexer)
new_index = self.index.take(indexer)
new_index = new_index._sort_levels_monotonic()

new_values = self._values.take(indexer)
result = self._constructor(new_values, index=new_index)

if ignore_index:
result.index = ibase.default_index(len(result))

if inplace:
self._update_inplace(result)
else:
return result.__finalize__(self, method="sort_index")
return super().sort_index(
axis,
level,
ascending,
inplace,
kind,
na_position,
sort_remaining,
ignore_index,
key,
)

def argsort(self, axis=0, kind="quicksort", order=None) -> "Series":
"""
Expand Down