Skip to content

Commit 01141ab

Browse files
authored
ENH: Add lazy copy for sort_values (#50643)
1 parent f15a65c commit 01141ab

File tree

5 files changed

+66
-1
lines changed

5 files changed

+66
-1
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,7 @@ Indexing
952952
- Bug in :meth:`DataFrame.__setitem__` raising when indexer is a :class:`DataFrame` with ``boolean`` dtype (:issue:`47125`)
953953
- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
954954
- Bug in :meth:`DataFrame.loc` when setting :class:`DataFrame` with different dtypes coercing values to single dtype (:issue:`50467`)
955+
- Bug in :meth:`DataFrame.sort_values` where ``None`` was not returned when ``by`` is empty list and ``inplace=True`` (:issue:`50643`)
955956
- Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`)
956957
- Bug in :meth:`Series.loc` raising error for out of bounds end of slice indexer (:issue:`50161`)
957958
- Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`)

pandas/core/frame.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from pandas._libs.hashtable import duplicated
4949
from pandas._libs.lib import (
5050
NoDefault,
51+
array_equal_fast,
5152
no_default,
5253
)
5354
from pandas._typing import (
@@ -6695,7 +6696,16 @@ def sort_values(
66956696
k, kind=kind, ascending=ascending, na_position=na_position, key=key
66966697
)
66976698
else:
6698-
return self.copy()
6699+
if inplace:
6700+
return self._update_inplace(self)
6701+
else:
6702+
return self.copy(deep=None)
6703+
6704+
if array_equal_fast(indexer, np.arange(0, len(indexer), dtype=indexer.dtype)):
6705+
if inplace:
6706+
return self._update_inplace(self)
6707+
else:
6708+
return self.copy(deep=None)
66996709

67006710
new_data = self._mgr.take(
67016711
indexer, axis=self._get_block_manager_axis(axis), verify=False

pandas/core/series.py

+7
Original file line numberDiff line numberDiff line change
@@ -3559,6 +3559,13 @@ def sort_values(
35593559
values_to_sort = ensure_key_mapped(self, key)._values if key else self._values
35603560
sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position)
35613561

3562+
if array_equal_fast(
3563+
sorted_index, np.arange(0, len(sorted_index), dtype=sorted_index.dtype)
3564+
):
3565+
if inplace:
3566+
return self._update_inplace(self)
3567+
return self.copy(deep=None)
3568+
35623569
result = self._constructor(
35633570
self._values[sorted_index], index=self.index[sorted_index]
35643571
)

pandas/tests/copy_view/test_methods.py

+39
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,45 @@ def test_sort_index(using_copy_on_write):
644644
tm.assert_series_equal(ser, ser_orig)
645645

646646

647+
@pytest.mark.parametrize(
648+
"obj, kwargs",
649+
[(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})],
650+
)
651+
def test_sort_values(using_copy_on_write, obj, kwargs):
652+
obj_orig = obj.copy()
653+
obj2 = obj.sort_values(**kwargs)
654+
655+
if using_copy_on_write:
656+
assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
657+
else:
658+
assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
659+
660+
# mutating df triggers a copy-on-write for the column / block
661+
obj2.iloc[0] = 0
662+
assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
663+
tm.assert_equal(obj, obj_orig)
664+
665+
666+
@pytest.mark.parametrize(
667+
"obj, kwargs",
668+
[(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})],
669+
)
670+
def test_sort_values_inplace(using_copy_on_write, obj, kwargs, using_array_manager):
671+
obj_orig = obj.copy()
672+
view = obj[:]
673+
obj.sort_values(inplace=True, **kwargs)
674+
675+
assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
676+
677+
# mutating obj triggers a copy-on-write for the column / block
678+
obj.iloc[0] = 0
679+
if using_copy_on_write:
680+
assert not np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
681+
tm.assert_equal(view, obj_orig)
682+
else:
683+
assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
684+
685+
647686
def test_reorder_levels(using_copy_on_write):
648687
index = MultiIndex.from_tuples(
649688
[(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"]

pandas/tests/frame/methods/test_sort_values.py

+8
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,14 @@ def test_sort_values_reshaping(self):
619619

620620
tm.assert_frame_equal(df, expected)
621621

622+
def test_sort_values_no_by_inplace(self):
623+
# GH#50643
624+
df = DataFrame({"a": [1, 2, 3]})
625+
expected = df.copy()
626+
result = df.sort_values(by=[], inplace=True)
627+
tm.assert_frame_equal(df, expected)
628+
assert result is None
629+
622630

623631
class TestDataFrameSortKey: # test key sorting (issue 27237)
624632
def test_sort_values_inplace_key(self, sort_by_key):

0 commit comments

Comments
 (0)