Skip to content

Commit 9f56342

Browse files
author
MarcoGorelli
committed
Merge remote-tracking branch 'upstream/main' into pt1-deprecate-ban-upcasting
2 parents 0e5fb73 + b37589a commit 9f56342

File tree

10 files changed

+134
-14
lines changed

10 files changed

+134
-14
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,7 @@ Indexing
952952
- Bug in :meth:`DataFrame.__setitem__` raising when indexer is a :class:`DataFrame` with ``boolean`` dtype (:issue:`47125`)
953953
- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
954954
- Bug in :meth:`DataFrame.loc` when setting :class:`DataFrame` with different dtypes coercing values to single dtype (:issue:`50467`)
955+
- Bug in :meth:`DataFrame.sort_values` where ``None`` was not returned when ``by`` is an empty list and ``inplace=True`` (:issue:`50643`)
955956
- Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`)
956957
- Bug in :meth:`Series.loc` raising error for out of bounds end of slice indexer (:issue:`50161`)
957958
- Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`)

pandas/core/frame.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from pandas._libs.hashtable import duplicated
4949
from pandas._libs.lib import (
5050
NoDefault,
51+
array_equal_fast,
5152
no_default,
5253
)
5354
from pandas._typing import (
@@ -6345,7 +6346,7 @@ def dropna(
63456346
raise ValueError(f"invalid how option: {how}")
63466347

63476348
if np.all(mask):
6348-
result = self.copy()
6349+
result = self.copy(deep=None)
63496350
else:
63506351
result = self.loc(axis=axis)[mask]
63516352

@@ -6695,7 +6696,16 @@ def sort_values(
66956696
k, kind=kind, ascending=ascending, na_position=na_position, key=key
66966697
)
66976698
else:
6698-
return self.copy()
6699+
if inplace:
6700+
return self._update_inplace(self)
6701+
else:
6702+
return self.copy(deep=None)
6703+
6704+
if array_equal_fast(indexer, np.arange(0, len(indexer), dtype=indexer.dtype)):
6705+
if inplace:
6706+
return self._update_inplace(self)
6707+
else:
6708+
return self.copy(deep=None)
66996709

67006710
new_data = self._mgr.take(
67016711
indexer, axis=self._get_block_manager_axis(axis), verify=False

pandas/core/internals/managers.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -1955,9 +1955,17 @@ def _blklocs(self):
19551955
"""compat with BlockManager"""
19561956
return None
19571957

1958-
def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockManager:
1958+
def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager:
19591959
# similar to get_slice, but not restricted to slice indexer
19601960
blk = self._block
1961+
if (
1962+
using_copy_on_write()
1963+
and isinstance(indexer, np.ndarray)
1964+
and len(indexer) > 0
1965+
and com.is_bool_indexer(indexer)
1966+
and indexer.all()
1967+
):
1968+
return type(self)(blk, self.index, [weakref.ref(blk)], parent=self)
19611969
array = blk._slice(indexer)
19621970
if array.ndim > 1:
19631971
# This will be caught by Series._get_values

pandas/core/nanops.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -370,9 +370,9 @@ def _wrap_results(result, dtype: np.dtype, fill_value=None):
370370
result = np.nan
371371

372372
if isna(result):
373-
result = np.datetime64("NaT", "ns")
373+
result = np.datetime64("NaT", "ns").astype(dtype)
374374
else:
375-
result = np.int64(result).view("datetime64[ns]")
375+
result = np.int64(result).view(dtype)
376376
# retain original unit
377377
result = result.astype(dtype, copy=False)
378378
else:

pandas/core/series.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,7 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Any:
921921
"""
922922
return self._values[i]
923923

924-
def _slice(self, slobj: slice, axis: Axis = 0) -> Series:
924+
def _slice(self, slobj: slice | np.ndarray, axis: Axis = 0) -> Series:
925925
# axis kwarg is retained for compat with NDFrame method
926926
# _slice is *always* positional
927927
return self._get_values(slobj)
@@ -3559,6 +3559,13 @@ def sort_values(
35593559
values_to_sort = ensure_key_mapped(self, key)._values if key else self._values
35603560
sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position)
35613561

3562+
if array_equal_fast(
3563+
sorted_index, np.arange(0, len(sorted_index), dtype=sorted_index.dtype)
3564+
):
3565+
if inplace:
3566+
return self._update_inplace(self)
3567+
return self.copy(deep=None)
3568+
35623569
result = self._constructor(
35633570
self._values[sorted_index], index=self.index[sorted_index]
35643571
)
@@ -5576,7 +5583,7 @@ def dropna(
55765583
return result
55775584
else:
55785585
if not inplace:
5579-
return self.copy()
5586+
return self.copy(deep=None)
55805587
return None
55815588

55825589
# ----------------------------------------------------------------------

pandas/tests/arrays/datetimes/test_reductions.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010

1111

1212
class TestReductions:
13+
@pytest.fixture(params=["s", "ms", "us", "ns"])
14+
def unit(self, request):
15+
return request.param
16+
1317
@pytest.fixture
1418
def arr1d(self, tz_naive_fixture):
1519
"""Fixture returning DatetimeArray with parametrized timezones"""
@@ -28,17 +32,20 @@ def arr1d(self, tz_naive_fixture):
2832
)
2933
return arr
3034

31-
def test_min_max(self, arr1d):
35+
def test_min_max(self, arr1d, unit):
3236
arr = arr1d
37+
arr = arr.as_unit(unit)
3338
tz = arr.tz
3439

3540
result = arr.min()
36-
expected = pd.Timestamp("2000-01-02", tz=tz)
41+
expected = pd.Timestamp("2000-01-02", tz=tz).as_unit(unit)
3742
assert result == expected
43+
assert result.unit == expected.unit
3844

3945
result = arr.max()
40-
expected = pd.Timestamp("2000-01-05", tz=tz)
46+
expected = pd.Timestamp("2000-01-05", tz=tz).as_unit(unit)
4147
assert result == expected
48+
assert result.unit == expected.unit
4249

4350
result = arr.min(skipna=False)
4451
assert result is NaT

pandas/tests/copy_view/test_internals.py

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def test_clear_parent(using_copy_on_write):
6565
assert subset._mgr.parent is None
6666

6767

68+
@pytest.mark.single_cpu
6869
@td.skip_array_manager_invalid_test
6970
def test_switch_options():
7071
# ensure we can switch the value of the option within one session

pandas/tests/copy_view/test_methods.py

+73
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,40 @@ def test_add_suffix(using_copy_on_write):
503503
tm.assert_frame_equal(df, df_orig)
504504

505505

506+
@pytest.mark.parametrize("axis, val", [(0, 5.5), (1, np.nan)])
507+
def test_dropna(using_copy_on_write, axis, val):
508+
df = DataFrame({"a": [1, 2, 3], "b": [4, val, 6], "c": "d"})
509+
df_orig = df.copy()
510+
df2 = df.dropna(axis=axis)
511+
512+
if using_copy_on_write:
513+
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
514+
else:
515+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
516+
517+
df2.iloc[0, 0] = 0
518+
if using_copy_on_write:
519+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
520+
tm.assert_frame_equal(df, df_orig)
521+
522+
523+
@pytest.mark.parametrize("val", [5, 5.5])
524+
def test_dropna_series(using_copy_on_write, val):
525+
ser = Series([1, val, 4])
526+
ser_orig = ser.copy()
527+
ser2 = ser.dropna()
528+
529+
if using_copy_on_write:
530+
assert np.shares_memory(ser2.values, ser.values)
531+
else:
532+
assert not np.shares_memory(ser2.values, ser.values)
533+
534+
ser2.iloc[0] = 0
535+
if using_copy_on_write:
536+
assert not np.shares_memory(ser2.values, ser.values)
537+
tm.assert_series_equal(ser, ser_orig)
538+
539+
506540
@pytest.mark.parametrize(
507541
"method",
508542
[
@@ -644,6 +678,45 @@ def test_sort_index(using_copy_on_write):
644678
tm.assert_series_equal(ser, ser_orig)
645679

646680

681+
@pytest.mark.parametrize(
682+
"obj, kwargs",
683+
[(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})],
684+
)
685+
def test_sort_values(using_copy_on_write, obj, kwargs):
686+
obj_orig = obj.copy()
687+
obj2 = obj.sort_values(**kwargs)
688+
689+
if using_copy_on_write:
690+
assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
691+
else:
692+
assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
693+
694+
# mutating obj2 triggers a copy-on-write for the column / block
695+
obj2.iloc[0] = 0
696+
assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
697+
tm.assert_equal(obj, obj_orig)
698+
699+
700+
@pytest.mark.parametrize(
701+
"obj, kwargs",
702+
[(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})],
703+
)
704+
def test_sort_values_inplace(using_copy_on_write, obj, kwargs, using_array_manager):
705+
obj_orig = obj.copy()
706+
view = obj[:]
707+
obj.sort_values(inplace=True, **kwargs)
708+
709+
assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
710+
711+
# mutating obj triggers a copy-on-write for the column / block
712+
obj.iloc[0] = 0
713+
if using_copy_on_write:
714+
assert not np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
715+
tm.assert_equal(view, obj_orig)
716+
else:
717+
assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
718+
719+
647720
def test_reorder_levels(using_copy_on_write):
648721
index = MultiIndex.from_tuples(
649722
[(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"]

pandas/tests/frame/methods/test_sort_values.py

+8
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,14 @@ def test_sort_values_reshaping(self):
619619

620620
tm.assert_frame_equal(df, expected)
621621

622+
def test_sort_values_no_by_inplace(self):
623+
# GH#50643
624+
df = DataFrame({"a": [1, 2, 3]})
625+
expected = df.copy()
626+
result = df.sort_values(by=[], inplace=True)
627+
tm.assert_frame_equal(df, expected)
628+
assert result is None
629+
622630

623631
class TestDataFrameSortKey: # test key sorting (issue 27237)
624632
def test_sort_values_inplace_key(self, sort_by_key):

pandas/tests/test_nanops.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -1157,10 +1157,14 @@ def prng(self):
11571157

11581158

11591159
class TestDatetime64NaNOps:
1160+
@pytest.fixture(params=["s", "ms", "us", "ns"])
1161+
def unit(self, request):
1162+
return request.param
1163+
11601164
# Enabling mean changes the behavior of DataFrame.mean
11611165
# See https://github.com/pandas-dev/pandas/issues/24752
1162-
def test_nanmean(self):
1163-
dti = pd.date_range("2016-01-01", periods=3)
1166+
def test_nanmean(self, unit):
1167+
dti = pd.date_range("2016-01-01", periods=3).as_unit(unit)
11641168
expected = dti[1]
11651169

11661170
for obj in [dti, DatetimeArray(dti), Series(dti)]:
@@ -1173,8 +1177,9 @@ def test_nanmean(self):
11731177
result = nanops.nanmean(obj)
11741178
assert result == expected
11751179

1176-
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
1177-
def test_nanmean_skipna_false(self, dtype):
1180+
@pytest.mark.parametrize("constructor", ["M8", "m8"])
1181+
def test_nanmean_skipna_false(self, constructor, unit):
1182+
dtype = f"{constructor}[{unit}]"
11781183
arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
11791184

11801185
arr[-1, -1] = "NaT"

0 commit comments

Comments
 (0)