Skip to content

Commit 1728768

Browse files
phofl authored and yehoshuadimarsky committed
REGR: setitem writing into RangeIndex instead of creating a copy (pandas-dev#47143)
1 parent 066a6c9 commit 1728768

File tree

5 files changed

+39
-12
lines changed

5 files changed

+39
-12
lines changed

doc/source/whatsnew/v1.4.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`DataFrame.replace` when the replacement value was explicitly ``None`` when passed in a dictionary to ``to_replace`` also casting other columns to object dtype even when there were no values to replace (:issue:`46634`)
18+
- Fixed regression where setting values with :meth:`DataFrame.loc` also updated the :class:`RangeIndex` when the index had been set as a new column and that column was updated afterwards (:issue:`47128`)
1819
- Fixed regression in :meth:`DataFrame.nsmallest` that led to wrong results when ``np.nan`` was in the sorting column (:issue:`46589`)
1920
- Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
2021
- Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`)

pandas/core/construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@ def sanitize_array(
531531
dtype = dtype.numpy_dtype
532532

533533
# extract ndarray or ExtensionArray, ensure we have no PandasArray
534-
data = extract_array(data, extract_numpy=True)
534+
data = extract_array(data, extract_numpy=True, extract_range=True)
535535

536536
if isinstance(data, np.ndarray) and data.ndim == 0:
537537
if dtype is None:
@@ -610,7 +610,7 @@ def sanitize_array(
610610
# materialize e.g. generators, convert e.g. tuples, abc.ValueView
611611
if hasattr(data, "__array__"):
612612
# e.g. dask array GH#38645
613-
data = np.asarray(data)
613+
data = np.array(data, copy=copy)
614614
else:
615615
data = list(data)
616616

pandas/tests/copy_view/test_setitem.py

+3-10
Original file line numberDiff line numberDiff line change
@@ -60,21 +60,14 @@ def test_set_column_with_index(using_copy_on_write):
6060
idx.values[0] = 0
6161
tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
6262

63-
# however, in case of a RangeIndex, we currently don't copy the cached
64-
# "materialized" values
6563
idx = RangeIndex(1, 4)
6664
arr = idx.values
6765

6866
df["d"] = idx
6967

70-
if using_copy_on_write:
71-
assert not np.shares_memory(df["d"].values, arr)
72-
arr[0] = 0
73-
tm.assert_series_equal(df["d"], Series([1, 2, 3], name="d"))
74-
else:
75-
assert np.shares_memory(df["d"].values, arr)
76-
arr[0] = 0
77-
tm.assert_series_equal(df["d"], Series([0, 2, 3], name="d"))
68+
assert not np.shares_memory(df["d"].values, arr)
69+
arr[0] = 0
70+
tm.assert_series_equal(df["d"], Series([1, 2, 3], name="d"))
7871

7972

8073
def test_set_columns_with_dataframe(using_copy_on_write):

pandas/tests/frame/indexing/test_setitem.py

+9
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,15 @@ def test_frame_setitem_newcol_timestamp(self):
857857
data[ts] = np.nan # works, mostly a smoke-test
858858
assert np.isnan(data[ts]).all()
859859

860+
def test_frame_setitem_rangeindex_into_new_col(self):
861+
# GH#47128
862+
df = DataFrame({"a": ["a", "b"]})
863+
df["b"] = df.index
864+
df.loc[[False, True], "b"] = 100
865+
result = df.loc[[1], :]
866+
expected = DataFrame({"a": ["b"], "b": [100]}, index=[1])
867+
tm.assert_frame_equal(result, expected)
868+
860869

861870
class TestDataFrameSetItemSlicing:
862871
def test_setitem_slice_position(self):

pandas/tests/test_downstream.py

+24
Original file line numberDiff line numberDiff line change
@@ -304,3 +304,27 @@ def test_missing_required_dependency():
304304
output = exc.value.stdout.decode()
305305
for name in ["numpy", "pytz", "dateutil"]:
306306
assert name in output
307+
308+
309+
def test_frame_setitem_dask_array_into_new_col():
310+
# GH#47128
311+
312+
# dask sets "compute.use_numexpr" to False, so catch the current value
313+
# and ensure to reset it afterwards to avoid impacting other tests
314+
olduse = pd.get_option("compute.use_numexpr")
315+
316+
try:
317+
dask = import_module("dask") # noqa:F841
318+
319+
import dask.array as da
320+
321+
dda = da.array([1, 2])
322+
df = DataFrame({"a": ["a", "b"]})
323+
df["b"] = dda
324+
df["c"] = dda
325+
df.loc[[False, True], "b"] = 100
326+
result = df.loc[[1], :]
327+
expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1])
328+
tm.assert_frame_equal(result, expected)
329+
finally:
330+
pd.set_option("compute.use_numexpr", olduse)

0 commit comments

Comments
 (0)