From f8c22025d2273e35a0f8d5e3677968b4191eb97c Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Mon, 22 May 2017 15:59:48 -0700 Subject: [PATCH 01/13] BUG: .iloc[:] and .loc[:] return copy of original object (#13873) This is consistent with the behavior of [:] on a list https://docs.python.org/2/tutorial/introduction.html#lists https://docs.python.org/3/tutorial/introduction.html#lists --- pandas/core/indexing.py | 6 +++--- pandas/tests/indexing/test_iloc.py | 5 +++++ pandas/tests/indexing/test_loc.py | 5 +++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 50f2f9b52e111..0437d91a94bb6 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1250,7 +1250,7 @@ def _get_slice_axis(self, slice_obj, axis=0): obj = self.obj if not need_slice(slice_obj): - return obj + return obj.copy() indexer = self._convert_slice_indexer(slice_obj, axis) if isinstance(indexer, slice): @@ -1349,7 +1349,7 @@ def _get_slice_axis(self, slice_obj, axis=0): """ this is pretty simple as we just have to deal with labels """ obj = self.obj if not need_slice(slice_obj): - return obj + return obj.copy() labels = obj._get_axis(axis) indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, @@ -1690,7 +1690,7 @@ def _get_slice_axis(self, slice_obj, axis=0): obj = self.obj if not need_slice(slice_obj): - return obj + return obj.copy() slice_obj = self._convert_slice_indexer(slice_obj, axis) if isinstance(slice_obj, slice): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index af4b9e1f0cc25..9f647819438fa 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -591,3 +591,8 @@ def test_iloc_empty_list_indexer_is_ok(self): tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True) + + def test_loc_identity_slice_returns_new_object(self): + # GH13873 + df = DataFrame({'a': [1, 3, 5], 'b': [2, 4, 6]}) + assert not df.iloc[:] is df diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index fe2318be72eda..4c7250707da86 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -630,3 +630,8 @@ def test_loc_empty_list_indexer_is_ok(self): tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True) + + def test_loc_identity_slice_returns_new_object(self): + # GH13873 + df = DataFrame({'a': [1, 3, 5], 'b': [2, 4, 6]}) + assert not df.loc[:] is df From 84f2d5d1ef4aac28bf772619ec25b19f03f06780 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Mon, 22 May 2017 16:58:39 -0700 Subject: [PATCH 02/13] iloc and loc return new object when slice is a tuple --- pandas/core/indexing.py | 5 ++--- pandas/tests/indexing/test_iloc.py | 2 ++ pandas/tests/indexing/test_loc.py | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0437d91a94bb6..fd43b27e17610 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -845,7 +845,7 @@ def _getitem_tuple(self, tup): return self._multi_take(tup) # no shortcut needed - retval = self.obj + retval = self.obj.copy() for i, key in enumerate(tup): if i >= self.obj.ndim: raise IndexingError('Too many indexers') @@ -854,7 +854,6 @@ def _getitem_tuple(self, tup): continue retval = getattr(retval, self.name)._getitem_axis(key, axis=i) - return retval def _multi_take_opportunity(self, tup): @@ -1665,7 +1664,7 @@ def _getitem_tuple(self, tup): except: pass - retval = self.obj + retval = self.obj.copy() axis = 0 for i, key in enumerate(tup): if i >= self.obj.ndim: diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 9f647819438fa..70c7f673adf84 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -596,3 +596,5 @@ def test_loc_identity_slice_returns_new_object(self): # GH13873 df = DataFrame({'a': [1, 3, 5], 'b': [2, 4, 6]}) assert not df.iloc[:] is df + assert not df.iloc[:,:] is df + assert not df.iloc[pd.IndexSlice[:, :]] is df diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4c7250707da86..d86cf1fbc5520 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -635,3 +635,5 @@ def test_loc_identity_slice_returns_new_object(self): # GH13873 df = DataFrame({'a': [1, 3, 5], 'b': [2, 4, 6]}) assert not df.loc[:] is df + assert not df.loc[:,:] is df + assert not df.loc[pd.IndexSlice[:, :]] is df From fd47ef0389f061ad4c167ec94e827c17061113c1 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Mon, 22 May 2017 17:11:59 -0700 Subject: [PATCH 03/13] Revert "iloc and loc return new object when slice is a tuple" This reverts commit 5daffa8a240f165cdaf74f70c5cb48fc4a0aa128. --- pandas/core/indexing.py | 5 +++-- pandas/tests/indexing/test_iloc.py | 2 -- pandas/tests/indexing/test_loc.py | 2 -- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fd43b27e17610..0437d91a94bb6 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -845,7 +845,7 @@ def _getitem_tuple(self, tup): return self._multi_take(tup) # no shortcut needed - retval = self.obj.copy() + retval = self.obj for i, key in enumerate(tup): if i >= self.obj.ndim: raise IndexingError('Too many indexers') @@ -854,6 +854,7 @@ def _getitem_tuple(self, tup): continue retval = getattr(retval, self.name)._getitem_axis(key, axis=i) + return retval def _multi_take_opportunity(self, tup): @@ -1664,7 +1665,7 @@ def _getitem_tuple(self, tup): except: pass - retval = self.obj.copy() + retval = self.obj axis = 0 for i, key in enumerate(tup): if i >= self.obj.ndim: diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 70c7f673adf84..9f647819438fa 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -596,5 +596,3 @@ def test_loc_identity_slice_returns_new_object(self): # GH13873 df = DataFrame({'a': [1, 3, 5], 'b': [2, 4, 6]}) assert not df.iloc[:] is df - assert not df.iloc[:,:] is df - assert not df.iloc[pd.IndexSlice[:, :]] is df diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d86cf1fbc5520..4c7250707da86 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -635,5 +635,3 @@ def test_loc_identity_slice_returns_new_object(self): # GH13873 df = DataFrame({'a': [1, 3, 5], 'b': [2, 4, 6]}) assert not df.loc[:] is df - assert not df.loc[:,:] is df - assert not df.loc[pd.IndexSlice[:, :]] is df From ef0af94bd366c5da16c3d3c1b66ae972d5ef3c2b Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Tue, 23 May 2017 10:20:04 -0700 Subject: [PATCH 04/13] return a shallow copy instead of deep copy --- pandas/core/indexing.py | 6 +++--- pandas/tests/indexing/test_iloc.py | 8 ++++++-- pandas/tests/indexing/test_loc.py | 8 ++++++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0437d91a94bb6..eb24f32cfbe81 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1250,7 +1250,7 @@ def _get_slice_axis(self, slice_obj, axis=0): obj = self.obj if not need_slice(slice_obj): - return obj.copy() + return obj.copy(deep=False) indexer = self._convert_slice_indexer(slice_obj, axis) if isinstance(indexer, slice): @@ -1349,7 +1349,7 @@ def _get_slice_axis(self, slice_obj, axis=0): """ this is pretty simple as we just have to deal with labels """ obj = self.obj if not need_slice(slice_obj): - return obj.copy() + return obj.copy(deep=False) labels = obj._get_axis(axis) indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, @@ -1690,7 +1690,7 @@ def _get_slice_axis(self, slice_obj, axis=0): obj = self.obj if not need_slice(slice_obj): - return obj.copy() + return obj.copy(deep=False) slice_obj = self._convert_slice_indexer(slice_obj, axis) if isinstance(slice_obj, slice): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 9f647819438fa..edd0678c2b59a 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -594,5 +594,9 @@ def test_iloc_empty_list_indexer_is_ok(self): def test_loc_identity_slice_returns_new_object(self): # GH13873 - df = DataFrame({'a': [1, 3, 5], 'b': [2, 4, 6]}) - assert not df.iloc[:] is df + df = DataFrame({'a': [1, 2, 3]}) + result = df.iloc[:] + assert not result is df + # should be a shallow copy + df['a'] = [4, 4, 4] + assert (result['a'] == 4).all() diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4c7250707da86..2ee32f69da394 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -633,5 +633,9 @@ def test_loc_empty_list_indexer_is_ok(self): def test_loc_identity_slice_returns_new_object(self): # GH13873 - df = DataFrame({'a': [1, 3, 5], 'b': [2, 4, 6]}) - assert not df.loc[:] is df + df = DataFrame({'a': [1, 2, 3]}) + result = df.loc[:] + assert not result is df + # should be a shallow copy + df['a'] = [4, 4, 4] + assert (result['a'] == 4).all() From cd4980ea1a525f3853f4a0d1967ab46680a1be2c Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Tue, 23 May 2017 14:17:56 -0700 Subject: [PATCH 05/13] _getitem_lowerdim won't return copy of object if called with [:] --- pandas/core/indexing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index eb24f32cfbe81..ae0aaf98fdf02 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -988,6 +988,10 @@ def _getitem_lowerdim(self, tup): if len(new_key) == 1: new_key, = new_key + # Slices should return views, but calling iloc/loc with a null + # slice returns a new object. + if is_null_slice(new_key): + return section # This is an elided recursive call to iloc/loc/etc' return getattr(section, self.name)[new_key] From 6771c595f10f27d1ecb5d107768680ff753d8a07 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Tue, 23 May 2017 14:43:59 -0700 Subject: [PATCH 06/13] Add item for indexers returning new object instead of original --- doc/source/whatsnew/v0.21.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 79f2816f43a6f..0c6a332c8a1b4 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -97,6 +97,7 @@ Conversion Indexing ^^^^^^^^ +- When called with a null slice (i.e. ``df.iloc[:]``), the``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). I/O From a405a108beaf7732df4cfe5990912c61152ff3f7 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Tue, 23 May 2017 14:48:59 -0700 Subject: [PATCH 07/13] grammar correction - i.e. to e.g. --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0c6a332c8a1b4..d6b699abdba2d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -97,7 +97,7 @@ Conversion Indexing ^^^^^^^^ -- When called with a null slice (i.e. ``df.iloc[:]``), the``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). +- When called with a null slice (e.g. ``df.iloc[:]``), the``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). I/O From ad1be5d77ac1cd3362b753a63a245a3f5afcbf29 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Thu, 25 May 2017 11:06:42 -0700 Subject: [PATCH 08/13] fix some spacing and linting errors --- pandas/tests/indexing/test_iloc.py | 3 ++- pandas/tests/indexing/test_loc.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index edd0678c2b59a..23f8223f60607 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -596,7 +596,8 @@ def test_loc_identity_slice_returns_new_object(self): # GH13873 df = DataFrame({'a': [1, 2, 3]}) result = df.iloc[:] - assert not result is df + assert result is not df + # should be a shallow copy df['a'] = [4, 4, 4] assert (result['a'] == 4).all() diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2ee32f69da394..411f1dfe88d86 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -635,7 +635,8 @@ def test_loc_identity_slice_returns_new_object(self): # GH13873 df = DataFrame({'a': [1, 2, 3]}) result = df.loc[:] - assert not result is df + assert result is not df + # should be a shallow copy df['a'] = [4, 4, 4] assert (result['a'] == 4).all() From c139b479d83e3c48ed3b636b6a249e0e3540288e Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Thu, 25 May 2017 11:11:40 -0700 Subject: [PATCH 09/13] remove indexer type from test name --- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/indexing/test_loc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 23f8223f60607..b62c252e366de 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -592,7 +592,7 @@ def test_iloc_empty_list_indexer_is_ok(self): check_index_type=True, check_column_type=True) - def test_loc_identity_slice_returns_new_object(self): + def test_identity_slice_returns_new_object(self): # GH13873 df = DataFrame({'a': [1, 2, 3]}) result = df.iloc[:] diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 411f1dfe88d86..d33eae59f45f5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -631,7 +631,7 @@ def test_loc_empty_list_indexer_is_ok(self): check_index_type=True, check_column_type=True) - def test_loc_identity_slice_returns_new_object(self): + def test_identity_slice_returns_new_object(self): # GH13873 df = DataFrame({'a': [1, 2, 3]}) result = df.loc[:] From 7634d3bd3e2a9db1a288e90d26995a9fe7d257d9 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Thu, 25 May 2017 11:48:36 -0700 Subject: [PATCH 10/13] add tests for Series --- pandas/tests/indexing/test_iloc.py | 18 +++++++++++++----- pandas/tests/indexing/test_loc.py | 18 +++++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index b62c252e366de..769cf8ec395dd 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -594,10 +594,18 @@ def test_iloc_empty_list_indexer_is_ok(self): def test_identity_slice_returns_new_object(self): # GH13873 - df = DataFrame({'a': [1, 2, 3]}) - result = df.iloc[:] - assert result is not df + original_df = DataFrame({'a': [1, 2, 3]}) + sliced_df = original_df.iloc[:] + assert sliced_df is not original_df # should be a shallow copy - df['a'] = [4, 4, 4] - assert (result['a'] == 4).all() + original_df['a'] = [4, 4, 4] + assert (sliced_df['a'] == 4).all() + + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.iloc[:] + assert sliced_series is not original_series + + # should also be a shallow copy + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d33eae59f45f5..44dcbd7affaff 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -633,10 +633,18 @@ def test_loc_empty_list_indexer_is_ok(self): def test_identity_slice_returns_new_object(self): # GH13873 - df = DataFrame({'a': [1, 2, 3]}) - result = df.loc[:] - assert result is not df + original_df = DataFrame({'a': [1, 2, 3]}) + sliced_df = original_df.loc[:] + assert sliced_df is not original_df # should be a shallow copy - df['a'] = [4, 4, 4] - assert (result['a'] == 4).all() + original_df['a'] = [4, 4, 4] + assert (sliced_df['a'] == 4).all() + + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.loc[:] + assert sliced_series is not original_series + + # should also be a shallow copy + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) From c78c43c9672a558f3ec0f4c8bc1fb6d330e81f34 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Thu, 25 May 2017 14:10:14 -0700 Subject: [PATCH 11/13] test that df.loc[:,0] returns a view --- pandas/tests/indexing/test_loc.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 44dcbd7affaff..272e3bd1172e7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -641,10 +641,15 @@ def test_identity_slice_returns_new_object(self): original_df['a'] = [4, 4, 4] assert (sliced_df['a'] == 4).all() + # These should not return copies + assert original_df is original_df.loc[:, :] + df = DataFrame(np.random.randn(10, 4)) + assert df[0] is df.loc[:,0] + + # Same tests for Series original_series = Series([1, 2, 3, 4, 5, 6]) sliced_series = original_series.loc[:] assert sliced_series is not original_series - # should also be a shallow copy original_series[:3] = [7, 8, 9] assert all(sliced_series[:3] == [7, 8, 9]) From 3f33c61d3b59519ec8da56b0230e88ae3c1fc880 Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Fri, 26 May 2017 20:12:23 -0700 Subject: [PATCH 12/13] fix linting error --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 272e3bd1172e7..571268fba86c8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -644,7 +644,7 @@ def test_identity_slice_returns_new_object(self): # These should not return copies assert original_df is original_df.loc[:, :] df = DataFrame(np.random.randn(10, 4)) - assert df[0] is df.loc[:,0] + assert df[0] is df.loc[:, 0] # Same tests for Series original_series = Series([1, 2, 3, 4, 5, 6]) From d13082430513a0ebcb61f0c4ad79eebeab27194f Mon Sep 17 00:00:00 2001 From: Margaret Sy Date: Sat, 3 Jun 2017 10:39:10 -0700 Subject: [PATCH 13/13] add tests for direct slicing --- pandas/tests/indexing/test_loc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 571268fba86c8..3e863a59df67e 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -636,6 +636,7 @@ def test_identity_slice_returns_new_object(self): original_df = DataFrame({'a': [1, 2, 3]}) sliced_df = original_df.loc[:] assert sliced_df is not original_df + assert original_df[:] is not original_df # should be a shallow copy original_df['a'] = [4, 4, 4] @@ -650,6 +651,7 @@ def test_identity_slice_returns_new_object(self): original_series = Series([1, 2, 3, 4, 5, 6]) sliced_series = original_series.loc[:] assert sliced_series is not original_series + assert original_series[:] is not original_series original_series[:3] = [7, 8, 9] assert all(sliced_series[:3] == [7, 8, 9])