Skip to content

Change pandas to never let objects share _data, create views instead #11855

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
dtype = self._validate_dtype(dtype)

if isinstance(data, DataFrame):
data = data.iloc[:,:]
data = data._data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to define `BlockManager.view()`; it will do all of the heavy lifting, and everything else will flow from there.


if isinstance(data, BlockManager):
Expand Down
24 changes: 4 additions & 20 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,9 +752,6 @@ def _getitem_tuple(self, tup):
if i >= self.obj.ndim:
raise IndexingError('Too many indexers')

if is_null_slice(key):
continue

retval = getattr(retval, self.name)._getitem_axis(key, axis=i)

return retval
Expand Down Expand Up @@ -1171,8 +1168,6 @@ def _tuplify(self, loc):
def _get_slice_axis(self, slice_obj, axis=0):
obj = self.obj

if not need_slice(slice_obj):
return obj
indexer = self._convert_slice_indexer(slice_obj, axis)

if isinstance(indexer, slice):
Expand Down Expand Up @@ -1244,8 +1239,7 @@ def _getbool_axis(self, key, axis=0):
def _get_slice_axis(self, slice_obj, axis=0):
""" this is pretty simple as we just have to deal with labels """
obj = self.obj
if not need_slice(slice_obj):
return obj


labels = obj._get_axis(axis)
indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop,
Expand Down Expand Up @@ -1461,9 +1455,9 @@ def _getitem_tuple(self, tup):
if i >= self.obj.ndim:
raise IndexingError('Too many indexers')

if is_null_slice(key):
axis += 1
continue
#if is_null_slice(key):
# axis += 1
# continue

retval = getattr(retval, self.name)._getitem_axis(key, axis=axis)

Expand All @@ -1477,10 +1471,6 @@ def _getitem_tuple(self, tup):
return retval

def _get_slice_axis(self, slice_obj, axis=0):
obj = self.obj

if not need_slice(slice_obj):
return obj

slice_obj = self._convert_slice_indexer(slice_obj, axis)
if isinstance(slice_obj, slice):
Expand Down Expand Up @@ -1792,12 +1782,6 @@ def is_label_like(key):
return not isinstance(key, slice) and not is_list_like_indexer(key)


def need_slice(obj):
    """Return True if slice ``obj`` is anything other than a null slice.

    A slice needs to be applied when it sets ``start``, sets ``stop``, or
    has a ``step`` that is neither ``None`` nor ``1``. ``slice(None)`` and
    ``slice(None, None, 1)`` therefore both return False.
    """
    return (obj.start is not None or
            obj.stop is not None or
            (obj.step is not None and obj.step != 1))


def maybe_droplevels(index, key):
# drop levels
original_index = index
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2637,6 +2637,7 @@ def test_constructor_dtype_copy(self):
def test_constructor_dtype_nocast_view(self):
df = DataFrame([[1, 2]])
should_be_view = DataFrame(df, dtype=df[0].dtype)
self.assertTrue(should_be_view._is_view)
should_be_view[0][0] = 99
self.assertEqual(df.values[0, 0], 99)

Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4912,6 +4912,14 @@ def test_maybe_numeric_slice(self):
expected = [1]
self.assertEqual(result, expected)

def test_empty_indexers_return_view(self):
    # Closes Issue 11814
    # Indexing with only null slices must yield an object flagged as a
    # view for each indexer under test (.loc, .iloc, .ix).
    df = pd.DataFrame({'col1': range(10, 20),
                       'col2': range(20, 30)})
    self.assertTrue(df.loc[:, :]._is_view)
    self.assertTrue(df.iloc[:, :]._is_view)
    self.assertTrue(df.ix[:, :]._is_view)


class TestCategoricalIndex(tm.TestCase):

Expand Down Expand Up @@ -5196,6 +5204,7 @@ def test_boolean_selection(self):
self.assertRaises(TypeError, lambda : df4[df4.index < 2])
self.assertRaises(TypeError, lambda : df4[df4.index > 1])


class TestSeriesNoneCoercion(tm.TestCase):
EXPECTED_RESULTS = [
# For numeric series, we should coerce to NaN.
Expand Down