Skip to content

BUG: allow enlargement to work with empty objects (GH5226) #5227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 15, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ API Changes
(:issue:`4384`, :issue:`4375`, :issue:`4372`)
- ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`)
- allow ``ix/loc`` for Series/DataFrame/Panel to set on any axis even when the single-key is not currently contained in
the index for that axis (:issue:`2578`)
the index for that axis (:issue:`2578`, :issue:`5226`)
- ``at`` now will enlarge the object inplace (and return the same) (:issue:`2578`)

- ``HDFStore``
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1865,6 +1865,15 @@ def _setitem_frame(self, key, value):

self.where(-key, value, inplace=True)

def _ensure_valid_index(self, value):
    """
    Give a frame that has no index an index taken from ``value``.

    Nothing happens when ``self.index`` already has entries. Otherwise
    ``value`` has to be a Series, and a copy of its index is installed
    via ``self._data.set_axis(1, ..., check_axis=False)``.

    Raises
    ------
    ValueError
        if the frame has no index and ``value`` is not a Series
    """
    # an existing (non-empty) index is left untouched
    if len(self.index):
        return

    # only a Series brings along an index that can be adopted
    if not isinstance(value, Series):
        raise ValueError("cannot set a frame with no defined index and a non-series")

    # copy, so the frame does not share the index object with ``value``
    self._data.set_axis(
        1,
        value.index.copy(),
        check_axis=False,
    )

def _set_item(self, key, value):
"""
Add series to DataFrame in specified column.
Expand All @@ -1875,6 +1884,7 @@ def _set_item(self, key, value):
Series/TimeSeries will be conformed to the DataFrame's index to
ensure homogeneity.
"""
self._ensure_valid_index(value)
value = self._sanitize_column(key, value)
NDFrame._set_item(self, key, value)

Expand All @@ -1890,6 +1900,7 @@ def insert(self, loc, column, value, allow_duplicates=False):
column : object
value : int, Series, or array-like
"""
self._ensure_valid_index(value)
value = self._sanitize_column(column, value)
self._data.insert(
loc, column, value, allow_duplicates=allow_duplicates)
Expand All @@ -1900,7 +1911,7 @@ def _sanitize_column(self, key, value):
if _is_sequence(value):
is_frame = isinstance(value, DataFrame)
if isinstance(value, Series) or is_frame:
if value.index.equals(self.index):
if value.index.equals(self.index) or not len(self.index):
# copy the values
value = value.values.copy()
else:
Expand Down
44 changes: 35 additions & 9 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,19 @@ def _setitem_with_indexer(self, indexer, value):
if self.ndim > 1 and i == self.obj._info_axis_number:

# add the new item, and set the value
new_indexer = _convert_from_missing_indexer_tuple(indexer)
# must have all defined axes if we have a scalar
# or a list-like on the non-info axes if we have a list-like
len_non_info_axes = [ len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i ]
if any([ not l for l in len_non_info_axes ]):
if not is_list_like(value):
raise ValueError("cannot set a frame with no defined index and a scalar")
self.obj[key] = value
return self.obj

self.obj[key] = np.nan
self.obj.loc[new_indexer] = value

new_indexer = _convert_from_missing_indexer_tuple(indexer, self.obj.axes)
self._setitem_with_indexer(new_indexer, value)
return self.obj

# reindex the axis
Expand Down Expand Up @@ -208,12 +218,21 @@ def _setitem_with_indexer(self, indexer, value):
else:
new_index = _safe_append_to_index(index, indexer)

new_values = np.concatenate([self.obj.values, [value]])
# this preserves dtype of the value
new_values = Series([value]).values
if len(self.obj.values):
new_values = np.concatenate([self.obj.values, new_values])

self.obj._data = self.obj._constructor(new_values, index=new_index, name=self.obj.name)
self.obj._maybe_update_cacher(clear=True)
return self.obj

elif self.ndim == 2:

# no columns and scalar
if not len(self.obj.columns):
raise ValueError("cannot set a frame with no defined columns")

index = self.obj._get_axis(0)
labels = _safe_append_to_index(index, indexer)
self.obj._data = self.obj.reindex_axis(labels,0)._data
Expand Down Expand Up @@ -410,16 +429,17 @@ def _align_series(self, indexer, ser):
new_ix = Index([new_ix])
else:
new_ix = Index(new_ix.ravel())
if ser.index.equals(new_ix):
if ser.index.equals(new_ix) or not len(new_ix):
return ser.values.copy()

return ser.reindex(new_ix).values

# 2 dims
elif single_aligner and is_frame:

# reindex along index
ax = self.obj.axes[1]
if ser.index.equals(ax):
if ser.index.equals(ax) or not len(ax):
return ser.values.copy()
return ser.reindex(ax).values

Expand Down Expand Up @@ -819,6 +839,12 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
# if we are setting and its not a valid location
# its an insert which fails by definition
if is_setter:

# always valid
if self.name == 'loc':
return { 'key' : obj }

# a positional
if obj >= len(self.obj) and not isinstance(labels, MultiIndex):
raise ValueError("cannot set by positional indexing with enlargement")

Expand Down Expand Up @@ -1307,11 +1333,11 @@ def _convert_missing_indexer(indexer):

return indexer, False

def _convert_from_missing_indexer_tuple(indexer):
def _convert_from_missing_indexer_tuple(indexer, axes):
""" create a filtered indexer that doesn't have any missing indexers """
def get_indexer(_idx):
return _idx['key'] if isinstance(_idx,dict) else _idx
return tuple([ get_indexer(_idx) for _i, _idx in enumerate(indexer) ])
def get_indexer(_i, _idx):
return axes[_i].get_loc(_idx['key']) if isinstance(_idx,dict) else _idx
return tuple([ get_indexer(_i, _idx) for _i, _idx in enumerate(indexer) ])

def _safe_append_to_index(index, key):
""" a safe append to an index, if incorrect type, then catch and recreate """
Expand Down
50 changes: 50 additions & 0 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1542,6 +1542,56 @@ def f():
df.ix[100,:] = df.ix[0]
self.assertRaises(ValueError, f)

def test_partial_set_empty(self):
    # GH5226
    # Setting through ``.loc`` with a label that does not exist yet must
    # enlarge an object that starts out empty.

    # partially set with an empty object
    # series

    # integer values under integer labels; a second new label appends
    s = Series()
    s.loc[1] = 1
    assert_series_equal(s,Series([1],index=[1]))
    s.loc[3] = 3
    assert_series_equal(s,Series([1,3],index=[1,3]))

    # same sequence with float values
    s = Series()
    s.loc[1] = 1.
    assert_series_equal(s,Series([1.],index=[1]))
    s.loc[3] = 3.
    assert_series_equal(s,Series([1.,3.],index=[1,3]))

    # string labels first, then an integer label added to the same index
    s = Series()
    s.loc['foo'] = 1
    assert_series_equal(s,Series([1],index=['foo']))
    s.loc['bar'] = 3
    assert_series_equal(s,Series([1,3],index=['foo','bar']))
    s.loc[3] = 4
    assert_series_equal(s,Series([1,3,4],index=['foo','bar',3]))

    # partially set with an empty object
    # frame
    # NOTE: ``df`` is captured by the closures below and reused by the
    # successful assignment further down, so the order of these statements
    # matters.
    df = DataFrame()

    # new row + scalar on a frame without index/columns -> ValueError
    def f():
        df.loc[1] = 1
    self.assertRaises(ValueError, f)
    # new row + Series on a frame without index/columns -> ValueError
    def f():
        df.loc[1] = Series([1],index=['foo'])
    self.assertRaises(ValueError, f)
    # new column + scalar on a frame without index -> ValueError
    def f():
        df.loc[:,1] = 1
    self.assertRaises(ValueError, f)

    # a new column given as a Series is accepted; the expected frame has
    # the Series' index ('foo'), and ``.loc`` must match plain ``[]`` setting
    df2 = DataFrame()
    df2[1] = Series([1],index=['foo'])
    df.loc[:,1] = Series([1],index=['foo'])
    assert_frame_equal(df,DataFrame([[1]],index=['foo'],columns=[1]))
    assert_frame_equal(df,df2)

    # columns defined but no rows: a list-like can be set as a new row
    df = DataFrame(columns=['A','B'])
    df.loc[3] = [6,7]
    assert_frame_equal(df,DataFrame([[6,7]],index=[3],columns=['A','B']))

def test_cache_updating(self):
# GH 4939, make sure to update the cache on setitem

Expand Down