From 7bbeb79297532d209d67f7bdc61394ddee3acbb1 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 14 Oct 2013 22:18:14 -0400 Subject: [PATCH] BUG: allow enlargement to work with empty objects (GH5226) --- doc/source/release.rst | 2 +- pandas/core/frame.py | 13 ++++++++- pandas/core/indexing.py | 44 +++++++++++++++++++++++------- pandas/tests/test_indexing.py | 50 +++++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 11 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 36afea7648ab2..ab4bc1a1f0bf9 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -206,7 +206,7 @@ API Changes (:issue:`4384`, :issue:`4375`, :issue:`4372`) - ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`) - allow ``ix/loc`` for Series/DataFrame/Panel to set on any axis even when the single-key is not currently contained in - the index for that axis (:issue:`2578`) + the index for that axis (:issue:`2578`, :issue:`5226`) - ``at`` now will enlarge the object inplace (and return the same) (:issue:`2578`) - ``HDFStore`` diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7013ad4f9b02b..504d49ddca13a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1865,6 +1865,15 @@ def _setitem_frame(self, key, value): self.where(-key, value, inplace=True) + def _ensure_valid_index(self, value): + """ + ensure that if we don't have an index, that we can create one from the passed value + """ + if not len(self.index): + if not isinstance(value, Series): + raise ValueError("cannot set a frame with no defined index and a non-series") + self._data.set_axis(1, value.index.copy(), check_axis=False) + def _set_item(self, key, value): """ Add series to DataFrame in specified column. @@ -1875,6 +1884,7 @@ def _set_item(self, key, value): Series/TimeSeries will be conformed to the DataFrame's index to ensure homogeneity. """ + self._ensure_valid_index(value) value = self._sanitize_column(key, value) NDFrame._set_item(self, key, value) @@ -1890,6 +1900,7 @@ def insert(self, loc, column, value, allow_duplicates=False): column : object value : int, Series, or array-like """ + self._ensure_valid_index(value) value = self._sanitize_column(column, value) self._data.insert( loc, column, value, allow_duplicates=allow_duplicates) @@ -1900,7 +1911,7 @@ def _sanitize_column(self, key, value): if _is_sequence(value): is_frame = isinstance(value, DataFrame) if isinstance(value, Series) or is_frame: - if value.index.equals(self.index): + if value.index.equals(self.index) or not len(self.index): # copy the values value = value.values.copy() else: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fa58d82a3b580..d32bf166ddea1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -173,9 +173,19 @@ def _setitem_with_indexer(self, indexer, value): if self.ndim > 1 and i == self.obj._info_axis_number: # add the new item, and set the value - new_indexer = _convert_from_missing_indexer_tuple(indexer) + # must have all defined axes if we have a scalar + # or a list-like on the non-info axes if we have a list-like + len_non_info_axes = [ len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i ] + if any([ not l for l in len_non_info_axes ]): + if not is_list_like(value): + raise ValueError("cannot set a frame with no defined index and a scalar") + self.obj[key] = value + return self.obj + self.obj[key] = np.nan - self.obj.loc[new_indexer] = value + + new_indexer = _convert_from_missing_indexer_tuple(indexer, self.obj.axes) + self._setitem_with_indexer(new_indexer, value) return self.obj # reindex the axis @@ -208,12 +218,21 @@ def _setitem_with_indexer(self, indexer, value): else: new_index = _safe_append_to_index(index, indexer) - new_values = np.concatenate([self.obj.values, [value]]) + # this preserves dtype of the value + new_values = Series([value]).values + if len(self.obj.values): + new_values = np.concatenate([self.obj.values, new_values]) + self.obj._data = self.obj._constructor(new_values, index=new_index, name=self.obj.name) self.obj._maybe_update_cacher(clear=True) return self.obj elif self.ndim == 2: + + # no columns and scalar + if not len(self.obj.columns): + raise ValueError("cannot set a frame with no defined columns") + index = self.obj._get_axis(0) labels = _safe_append_to_index(index, indexer) self.obj._data = self.obj.reindex_axis(labels,0)._data @@ -410,8 +429,9 @@ def _align_series(self, indexer, ser): new_ix = Index([new_ix]) else: new_ix = Index(new_ix.ravel()) - if ser.index.equals(new_ix): + if ser.index.equals(new_ix) or not len(new_ix): return ser.values.copy() + return ser.reindex(new_ix).values # 2 dims @@ -419,7 +439,7 @@ def _align_series(self, indexer, ser): # reindex along index ax = self.obj.axes[1] - if ser.index.equals(ax): + if ser.index.equals(ax) or not len(ax): return ser.values.copy() return ser.reindex(ax).values @@ -819,6 +839,12 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): # if we are setting and its not a valid location # its an insert which fails by definition if is_setter: + + # always valid + if self.name == 'loc': + return { 'key' : obj } + + # a positional if obj >= len(self.obj) and not isinstance(labels, MultiIndex): raise ValueError("cannot set by positional indexing with enlargement") @@ -1307,11 +1333,11 @@ def _convert_missing_indexer(indexer): return indexer, False -def _convert_from_missing_indexer_tuple(indexer): +def _convert_from_missing_indexer_tuple(indexer, axes): """ create a filtered indexer that doesn't have any missing indexers """ - def get_indexer(_idx): - return _idx['key'] if isinstance(_idx,dict) else _idx - return tuple([ get_indexer(_idx) for _i, _idx in enumerate(indexer) ]) + def get_indexer(_i, _idx): + return axes[_i].get_loc(_idx['key']) if isinstance(_idx,dict) else _idx + return tuple([ get_indexer(_i, _idx) for _i, _idx in enumerate(indexer) ]) def _safe_append_to_index(index, key): """ a safe append to an index, if incorrect type, then catch and recreate """ diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index b69496b042274..7745c2f2a083b 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1542,6 +1542,56 @@ def f(): df.ix[100,:] = df.ix[0] self.assertRaises(ValueError, f) + def test_partial_set_empty(self): + + # GH5226 + + # partially set with an empty object + # series + s = Series() + s.loc[1] = 1 + assert_series_equal(s,Series([1],index=[1])) + s.loc[3] = 3 + assert_series_equal(s,Series([1,3],index=[1,3])) + + s = Series() + s.loc[1] = 1. + assert_series_equal(s,Series([1.],index=[1])) + s.loc[3] = 3. + assert_series_equal(s,Series([1.,3.],index=[1,3])) + + s = Series() + s.loc['foo'] = 1 + assert_series_equal(s,Series([1],index=['foo'])) + s.loc['bar'] = 3 + assert_series_equal(s,Series([1,3],index=['foo','bar'])) + s.loc[3] = 4 + assert_series_equal(s,Series([1,3,4],index=['foo','bar',3])) + + # partially set with an empty object + # frame + df = DataFrame() + + def f(): + df.loc[1] = 1 + self.assertRaises(ValueError, f) + def f(): + df.loc[1] = Series([1],index=['foo']) + self.assertRaises(ValueError, f) + def f(): + df.loc[:,1] = 1 + self.assertRaises(ValueError, f) + + df2 = DataFrame() + df2[1] = Series([1],index=['foo']) + df.loc[:,1] = Series([1],index=['foo']) + assert_frame_equal(df,DataFrame([[1]],index=['foo'],columns=[1])) + assert_frame_equal(df,df2) + + df = DataFrame(columns=['A','B']) + df.loc[3] = [6,7] + assert_frame_equal(df,DataFrame([[6,7]],index=[3],columns=['A','B'])) + def test_cache_updating(self): # GH 4939, make sure to update the cache on setitem