Skip to content

Commit 0085460

Browse files
committed
ENH: add get_value/set_value support to sparse data structures, #438
1 parent 231a6da commit 0085460

File tree

8 files changed

+142
-27
lines changed

8 files changed

+142
-27
lines changed

RELEASE.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ pandas 0.6.1
3535
DataFrame.corr (GH #428)
3636
- Add new `get_value` and `set_value` methods to Series, DataFrame, and Panel
3737
for very low-overhead access to scalar elements. df.get_value(row, column)
38-
is about 3x faster than df[column][row] by handling fewer cases (GH #437)
38+
is about 3x faster than df[column][row] by handling fewer cases (GH #437,
39+
#438). Add similar methods to sparse data structures for compatibility
3940
- Add Qt table widget to sandbox (PR #435)
4041

4142
**Improvements to existing features**
@@ -49,6 +50,7 @@ pandas 0.6.1
4950
- Override Index.astype to enable dtype casting (GH #412)
5051
- Use same float formatting function for Series.__repr__ (PR #420)
5152
- Use available console width to output DataFrame columns (PR #453)
53+
- Accept ndarrays when setting items in Panel (GH #452)
5254

5355
**Bug fixes**
5456

pandas/core/frame.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -956,17 +956,9 @@ def set_value(self, index, col, value):
956956
engine.set_value(series, index, value)
957957
return self
958958
except KeyError:
959-
if index not in self.index:
960-
new_index = np.concatenate([self.index, [index]])
961-
else:
962-
new_index = self.index
963-
if col not in self.columns:
964-
new_columns = np.concatenate([self.columns, [col]])
965-
else:
966-
new_columns = self.columns
967-
result = DataFrame(self._data, index=new_index,
968-
columns=new_columns)
969-
959+
new_index, new_columns = self._expand_axes((index, col))
960+
result = self.reindex(index=new_index, columns=new_columns,
961+
copy=False)
970962
likely_dtype = com._infer_dtype(value)
971963
if result[col].dtype != likely_dtype:
972964
result[col] = result[col].astype(likely_dtype)

pandas/core/generic.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,16 @@ def pop(self, item):
318318
del self[item]
319319
return result
320320

321+
def _expand_axes(self, key):
322+
new_axes = []
323+
for k, ax in zip(key, self.axes):
324+
if k not in ax:
325+
new_axes.append(np.concatenate([ax, [k]]))
326+
else:
327+
new_axes.append(ax)
328+
329+
return new_axes
330+
321331
#----------------------------------------------------------------------
322332
# Consolidation of internals
323333

pandas/core/panel.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -489,8 +489,15 @@ def set_value(self, item, major, minor, value):
489489
If label combo is contained, will be reference to calling Panel,
490490
otherwise a new object
491491
"""
492-
frame = self._get_item_cache(item)
493-
return frame.set_value(major, minor, value)
492+
try:
493+
frame = self._get_item_cache(item)
494+
frame.set_value(major, minor, value)
495+
return self
496+
except KeyError:
497+
ax1, ax2, ax3 = self._expand_axes((item, major, minor))
498+
result = self.reindex(items=ax1, major=ax2, minor=ax3, copy=False)
499+
result = result.set_value(item, major, minor, value)
500+
return result
494501

495502
def _box_item_values(self, key, values):
496503
return DataFrame(values, index=self.major_axis, columns=self.minor_axis)

pandas/core/series.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,11 +327,15 @@ def get_value(self, label):
327327
def set_value(self, label, value):
328328
"""
329329
Quickly set single value at passed label. If label is not contained, a
330+
new object is created with the label placed at the end of the result
331+
index
330332
331333
Parameters
332334
----------
333335
label : object
334336
Partial indexing with MultiIndex not allowed
337+
value : object
338+
Scalar value
335339
336340
Returns
337341
-------

pandas/core/sparse.py

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -400,11 +400,44 @@ def _get_val_at(self, loc):
400400
return ndarray.__getitem__(self, sp_loc)
401401

402402
def get_value(self, label):
403+
"""
404+
Retrieve single value at passed index label
405+
406+
Parameters
407+
----------
408+
label : index label
409+
410+
Returns
411+
-------
412+
value : scalar value
413+
"""
403414
loc = self.index.get_loc(label)
404415
return self._get_val_at(loc)
405416

406417
def set_value(self, label, value):
407-
raise Exception('SparseSeries is immutable')
418+
"""
419+
Quickly set single value at passed label. If label is not contained, a
420+
new object is created with the label placed at the end of the result
421+
index
422+
423+
Parameters
424+
----------
425+
label : object
426+
Partial indexing with MultiIndex not allowed
427+
value : object
428+
Scalar value
429+
430+
Notes
431+
-----
432+
This method *always* returns a new object. It is not particularly
433+
efficient but is provided for API compatibility with Series
434+
435+
Returns
436+
-------
437+
series : SparseSeries
438+
"""
439+
dense = self.to_dense().set_value(label, value)
440+
return dense.to_sparse(kind=self.kind, fill_value=self.fill_value)
408441

409442
def take(self, indices):
410443
"""
@@ -946,13 +979,32 @@ def _get_item_cache(self, key):
946979
def get_value(self, index, col):
947980
s = self._series[col]
948981
return s.get_value(index)
949-
if __debug__: get_value.__doc__ = DataFrame.get_value.__doc__
982+
if __debug__:
983+
get_value.__doc__ = DataFrame.get_value.__doc__
950984

951985
def set_value(self, index, col, value):
952986
"""
953-
Not implemented for SparseDataFrame
987+
Put single value at passed column and index
988+
989+
Parameters
990+
----------
991+
index : row label
992+
col : column label
993+
value : scalar value
994+
995+
Notes
996+
-----
997+
This method *always* returns a new object. It is currently not
998+
particularly efficient (and potentially very expensive) but is provided
999+
for API compatibility with DataFrame
1000+
1001+
Returns
1002+
-------
1003+
frame : SparseDataFrame
9541004
"""
955-
raise Exception('Values in SparseDataFrame are immutable')
1005+
dense = self.to_dense().set_value(index, col, value)
1006+
return dense.to_sparse(kind=self.default_kind,
1007+
fill_value=self.default_fill_value)
9561008

9571009
def _slice(self, slobj, axis=0):
9581010
if axis == 0:
@@ -1527,8 +1579,29 @@ def __setitem__(self, key, value):
15271579
if key not in self.items:
15281580
self._items = Index(list(self.items) + [key])
15291581

1530-
def set_value(self, item, row, column, value):
1531-
raise Exception('Sparse object scalar values are immutable')
1582+
def set_value(self, item, major, minor, value):
1583+
"""
1584+
Quickly set single value at (item, major, minor) location
1585+
1586+
Parameters
1587+
----------
1588+
item : item label (panel item)
1589+
major : major axis label (panel item row)
1590+
minor : minor axis label (panel item column)
1591+
value : scalar
1592+
1593+
Notes
1594+
-----
1595+
This method *always* returns a new object. It is not particularly
1596+
efficient but is provided for API compatibility with Panel
1597+
1598+
Returns
1599+
-------
1600+
panel : SparsePanel
1601+
"""
1602+
dense = self.to_dense().set_value(item, major, minor, value)
1603+
return dense.to_sparse(kind=self.default_kind,
1604+
fill_value=self.default_fill_value)
15321605

15331606
def __delitem__(self, key):
15341607
loc = self.items.get_loc(key)

pandas/tests/test_panel.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,12 @@ def test_set_value(self):
565565
self.panel.set_value(item, mjr, mnr, 1.)
566566
assert_almost_equal(self.panel[item][mnr][mjr], 1.)
567567

568+
# resize
569+
res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5)
570+
self.assert_(isinstance(res, Panel))
571+
self.assert_(res is not self.panel)
572+
self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5)
573+
568574
class TestPanel(unittest.TestCase, PanelTests, CheckIndexing,
569575
SafeForLongAndSparse,
570576
SafeForSparse):

pandas/tests/test_sparse.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,15 @@ def test_get_get_value(self):
316316
assert_almost_equal(self.bseries.get_value(10), self.bseries[10])
317317

318318
def test_set_value(self):
319-
self.assertRaises(Exception, self.bseries.set_value, 10, 0)
319+
idx = self.btseries.index[7]
320+
res = self.btseries.set_value(idx, 0)
321+
self.assert_(res is not self.btseries)
322+
self.assertEqual(res[idx], 0)
323+
324+
res = self.iseries.set_value('foobar', 0)
325+
self.assert_(res is not self.iseries)
326+
self.assert_(res.index[-1] == 'foobar')
327+
self.assertEqual(res['foobar'], 0)
320328

321329
def test_getitem_fancy_index(self):
322330
idx = self.bseries.index
@@ -830,7 +838,16 @@ def test_getitem(self):
830838
pass
831839

832840
def test_set_value(self):
833-
self.assertRaises(Exception, self.frame.set_value, 10, 0)
841+
res = self.frame.set_value('foobar', 'B', 1.5)
842+
self.assert_(res is not self.frame)
843+
self.assert_(res.index[-1] == 'foobar')
844+
self.assertEqual(res.get_value('foobar', 'B'), 1.5)
845+
846+
res2 = res.set_value('foobar', 'qux', 1.5)
847+
self.assert_(res2 is not res)
848+
self.assert_(np.array_equal(res2.columns,
849+
list(self.frame.columns) + ['qux']))
850+
self.assertEqual(res2.get_value('foobar', 'qux'), 1.5)
834851

835852
def test_fancy_index_misc(self):
836853
# axis = 0
@@ -1273,11 +1290,15 @@ def test_setitem(self):
12731290
self.assertRaises(Exception, self.panel.__setitem__, 'item6', 1)
12741291

12751292
def test_set_value(self):
1276-
mjr = self.panel.major_axis[4]
1277-
mnr = self.panel.minor_axis[3]
1278-
1279-
self.assertRaises(Exception, self.panel.set_value, 'ItemA',
1280-
)
1293+
def _check_loc(item, major, minor, val=1.5):
1294+
res = self.panel.set_value(item, major, minor, val)
1295+
self.assert_(res is not self.panel)
1296+
self.assertEquals(res.get_value(item, major, minor), val)
1297+
1298+
_check_loc('ItemA', self.panel.major_axis[4], self.panel.minor_axis[3])
1299+
_check_loc('ItemF', self.panel.major_axis[4], self.panel.minor_axis[3])
1300+
_check_loc('ItemF', 'foo', self.panel.minor_axis[3])
1301+
_check_loc('ItemE', 'foo', 'bar')
12811302

12821303
def test_delitem_pop(self):
12831304
del self.panel['ItemB']

0 commit comments

Comments
 (0)