Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 5adf4ea

Browse files
committedFeb 9, 2014
Merge pull request #6177 from wabu/selection-start-stop-fixes
ENH: select column/coordinates/multiple with start/stop/selection
2 parents f4bcfd4 + 6cb1bba commit 5adf4ea

File tree

3 files changed

+155
-38
lines changed

3 files changed

+155
-38
lines changed
 

‎doc/source/release.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ API Changes
6060
indexed. These will be excluded. This will make pandas conform more with pandas/numpy indexing of out-of-bounds
6161
values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
6262
``IndexError`` (:issue:`6296`)
63+
- ``select_as_multiple`` will always raise a ``KeyError`` when a key or the selector is not found (:issue:`6177`)
6364

6465
Experimental Features
6566
~~~~~~~~~~~~~~~~~~~~~
@@ -86,6 +87,9 @@ Bug Fixes
8687
- Bug in conversion of a string types to a DatetimeIndex with a specified frequency (:issue:`6273`, :issue:`6274`)
8788
- Bug in ``eval`` where type-promotion failed for large expressions (:issue:`6205`)
8889
- Bug in interpolate with inplace=True (:issue:`6281`)
90+
- ``HDFStore.remove`` now handles start and stop (:issue:`6177`)
91+
- ``HDFStore.select_as_multiple`` handles start and stop the same way as ``select`` (:issue:`6177`)
92+
- ``HDFStore.select_as_coordinates`` and ``select_column`` now work with where clauses that result in filters (:issue:`6177`)
8993

9094
pandas 0.13.1
9195
-------------

‎pandas/io/pytables.py

Lines changed: 56 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -724,8 +724,9 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
724724
725725
Exceptions
726726
----------
727-
raise if any of the keys don't refer to tables or if they are not ALL
728-
THE SAME DIMENSIONS
727+
raises KeyError if keys or selector is not found or keys is empty
728+
raises TypeError if keys is not a list or tuple
729+
raises ValueError if the tables are not ALL THE SAME DIMENSIONS
729730
"""
730731

731732
# default to single select
@@ -748,12 +749,13 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
748749

749750
# collect the tables
750751
tbls = [self.get_storer(k) for k in keys]
752+
s = self.get_storer(selector)
751753

752754
# validate rows
753755
nrows = None
754-
for t, k in zip(tbls, keys):
756+
for t, k in itertools.chain([(s,selector)], zip(tbls, keys)):
755757
if t is None:
756-
raise TypeError("Invalid table [%s]" % k)
758+
raise KeyError("Invalid table [%s]" % k)
757759
if not t.is_table:
758760
raise TypeError(
759761
"object [%s] is not a table, and cannot be used in all "
@@ -766,22 +768,17 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
766768
raise ValueError(
767769
"all tables must have exactly the same nrows!")
768770

769-
# select coordinates from the selector table
770-
try:
771-
c = self.select_as_coordinates(
772-
selector, where, start=start, stop=stop)
773-
nrows = len(c)
774-
except Exception:
775-
raise ValueError("invalid selector [%s]" % selector)
771+
# axis is the concatenation axis
772+
axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0]
776773

777774
def func(_start, _stop):
778-
779-
# collect the returns objs
780-
objs = [t.read(where=c[_start:_stop], columns=columns)
781-
for t in tbls]
782-
783-
# axis is the concentation axes
784-
axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0]
775+
if where is not None:
776+
c = s.read_coordinates(where=where, start=_start, stop=_stop, **kwargs)
777+
else:
778+
c = None
779+
780+
objs = [t.read(where=c, start=_start, stop=_stop,
781+
columns=columns, **kwargs) for t in tbls]
785782

786783
# concat and return
787784
return concat(objs, axis=axis,
@@ -860,7 +857,7 @@ def remove(self, key, where=None, start=None, stop=None):
860857
raise KeyError('No object named %s in the file' % key)
861858

862859
# remove the node
863-
if where is None:
860+
if where is None and start is None and stop is None:
864861
s.group._f_remove(recursive=True)
865862

866863
# delete from the table
@@ -2139,11 +2136,9 @@ def write(self, **kwargs):
21392136
raise NotImplementedError(
21402137
"cannot write on an abstract storer: sublcasses should implement")
21412138

2142-
def delete(self, where=None, **kwargs):
2143-
"""support fully deleting the node in its entirety (only) - where
2144-
specification must be None
2145-
"""
2146-
if where is None:
2139+
def delete(self, where=None, start=None, stop=None, **kwargs):
2140+
""" support fully deleting the node in its entirety (only) - where specification must be None """
2141+
if where is None and start is None and stop is None:
21472142
self._handle.removeNode(self.group, recursive=True)
21482143
return None
21492144

@@ -3381,9 +3376,15 @@ def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
33813376
# create the selection
33823377
self.selection = Selection(
33833378
self, where=where, start=start, stop=stop, **kwargs)
3384-
return Index(self.selection.select_coords())
3379+
coords = self.selection.select_coords()
3380+
if self.selection.filter is not None:
3381+
for field, op, filt in self.selection.filter.format():
3382+
data = self.read_column(field, start=coords.min(), stop=coords.max()+1)
3383+
coords = coords[op(data.iloc[coords-coords.min()], filt).values]
33853384

3386-
def read_column(self, column, where=None, **kwargs):
3385+
return Index(coords)
3386+
3387+
def read_column(self, column, where=None, start=None, stop=None, **kwargs):
33873388
"""return a single column from the table, generally only indexables
33883389
are interesting
33893390
"""
@@ -3411,7 +3412,7 @@ def read_column(self, column, where=None, **kwargs):
34113412
# column must be an indexable or a data column
34123413
c = getattr(self.table.cols, column)
34133414
a.set_info(self.info)
3414-
return Series(a.convert(c[:], nan_rep=self.nan_rep,
3415+
return Series(a.convert(c[start:stop], nan_rep=self.nan_rep,
34153416
encoding=self.encoding).take_data())
34163417

34173418
raise KeyError("column [%s] not found in the table" % column)
@@ -3712,12 +3713,19 @@ def write_data_chunk(self, indexes, mask, values):
37123713
except Exception as detail:
37133714
raise TypeError("tables cannot write this data -> %s" % detail)
37143715

3715-
def delete(self, where=None, **kwargs):
3716+
def delete(self, where=None, start=None, stop=None, **kwargs):
37163717

37173718
# delete all rows (and return the nrows)
37183719
if where is None or not len(where):
3719-
nrows = self.nrows
3720-
self._handle.removeNode(self.group, recursive=True)
3720+
if start is None and stop is None:
3721+
nrows = self.nrows
3722+
self._handle.removeNode(self.group, recursive=True)
3723+
else:
3724+
# pytables<3.0 would remove a single row with stop=None
3725+
if stop is None:
3726+
stop = self.nrows
3727+
nrows = self.table.removeRows(start=start, stop=stop)
3728+
self.table.flush()
37213729
return nrows
37223730

37233731
# infer the data kind
@@ -3726,7 +3734,7 @@ def delete(self, where=None, **kwargs):
37263734

37273735
# create the selection
37283736
table = self.table
3729-
self.selection = Selection(self, where, **kwargs)
3737+
self.selection = Selection(self, where, start=start, stop=stop, **kwargs)
37303738
values = self.selection.select_coords()
37313739

37323740
# delete the rows in reverse order
@@ -4303,13 +4311,25 @@ def select_coords(self):
43034311
"""
43044312
generate the selection
43054313
"""
4306-
if self.condition is None:
4307-
return np.arange(self.table.nrows)
4314+
start, stop = self.start, self.stop
4315+
nrows = self.table.nrows
4316+
if start is None:
4317+
start = 0
4318+
elif start < 0:
4319+
start += nrows
4320+
if self.stop is None:
4321+
stop = nrows
4322+
elif stop < 0:
4323+
stop += nrows
43084324

4309-
return self.table.table.getWhereList(self.condition.format(),
4310-
start=self.start, stop=self.stop,
4311-
sort=True)
4325+
if self.condition is not None:
4326+
return self.table.table.getWhereList(self.condition.format(),
4327+
start=start, stop=stop,
4328+
sort=True)
4329+
elif self.coordinates is not None:
4330+
return self.coordinates
43124331

4332+
return np.arange(start, stop)
43134333

43144334
# utilities ###
43154335

‎pandas/io/tests/test_pytables.py

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2195,6 +2195,69 @@ def test_remove_where(self):
21952195
# self.assertRaises(ValueError, store.remove,
21962196
# 'wp2', [('column', ['A', 'D'])])
21972197

2198+
def test_remove_startstop(self):
2199+
# GH #4835 and #6177
2200+
2201+
with ensure_clean_store(self.path) as store:
2202+
2203+
wp = tm.makePanel()
2204+
2205+
# start
2206+
store.put('wp1', wp, format='t')
2207+
n = store.remove('wp1', start=32)
2208+
#assert(n == 120-32)
2209+
result = store.select('wp1')
2210+
expected = wp.reindex(major_axis=wp.major_axis[:32//4])
2211+
assert_panel_equal(result, expected)
2212+
2213+
store.put('wp2', wp, format='t')
2214+
n = store.remove('wp2', start=-32)
2215+
#assert(n == 32)
2216+
result = store.select('wp2')
2217+
expected = wp.reindex(major_axis=wp.major_axis[:-32//4])
2218+
assert_panel_equal(result, expected)
2219+
2220+
# stop
2221+
store.put('wp3', wp, format='t')
2222+
n = store.remove('wp3', stop=32)
2223+
#assert(n == 32)
2224+
result = store.select('wp3')
2225+
expected = wp.reindex(major_axis=wp.major_axis[32//4:])
2226+
assert_panel_equal(result, expected)
2227+
2228+
store.put('wp4', wp, format='t')
2229+
n = store.remove('wp4', stop=-32)
2230+
#assert(n == 120-32)
2231+
result = store.select('wp4')
2232+
expected = wp.reindex(major_axis=wp.major_axis[-32//4:])
2233+
assert_panel_equal(result, expected)
2234+
2235+
# start and stop
2236+
store.put('wp5', wp, format='t')
2237+
n = store.remove('wp5', start=16, stop=-16)
2238+
#assert(n == 120-32)
2239+
result = store.select('wp5')
2240+
expected = wp.reindex(major_axis=wp.major_axis[:16//4]+wp.major_axis[-16//4:])
2241+
assert_panel_equal(result, expected)
2242+
2243+
store.put('wp6', wp, format='t')
2244+
n = store.remove('wp6', start=16, stop=16)
2245+
#assert(n == 0)
2246+
result = store.select('wp6')
2247+
expected = wp.reindex(major_axis=wp.major_axis)
2248+
assert_panel_equal(result, expected)
2249+
2250+
# with where
2251+
date = wp.major_axis.take(np.arange(0,30,3))
2252+
crit = Term('major_axis=date')
2253+
store.put('wp7', wp, format='t')
2254+
n = store.remove('wp7', where=[crit], stop=80)
2255+
#assert(n == 28)
2256+
result = store.select('wp7')
2257+
expected = wp.reindex(major_axis=wp.major_axis-wp.major_axis[np.arange(0,20,3)])
2258+
assert_panel_equal(result, expected)
2259+
2260+
21982261
def test_remove_crit(self):
21992262

22002263
with ensure_clean_store(self.path) as store:
@@ -3449,6 +3512,25 @@ def f():
34493512
result = store.select_column('df3', 'string')
34503513
tm.assert_almost_equal(result.values, df3['string'].values)
34513514

3515+
# start/stop
3516+
result = store.select_column('df3', 'string', start=2)
3517+
tm.assert_almost_equal(result.values, df3['string'].values[2:])
3518+
3519+
result = store.select_column('df3', 'string', start=-2)
3520+
tm.assert_almost_equal(result.values, df3['string'].values[-2:])
3521+
3522+
result = store.select_column('df3', 'string', stop=2)
3523+
tm.assert_almost_equal(result.values, df3['string'].values[:2])
3524+
3525+
result = store.select_column('df3', 'string', stop=-2)
3526+
tm.assert_almost_equal(result.values, df3['string'].values[:-2])
3527+
3528+
result = store.select_column('df3', 'string', start=2, stop=-2)
3529+
tm.assert_almost_equal(result.values, df3['string'].values[2:-2])
3530+
3531+
result = store.select_column('df3', 'string', start=-2, stop=2)
3532+
tm.assert_almost_equal(result.values, df3['string'].values[-2:2])
3533+
34523534
def test_coordinates(self):
34533535
df = tm.makeTimeDataFrame()
34543536

@@ -3519,6 +3601,12 @@ def test_coordinates(self):
35193601
self.assertRaises(ValueError, store.select, 'df',where=np.arange(len(df)),start=5)
35203602
self.assertRaises(ValueError, store.select, 'df',where=np.arange(len(df)),start=5,stop=10)
35213603

3604+
# selection with filter
3605+
selection = date_range('20000101',periods=500)
3606+
result = store.select('df', where='index in selection')
3607+
expected = df[df.index.isin(selection)]
3608+
tm.assert_frame_equal(result,expected)
3609+
35223610
# list
35233611
df = DataFrame(np.random.randn(10,2))
35243612
store.append('df2',df)
@@ -3533,6 +3621,11 @@ def test_coordinates(self):
35333621
expected = df.loc[where]
35343622
tm.assert_frame_equal(result,expected)
35353623

3624+
# start/stop
3625+
result = store.select('df2', start=5, stop=10)
3626+
expected = df[5:10]
3627+
tm.assert_frame_equal(result,expected)
3628+
35363629
def test_append_to_multiple(self):
35373630
df1 = tm.makeTimeDataFrame()
35383631
df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
@@ -3603,11 +3696,11 @@ def test_select_as_multiple(self):
36033696
None, where=['A>0', 'B>0'], selector='df1')
36043697
self.assertRaises(Exception, store.select_as_multiple,
36053698
[None], where=['A>0', 'B>0'], selector='df1')
3606-
self.assertRaises(TypeError, store.select_as_multiple,
3699+
self.assertRaises(KeyError, store.select_as_multiple,
36073700
['df1','df3'], where=['A>0', 'B>0'], selector='df1')
36083701
self.assertRaises(KeyError, store.select_as_multiple,
36093702
['df3'], where=['A>0', 'B>0'], selector='df1')
3610-
self.assertRaises(ValueError, store.select_as_multiple,
3703+
self.assertRaises(KeyError, store.select_as_multiple,
36113704
['df1','df2'], where=['A>0', 'B>0'], selector='df4')
36123705

36133706
# default select

0 commit comments

Comments
 (0)