Skip to content

Commit 8e24e8a

Browse files
author
Brendan Boerner
committed
BUG: fix HDFStore iterator to handle a where clause properly (GH8014)
* Add tests for selecting a *subset* of the data.
1 parent 31f44e5 commit 8e24e8a

File tree

2 files changed

+121
-25
lines changed

2 files changed

+121
-25
lines changed

pandas/io/tests/test_pytables.py

+55
Original file line numberDiff line numberDiff line change
@@ -3442,6 +3442,61 @@ def test_select_iterator_8014(self):
34423442
result = concat(results)
34433443
tm.assert_frame_equal(expected, result)
34443444

3445+
#
3446+
# retrieve subset
3447+
#
3448+
3449+
l_expected = expected[1:]
3450+
r_expected = expected[:-1]
3451+
b_expected = expected[1:-1]
3452+
beg_dt = expected.index[1]
3453+
end_dt = expected.index[-2]
3454+
3455+
#
3456+
# w/o iterator
3457+
#
3458+
3459+
# select w/o iterator and where clause, single term, begin
3460+
# of range, works
3461+
where = "index >= '%s'" % beg_dt
3462+
result = store.select('df',where=where)
3463+
tm.assert_frame_equal(l_expected, result)
3464+
3465+
# select w/o iterator and where clause, single term, end
3466+
# of range, works
3467+
where = "index <= '%s'" % end_dt
3468+
result = store.select('df',where=where)
3469+
tm.assert_frame_equal(r_expected, result)
3470+
3471+
# select w/o iterator and where clause, inclusive range,
3472+
# works
3473+
where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
3474+
result = store.select('df',where=where)
3475+
tm.assert_frame_equal(b_expected, result)
3476+
3477+
#
3478+
# with iterator
3479+
#
3480+
3481+
# select w/iterator and where clause, single term, begin of range
3482+
# hang in the list comprehension
3483+
where = "index >= '%s'" % beg_dt
3484+
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
3485+
result = concat(results)
3486+
tm.assert_frame_equal(expected, result)
3487+
3488+
# select w/iterator and where clause, single term, end of range
3489+
where = "index <= '%s'" % end_dt
3490+
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
3491+
result = concat(results)
3492+
tm.assert_frame_equal(expected, result)
3493+
3494+
# select w/iterator and where clause, inclusive range
3495+
where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
3496+
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
3497+
result = concat(results)
3498+
tm.assert_frame_equal(expected, result)
3499+
34453500
def test_retain_index_attributes(self):
34463501

34473502
# GH 3499, losing frequency info on index recreation

pandas/io/tests/test_pytables2.py

+66-25
Original file line numberDiff line numberDiff line change
@@ -148,24 +148,20 @@ def tearDown(self):
148148
def test_select_iterator_8014(self):
149149

150150
# single table
151-
chunksize=1e4
152151
with ensure_clean_store(self.path) as store:
153152

154-
df = tm.makeTimeDataFrame(100064, 'S')
153+
chunksize=1e4
154+
expected = tm.makeTimeDataFrame(100064, 'S')
155155
_maybe_remove(store, 'df')
156-
i = 0; ln = 58689
157-
store.append('df', df[i:i+ln])
158-
i = i+ln; ln = 41375
159-
store.append('df', df[i:i+ln])
160-
expected = df
156+
store.append('df',expected)
161157

162158
beg_dt = expected.index[0]
163159
end_dt = expected.index[-1]
164160

165161
#
166162
# w/o iterator
167163
#
168-
164+
169165
# select w/o iteration and no where clause works
170166
result = store.select('df')
171167
tm.assert_frame_equal(expected, result)
@@ -193,35 +189,80 @@ def test_select_iterator_8014(self):
193189
#
194190

195191
# select w/iterator and no where clause works
196-
results = []
197-
for s in store.select('df',iterator=True, chunksize=chunksize):
198-
results.append(s)
192+
results = [ s for s in store.select('df',chunksize=chunksize) ]
193+
result = concat(results)
194+
tm.assert_frame_equal(expected, result)
195+
196+
# select w/iterator and where clause, single term, begin of range
197+
where = "index >= '%s'" % beg_dt
198+
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
199+
result = concat(results)
200+
tm.assert_frame_equal(expected, result)
201+
202+
# select w/iterator and where clause, single term, end of range
203+
where = "index <= '%s'" % end_dt
204+
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
205+
result = concat(results)
206+
tm.assert_frame_equal(expected, result)
207+
208+
# select w/iterator and where clause, inclusive range
209+
where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
210+
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
199211
result = concat(results)
200212
tm.assert_frame_equal(expected, result)
201213

202-
# select w/iterator and where clause, single term, begin
203-
# of range, fails
214+
#
215+
# retrieve subset
216+
#
217+
218+
l_expected = expected[1:]
219+
r_expected = expected[:-1]
220+
b_expected = expected[1:-1]
221+
beg_dt = expected.index[1]
222+
end_dt = expected.index[-2]
223+
224+
#
225+
# w/o iterator
226+
#
227+
228+
# select w/o iterator and where clause, single term, begin
229+
# of range, works
230+
where = "index >= '%s'" % beg_dt
231+
result = store.select('df',where=where)
232+
tm.assert_frame_equal(l_expected, result)
233+
234+
# select w/o iterator and where clause, single term, end
235+
# of range, works
236+
where = "index <= '%s'" % end_dt
237+
result = store.select('df',where=where)
238+
tm.assert_frame_equal(r_expected, result)
239+
240+
# select w/o iterator and where clause, inclusive range,
241+
# works
242+
where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
243+
result = store.select('df',where=where)
244+
tm.assert_frame_equal(b_expected, result)
245+
246+
#
247+
# with iterator
248+
#
249+
250+
# select w/iterator and where clause, single term, begin of range
251+
# hang in the list comprehension
204252
where = "index >= '%s'" % beg_dt
205-
results = []
206-
for s in store.select('df',where=where,iterator=True, chunksize=chunksize):
207-
results.append(s)
253+
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
208254
result = concat(results)
209255
tm.assert_frame_equal(expected, result)
210256

211-
# select w/iterator and where clause, single term, end of
212-
# range, fails
257+
# select w/iterator and where clause, single term, end of range
213258
where = "index <= '%s'" % end_dt
214-
results = []
215-
for s in store.select('df',where=where,iterator=True, chunksize=chunksize):
216-
results.append(s)
259+
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
217260
result = concat(results)
218261
tm.assert_frame_equal(expected, result)
219262

220-
# select w/iterator and where clause, inclusive range, fails
263+
# select w/iterator and where clause, inclusive range
221264
where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
222-
results = []
223-
for s in store.select('df',where=where,iterator=True, chunksize=chunksize):
224-
results.append(s)
265+
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
225266
result = concat(results)
226267
tm.assert_frame_equal(expected, result)
227268

0 commit comments

Comments
 (0)