Skip to content

Commit d7d2937

Browse files
author
Brendan Boerner
committed
Revert "BUG: fix HDFStore iterator to handle a where properly (GH8014)"
This reverts commit a6f27ff, which was mistakenly committed to master.
1 parent a92fc82 commit d7d2937

File tree

3 files changed

+19
-100
lines changed

3 files changed

+19
-100
lines changed

doc/source/v0.15.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ Bug Fixes
454454
- Bug in pickle deserialization that failed for pre-0.14.1 containers with dup items trying to avoid ambiguity
455455
when matching block and manager items, when there's only one block there's no ambiguity (:issue:`7794`)
456456

457-
- Bug in HDFStore iteration when passing a where (:issue:`8014`)
457+
458458

459459
- Bug in repeated timeseries line and area plot may result in ``ValueError`` or incorrect kind (:issue:`7733`)
460460

pandas/io/pytables.py

+18-33
Original file line numberDiff line numberDiff line change
@@ -662,22 +662,15 @@ def select(self, key, where=None, start=None, stop=None, columns=None,
662662
s = self._create_storer(group)
663663
s.infer_axes()
664664

665+
# what we are actually going to do for a chunk
665666
def func(_start, _stop):
666-
return s.read(start=_start, stop=_stop,
667-
where=where,
667+
return s.read(where=where, start=_start, stop=_stop,
668668
columns=columns, **kwargs)
669669

670670
if iterator or chunksize is not None:
671671
if not s.is_table:
672672
raise TypeError(
673673
"can only use an iterator or chunksize on a table")
674-
675-
# read the coordinates & iterate
676-
if where is not None:
677-
c = s.read_coordinates(where=where, **kwargs)
678-
def func(_start, _stop):
679-
return s.read(where=c[_start:_stop], columns=columns, **kwargs)
680-
681674
return TableIterator(self, func, nrows=s.nrows, start=start,
682675
stop=stop, chunksize=chunksize,
683676
auto_close=auto_close)
@@ -786,26 +779,18 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
786779
# axis is the concatenation axis
787780
axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0]
788781

789-
# for a not-none where, select the coordinates and chunk on those
790-
if where is not None:
791-
c = s.read_coordinates(where=where, **kwargs)
792-
793-
def func(_start, _stop):
794-
objs = [t.read(where=c[_start:_stop], columns=columns, **kwargs) for t in tbls]
795-
796-
# concat and return
797-
return concat(objs, axis=axis,
798-
verify_integrity=False).consolidate()
799-
800-
else:
782+
def func(_start, _stop):
783+
if where is not None:
784+
c = s.read_coordinates(where=where, start=_start, stop=_stop, **kwargs)
785+
else:
786+
c = None
801787

802-
def func(_start, _stop):
803-
objs = [t.read(start=_start, stop=_stop,
804-
columns=columns, **kwargs) for t in tbls]
788+
objs = [t.read(where=c, start=_start, stop=_stop,
789+
columns=columns, **kwargs) for t in tbls]
805790

806-
# concat and return
807-
return concat(objs, axis=axis,
808-
verify_integrity=False).consolidate()
791+
# concat and return
792+
return concat(objs, axis=axis,
793+
verify_integrity=False).consolidate()
809794

810795
if iterator or chunksize is not None:
811796
return TableIterator(self, func, nrows=nrows, start=start,
@@ -1332,20 +1317,20 @@ def __init__(self, store, func, nrows, start=None, stop=None,
13321317
if chunksize is None:
13331318
chunksize = 100000
13341319

1335-
self.chunksize = int(chunksize)
1320+
self.chunksize = chunksize
13361321
self.auto_close = auto_close
13371322

13381323
def __iter__(self):
13391324
current = self.start
13401325
while current < self.stop:
1326+
stop = current + self.chunksize
1327+
v = self.func(current, stop)
1328+
current = stop
13411329

1342-
stop = min(current + self.chunksize, self.stop)
1343-
value = self.func(current, stop)
1344-
if value is None:
1330+
if v is None:
13451331
continue
1346-
current = current + min(self.chunksize,len(value))
13471332

1348-
yield value
1333+
yield v
13491334

13501335
self.close()
13511336

pandas/io/tests/test_pytables.py

-66
Original file line numberDiff line numberDiff line change
@@ -3376,72 +3376,6 @@ def test_select_iterator(self):
33763376
#result = concat(results)
33773377
#tm.assert_frame_equal(expected, result)
33783378

3379-
def test_select_iterator_8014(self):
3380-
3381-
# single table
3382-
with ensure_clean_store(self.path) as store:
3383-
3384-
chunksize=1e4
3385-
expected = tm.makeTimeDataFrame(100064, 'S')
3386-
_maybe_remove(store, 'df')
3387-
store.append('df',expected)
3388-
3389-
beg_dt = expected.index[0]
3390-
end_dt = expected.index[-1]
3391-
3392-
#
3393-
# w/o iterator
3394-
#
3395-
3396-
# select w/o iteration and no where clause works
3397-
result = store.select('df')
3398-
tm.assert_frame_equal(expected, result)
3399-
3400-
# select w/o iterator and where clause, single term, begin
3401-
# of range, works
3402-
where = "index >= '%s'" % beg_dt
3403-
result = store.select('df',where=where)
3404-
tm.assert_frame_equal(expected, result)
3405-
3406-
# select w/o iterator and where clause, single term, end
3407-
# of range, works
3408-
where = "index <= '%s'" % end_dt
3409-
result = store.select('df',where=where)
3410-
tm.assert_frame_equal(expected, result)
3411-
3412-
# select w/o iterator and where clause, inclusive range,
3413-
# works
3414-
where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
3415-
result = store.select('df',where=where)
3416-
tm.assert_frame_equal(expected, result)
3417-
3418-
#
3419-
# with iterator
3420-
#
3421-
3422-
# select w/iterator and no where clause works
3423-
results = [ s for s in store.select('df',chunksize=chunksize) ]
3424-
result = concat(results)
3425-
tm.assert_frame_equal(expected, result)
3426-
3427-
# select w/iterator and where clause, single term, begin of range
3428-
where = "index >= '%s'" % beg_dt
3429-
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
3430-
result = concat(results)
3431-
tm.assert_frame_equal(expected, result)
3432-
3433-
# select w/iterator and where clause, single term, end of range
3434-
where = "index <= '%s'" % end_dt
3435-
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
3436-
result = concat(results)
3437-
tm.assert_frame_equal(expected, result)
3438-
3439-
# select w/iterator and where clause, inclusive range
3440-
where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
3441-
results = [ s for s in store.select('df',where=where,chunksize=chunksize) ]
3442-
result = concat(results)
3443-
tm.assert_frame_equal(expected, result)
3444-
34453379
def test_retain_index_attributes(self):
34463380

34473381
# GH 3499, losing frequency info on index recreation

0 commit comments

Comments
 (0)