Skip to content

Commit b07e020

Browse files
committed
TST: more robust testing for HDFStore dups
1 parent 27b8470 commit b07e020

File tree

3 files changed

+50
-20
lines changed

3 files changed

+50
-20
lines changed

pandas/io/pytables.py

+10 -5
Original file line number | Diff line number | Diff line change
@@ -667,7 +667,7 @@ def func(_start, _stop):
667667
axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0]
668668

669669
# concat and return
670-
return concat(objs, axis=axis, verify_integrity=True)
670+
return concat(objs, axis=axis, verify_integrity=True).consolidate()
671671

672672
if iterator or chunksize is not None:
673673
return TableIterator(self, func, nrows=nrows, start=start, stop=stop, chunksize=chunksize, auto_close=auto_close)
@@ -3213,7 +3213,7 @@ def read(self, where=None, columns=None, **kwargs):
32133213
if len(objs) == 1:
32143214
wp = objs[0]
32153215
else:
3216-
wp = concat(objs, axis=0, verify_integrity=False)
3216+
wp = concat(objs, axis=0, verify_integrity=False).consolidate()
32173217

32183218
# apply the selection filters & axis orderings
32193219
wp = self.process_axes(wp, columns=columns)
@@ -3504,7 +3504,7 @@ def read(self, where=None, columns=None, **kwargs):
35043504
if len(frames) == 1:
35053505
df = frames[0]
35063506
else:
3507-
df = concat(frames, axis=1, verify_integrity=False)
3507+
df = concat(frames, axis=1, verify_integrity=False).consolidate()
35083508

35093509
# apply the selection filters & axis orderings
35103510
df = self.process_axes(df, columns=columns)
@@ -3680,12 +3680,17 @@ class AppendableNDimTable(AppendablePanelTable):
36803680
def _reindex_axis(obj, axis, labels, other=None):
36813681
ax = obj._get_axis(axis)
36823682
labels = _ensure_index(labels)
3683-
if other is None and labels.equals(ax):
3683+
3684+
# try not to reindex even if other is provided
3685+
# if it equals our current index
3686+
if other is not None:
3687+
other = _ensure_index(other)
3688+
if (other is None or labels.equals(other)) and labels.equals(ax):
36843689
return obj
36853690

36863691
labels = _ensure_index(labels.unique())
36873692
if other is not None:
3688-
labels = labels & _ensure_index(other)
3693+
labels = labels & _ensure_index(other.unique())
36893694
if not labels.equals(ax):
36903695
slicer = [ slice(None, None) ] * obj.ndim
36913696
slicer[axis] = labels

pandas/io/tests/test_pytables.py

+15 -4
Original file line number | Diff line number | Diff line change
@@ -2304,8 +2304,14 @@ def test_select_with_dups(self):
23042304

23052305
with ensure_clean(self.path) as store:
23062306
store.append('df',df)
2307+
23072308
result = store.select('df')
2308-
assert_frame_equal(result,df)
2309+
expected = df
2310+
assert_frame_equal(result,expected,by_blocks=True)
2311+
2312+
result = store.select('df',columns=df.columns)
2313+
expected = df
2314+
assert_frame_equal(result,expected,by_blocks=True)
23092315

23102316
result = store.select('df',columns=['A'])
23112317
expected = df.loc[:,['A']]
@@ -2321,15 +2327,20 @@ def test_select_with_dups(self):
23212327
store.append('df',df)
23222328

23232329
result = store.select('df')
2324-
assert_frame_equal(result,df)
2330+
expected = df
2331+
assert_frame_equal(result,expected,by_blocks=True)
2332+
2333+
result = store.select('df',columns=df.columns)
2334+
expected = df
2335+
assert_frame_equal(result,expected,by_blocks=True)
23252336

23262337
expected = df.loc[:,['A']]
23272338
result = store.select('df',columns=['A'])
2328-
assert_frame_equal(result,expected)
2339+
assert_frame_equal(result,expected,by_blocks=True)
23292340

23302341
expected = df.loc[:,['B','A']]
23312342
result = store.select('df',columns=['B','A'])
2332-
assert_frame_equal(result,expected)
2343+
assert_frame_equal(result,expected,by_blocks=True)
23332344

23342345
def test_wide_table_dups(self):
23352346
wp = tm.makePanel()

pandas/util/testing.py

+25 -11
Original file line number | Diff line number | Diff line change
@@ -258,27 +258,41 @@ def assert_frame_equal(left, right, check_dtype=True,
258258
check_column_type=False,
259259
check_frame_type=False,
260260
check_less_precise=False,
261-
check_names=True):
261+
check_names=True,
262+
by_blocks=False):
262263
if check_frame_type:
263264
assert_isinstance(left, type(right))
264265
assert_isinstance(left, DataFrame)
265266
assert_isinstance(right, DataFrame)
266267

267268
if check_less_precise:
268-
assert_almost_equal(left.columns, right.columns)
269+
if not by_blocks:
270+
assert_almost_equal(left.columns, right.columns)
269271
assert_almost_equal(left.index, right.index)
270272
else:
271-
assert_index_equal(left.columns, right.columns)
273+
if not by_blocks:
274+
assert_index_equal(left.columns, right.columns)
272275
assert_index_equal(left.index, right.index)
273276

274-
for i, col in enumerate(left.columns):
275-
assert col in right
276-
lcol = left.icol(i)
277-
rcol = right.icol(i)
278-
assert_series_equal(lcol, rcol,
279-
check_dtype=check_dtype,
280-
check_index_type=check_index_type,
281-
check_less_precise=check_less_precise)
277+
# compare by blocks
278+
if by_blocks:
279+
rblocks = right.blocks
280+
lblocks = left.blocks
281+
for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))):
282+
assert dtype in lblocks
283+
assert dtype in rblocks
284+
assert_frame_equal(lblocks[dtype],rblocks[dtype],check_dtype=check_dtype)
285+
286+
# compare by columns
287+
else:
288+
for i, col in enumerate(left.columns):
289+
assert col in right
290+
lcol = left.icol(i)
291+
rcol = right.icol(i)
292+
assert_series_equal(lcol, rcol,
293+
check_dtype=check_dtype,
294+
check_index_type=check_index_type,
295+
check_less_precise=check_less_precise)
282296

283297
if check_index_type:
284298
assert_isinstance(left.index, type(right.index))

0 commit comments

Comments
 (0)