Commit be35684

BUG: pytables not writing rows where all-nan in a part of a block
TST: more tests with all-nan rows/columns
DOC: corrections, add warning about dropping all-nan rows
1 parent c9a7245 commit be35684

File tree

4 files changed: +93 -9 lines


RELEASE.rst (+2)

@@ -118,6 +118,7 @@ pandas 0.11.0
     underscore)
   - fixes for query parsing to correctly interpret boolean and != (GH2849_, GH2973_)
   - fixes for pathological case on SparseSeries with 0-len array and compression (GH2931_)
+  - fixes bug with writing rows if part of a block was all-nan (GH3012_)
 
   - Bug showing up in applymap where some object type columns are converted (GH2909_)
     had an incorrect default in convert_objects

@@ -165,6 +166,7 @@ pandas 0.11.0
 .. _GH2982: https://github.com/pydata/pandas/issues/2982
 .. _GH2989: https://github.com/pydata/pandas/issues/2989
 .. _GH3002: https://github.com/pydata/pandas/issues/3002
+.. _GH3012: https://github.com/pydata/pandas/issues/3012
 
 
 pandas 0.10.1
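
In a DataFrame the columns are stored internally in per-dtype blocks, and the table writer is meant to drop only rows that are NaN across every block. Before this fix it could drop rows that were all-NaN in just one block (for example, every float column NaN) even though other blocks (strings, datetimes) still held values. A minimal round-trip sketch of that scenario, assuming PyTables is installed; the file name demo_gh3012.h5 and the column names are illustrative only:

import numpy as np
import pandas as pd

# Two float columns plus string columns: rows 0-3 have an all-NaN float
# block but real string values, so they must not be dropped on append.
df = pd.DataFrame({'A1': np.random.randn(6),
                   'A2': np.random.randn(6),
                   'B': 'foo', 'C': 'bar'})
df.loc[0:3, ['A1', 'A2']] = np.nan

with pd.HDFStore('demo_gh3012.h5', mode='w') as store:
    store.append('df', df)           # table-format append
    roundtrip = store.select('df')

assert len(roundtrip) == len(df)     # with the fix, no rows go missing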

doc/source/io.rst (+11 -8)

@@ -1461,8 +1461,7 @@ beginning. You can use the supplied ``PyTables`` utility
 ``ptrepack``. In addition, ``ptrepack`` can change compression levels
 after the fact.
 
-  - ``ptrepack --chunkshape=auto --propindexes --complevel=9
-    --complib=blosc in.h5 out.h5``
+  - ``ptrepack --chunkshape=auto --propindexes --complevel=9 --complib=blosc in.h5 out.h5``
 
 Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow
 you to reuse previously deleted space. Alternatively, one can simply

@@ -1473,6 +1472,10 @@ Notes & Caveats
 
 - Once a ``table`` is created its items (Panel) / columns (DataFrame)
   are fixed; only exactly the same columns can be appended
+- If a row has ``np.nan`` for **EVERY COLUMN** (having a ``nan``
+  in a string, or a ``NaT`` in a datetime-like column counts as having
+  a value), then those rows **WILL BE DROPPED IMPLICITLY**. This limitation
+  *may* be addressed in the future.
 - You can not append/select/delete to a non-table (table creation is
   determined on the first append, or by passing ``table=True`` in a
   put operation)
@@ -1498,13 +1501,13 @@ Notes & Caveats
 
 .. ipython:: python
 
-    store.append('wp_big_strings', wp, min_itemsize = { 'minor_axis' : 30 })
-    wp = wp.rename_axis(lambda x: x + '_big_strings', axis=2)
-    store.append('wp_big_strings', wp)
-    store.select('wp_big_strings')
+   store.append('wp_big_strings', wp, min_itemsize = { 'minor_axis' : 30 })
+   wp = wp.rename_axis(lambda x: x + '_big_strings', axis=2)
+   store.append('wp_big_strings', wp)
+   store.select('wp_big_strings')
 
-    # we have provided a minimum minor_axis indexable size
-    store.root.wp_big_strings.table
+   # we have provided a minimum minor_axis indexable size
+   store.root.wp_big_strings.table
 
 DataTypes
 ~~~~~~~~~
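
A short sketch of the caveat added above: rows that are NaN in every column are dropped implicitly when written to a table, while a string (or datetime-like) column counts as a value and keeps the row. In the pandas of this commit the drop was unconditional; later versions expose it through the dropna keyword of append, which is passed explicitly below to reproduce the documented behaviour. PyTables is assumed to be installed and caveat.h5 is just an illustrative file name:

import numpy as np
import pandas as pd

floats = pd.DataFrame({'A1': np.random.randn(5), 'A2': np.random.randn(5)})
floats.loc[0:2, :] = np.nan            # rows 0-2 are NaN in every column

with_strings = floats.copy()
with_strings['B'] = 'foo'              # a string value in every row

with pd.HDFStore('caveat.h5', mode='w') as store:
    store.append('floats', floats, dropna=True)
    store.append('strings', with_strings, dropna=True)
    print(len(store.select('floats')))    # expected: 2 -- all-NaN rows dropped
    print(len(store.select('strings')))   # expected: 5 -- string column keeps them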

pandas/io/pytables.py (+1 -1)

@@ -2577,7 +2577,7 @@ def write_data(self, chunksize):
         # consolidate masks
         mask = masks[0]
         for m in masks[1:]:
-            m = mask & m
+            mask = mask & m
 
         # the arguments
         indexes = [a.cvalues for a in self.index_axes]
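
The one-character change above is the whole bug: the per-block "all-NaN" masks are meant to be AND-ed into the running mask, but the old code assigned the result back to the loop variable m, so every mask after the first was ignored and a row that was all-NaN in only the first block was treated as all-NaN overall. A standalone NumPy sketch (the masks here are illustrative, not the library's internal data structures):

import numpy as np

# True where a row is entirely NaN within that block
masks = [np.array([True, True, False]),    # e.g. float block: rows 0-1 all NaN
         np.array([False, False, False])]  # e.g. string block: always has values

def consolidate_buggy(masks):
    mask = masks[0]
    for m in masks[1:]:
        m = mask & m           # result discarded; mask never changes
    return mask

def consolidate_fixed(masks):
    mask = masks[0]
    for m in masks[1:]:
        mask = mask & m        # keep only rows all-NaN in *every* block
    return mask

print(consolidate_buggy(masks))   # [ True  True False] -> rows 0-1 wrongly dropped
print(consolidate_fixed(masks))   # [False False False] -> every row written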

pandas/io/tests/test_pytables.py (+79)

@@ -417,6 +417,85 @@ def test_append(self):
             store.append('df', df)
             tm.assert_frame_equal(store['df'], df)
 
+    def test_append_some_nans(self):
+
+        with ensure_clean(self.path) as store:
+            df = DataFrame({'A' : Series(np.random.randn(20)).astype('int32'),
+                            'A1' : np.random.randn(20),
+                            'A2' : np.random.randn(20),
+                            'B' : 'foo', 'C' : 'bar', 'D' : Timestamp("20010101"), 'E' : datetime.datetime(2001,1,2,0,0) },
+                           index=np.arange(20))
+            # some nans
+            store.remove('df1')
+            df.ix[0:15,['A1','B','D','E']] = np.nan
+            store.append('df1', df[:10])
+            store.append('df1', df[10:])
+            tm.assert_frame_equal(store['df1'], df)
+
+            # first column
+            df1 = df.copy()
+            df1.ix[:,'A1'] = np.nan
+            store.remove('df1')
+            store.append('df1', df1[:10])
+            store.append('df1', df1[10:])
+            tm.assert_frame_equal(store['df1'], df1)
+
+            # 2nd column
+            df2 = df.copy()
+            df2.ix[:,'A2'] = np.nan
+            store.remove('df2')
+            store.append('df2', df2[:10])
+            store.append('df2', df2[10:])
+            tm.assert_frame_equal(store['df2'], df2)
+
+            # datetimes
+            df3 = df.copy()
+            df3.ix[:,'E'] = np.nan
+            store.remove('df3')
+            store.append('df3', df3[:10])
+            store.append('df3', df3[10:])
+            tm.assert_frame_equal(store['df3'], df3)
+
+            ##### THIS IS A BUG, should not drop these all-nan rows
+            ##### BUT need to store the index which we don't want to do....
+            # nan some entire rows
+            df = DataFrame({'A1' : np.random.randn(20),
+                            'A2' : np.random.randn(20)},
+                           index=np.arange(20))
+
+            store.remove('df4')
+            df.ix[0:15,:] = np.nan
+            store.append('df4', df[:10])
+            store.append('df4', df[10:])
+            tm.assert_frame_equal(store['df4'], df[-4:])
+            self.assert_(store.get_storer('df4').nrows == 4)
+
+            # nan some entire rows (string are still written!)
+            df = DataFrame({'A1' : np.random.randn(20),
+                            'A2' : np.random.randn(20),
+                            'B' : 'foo', 'C' : 'bar'},
+                           index=np.arange(20))
+
+            store.remove('df5')
+            df.ix[0:15,:] = np.nan
+            store.append('df5', df[:10])
+            store.append('df5', df[10:])
+            tm.assert_frame_equal(store['df5'], df)
+            self.assert_(store.get_storer('df5').nrows == 20)
+
+            # nan some entire rows (but since we have dates they are still written!)
+            df = DataFrame({'A1' : np.random.randn(20),
+                            'A2' : np.random.randn(20),
+                            'B' : 'foo', 'C' : 'bar', 'D' : Timestamp("20010101"), 'E' : datetime.datetime(2001,1,2,0,0) },
+                           index=np.arange(20))
+
+            store.remove('df6')
+            df.ix[0:15,:] = np.nan
+            store.append('df6', df[:10])
+            store.append('df6', df[10:])
+            tm.assert_frame_equal(store['df6'], df)
+            self.assert_(store.get_storer('df6').nrows == 20)
+
     def test_append_frame_column_oriented(self):
 
         with ensure_clean(self.path) as store:
