BUG: pytables not writing rows where all-nan in a part of a block #3013

Merged: 1 commit, Mar 11, 2013

2 changes: 2 additions & 0 deletions RELEASE.rst
@@ -118,6 +118,7 @@ pandas 0.11.0
underscore)
- fixes for query parsing to correctly interpret boolean and != (GH2849_, GH2973_)
- fixes for pathological case on SparseSeries with 0-len array and compression (GH2931_)
- fixes bug with writing rows if part of a block was all-nan (GH3012_)

- Bug showing up in applymap where some object type columns are converted (GH2909_)
  had an incorrect default in convert_objects
@@ -165,6 +166,7 @@ pandas 0.11.0
.. _GH2982: https://github.com/pydata/pandas/issues/2982
.. _GH2989: https://github.com/pydata/pandas/issues/2989
.. _GH3002: https://github.com/pydata/pandas/issues/3002
.. _GH3012: https://github.com/pydata/pandas/issues/3012


pandas 0.10.1
19 changes: 11 additions & 8 deletions doc/source/io.rst
@@ -1461,8 +1461,7 @@ beginning. You can use the supplied ``PyTables`` utility
``ptrepack``. In addition, ``ptrepack`` can change compression levels
after the fact.

- ``ptrepack --chunkshape=auto --propindexes --complevel=9 --complib=blosc in.h5 out.h5``

Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow
you to reuse previously deleted space. Alternatively, one can simply
@@ -1473,6 +1472,10 @@ Notes & Caveats

- Once a ``table`` is created its items (Panel) / columns (DataFrame)
  are fixed; only exactly the same columns can be appended
- If a row has ``np.nan`` in **EVERY COLUMN**, that row **WILL BE DROPPED
  IMPLICITLY** on append (a ``nan`` in a string column, or a ``NaT`` in a
  datetime-like column, counts as having a value). This limitation *may* be
  addressed in the future; see the sketch after this list.
- You can not append/select/delete to a non-table (table creation is
  determined on the first append, or by passing ``table=True`` in a
  put operation)
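A minimal sketch of this caveat (the store and key names are hypothetical, and ``.ix`` was the idiomatic indexer in this era of pandas):

    import numpy as np
    import pandas as pd

    store = pd.HDFStore('caveats.h5')   # hypothetical file name

    df = pd.DataFrame({'A1': np.random.randn(4), 'A2': np.random.randn(4)})
    df.ix[0:1, :] = np.nan              # rows 0 and 1 become all-nan
    store.append('df_nan', df)
    store.select('df_nan')              # only 2 rows come back: all-nan rows were dropped

    # a string (or datetime-like) column counts as a value, so all rows survive
    df['B'] = 'foo'
    store.append('df_str', df)
    store.select('df_str')              # all 4 rows come back
    store.close()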
@@ -1498,13 +1501,13 @@

.. ipython:: python

   store.append('wp_big_strings', wp, min_itemsize = { 'minor_axis' : 30 })
   wp = wp.rename_axis(lambda x: x + '_big_strings', axis=2)
   store.append('wp_big_strings', wp)
   store.select('wp_big_strings')

   # we have provided a minimum minor_axis indexable size
   store.root.wp_big_strings.table
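As a rough DataFrame analogue of the Panel example above (a sketch reusing the open ``store``; it assumes the ``values`` key applies ``min_itemsize`` to the string data columns):

    df = DataFrame({'C': ['foo'] * 5})
    store.append('df_big_strings', df, min_itemsize={'values': 30})
    # later appends may now contain strings in 'C' up to 30 characters
    store.append('df_big_strings', DataFrame({'C': ['a_much_longer_string'] * 5}))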

DataTypes
~~~~~~~~~
2 changes: 1 addition & 1 deletion pandas/io/pytables.py
@@ -2577,7 +2577,7 @@ def write_data(self, chunksize):
        # consolidate masks
        mask = masks[0]
        for m in masks[1:]:
-           m = mask & m
+           mask = mask & m

        # the arguments
        indexes = [a.cvalues for a in self.index_axes]
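For illustration, a self-contained sketch of why this one-line fix matters. The mask semantics here are assumed (``True`` flags a row as all-nan within a block, and a row should be dropped only when every block flags it); the point is the loop-variable rebinding:

    import numpy as np

    # per-block flags: True means "this row is all-nan within that block"
    masks = [np.array([True, True, False]),    # block 0
             np.array([True, False, False])]   # block 1

    # buggy version: the loop variable is rebound, so `mask` never changes
    mask = masks[0]
    for m in masks[1:]:
        m = mask & m                # result thrown away on the next iteration
    print(mask)                     # [ True  True False] -- row 1 wrongly flagged

    # fixed version: accumulate the intersection into `mask`
    mask = masks[0]
    for m in masks[1:]:
        mask = mask & m
    print(mask)                     # [ True False False] -- only row 0 is all-nan in every block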
79 changes: 79 additions & 0 deletions pandas/io/tests/test_pytables.py
@@ -417,6 +417,85 @@ def test_append(self):
            store.append('df', df)
            tm.assert_frame_equal(store['df'], df)

    def test_append_some_nans(self):

        with ensure_clean(self.path) as store:
            df = DataFrame({'A' : Series(np.random.randn(20)).astype('int32'),
                            'A1' : np.random.randn(20),
                            'A2' : np.random.randn(20),
                            'B' : 'foo', 'C' : 'bar', 'D' : Timestamp("20010101"), 'E' : datetime.datetime(2001,1,2,0,0) },
                           index=np.arange(20))
            # some nans
            store.remove('df1')
            df.ix[0:15,['A1','B','D','E']] = np.nan
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            tm.assert_frame_equal(store['df1'], df)

            # first column
            df1 = df.copy()
            df1.ix[:,'A1'] = np.nan
            store.remove('df1')
            store.append('df1', df1[:10])
            store.append('df1', df1[10:])
            tm.assert_frame_equal(store['df1'], df1)

            # 2nd column
            df2 = df.copy()
            df2.ix[:,'A2'] = np.nan
            store.remove('df2')
            store.append('df2', df2[:10])
            store.append('df2', df2[10:])
            tm.assert_frame_equal(store['df2'], df2)

            # datetimes
            df3 = df.copy()
            df3.ix[:,'E'] = np.nan
            store.remove('df3')
            store.append('df3', df3[:10])
            store.append('df3', df3[10:])
            tm.assert_frame_equal(store['df3'], df3)

            ##### THIS IS A BUG, should not drop these all-nan rows
            ##### BUT need to store the index which we don't want to do....
            # nan some entire rows
            df = DataFrame({'A1' : np.random.randn(20),
                            'A2' : np.random.randn(20)},
                           index=np.arange(20))

            store.remove('df4')
            df.ix[0:15,:] = np.nan
            store.append('df4', df[:10])
            store.append('df4', df[10:])
            tm.assert_frame_equal(store['df4'], df[-4:])
            self.assert_(store.get_storer('df4').nrows == 4)

            # nan some entire rows (strings are still written!)
            df = DataFrame({'A1' : np.random.randn(20),
                            'A2' : np.random.randn(20),
                            'B' : 'foo', 'C' : 'bar'},
                           index=np.arange(20))

            store.remove('df5')
            df.ix[0:15,:] = np.nan
            store.append('df5', df[:10])
            store.append('df5', df[10:])
            tm.assert_frame_equal(store['df5'], df)
            self.assert_(store.get_storer('df5').nrows == 20)

            # nan some entire rows (but since we have dates they are still written!)
            df = DataFrame({'A1' : np.random.randn(20),
                            'A2' : np.random.randn(20),
                            'B' : 'foo', 'C' : 'bar', 'D' : Timestamp("20010101"), 'E' : datetime.datetime(2001,1,2,0,0) },
                           index=np.arange(20))

            store.remove('df6')
            df.ix[0:15,:] = np.nan
            store.append('df6', df[:10])
            store.append('df6', df[10:])
            tm.assert_frame_equal(store['df6'], df)
            self.assert_(store.get_storer('df6').nrows == 20)

    def test_append_frame_column_oriented(self):

        with ensure_clean(self.path) as store: