Skip to content

Commit 3c1207b

Browse files
author
Nick Eubank
committed
Change pytable default for dropna to false (9382)
1 parent fe735be commit 3c1207b

File tree

3 files changed

+57
-7
lines changed

3 files changed

+57
-7
lines changed

doc/source/whatsnew/v0.17.0.txt

+41
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,47 @@ Backwards incompatible API changes
3333

3434
.. _whatsnew_0170.api_breaking:
3535

36+
- The default behavior for HDF write functions with the "table" format is now to keep rows whose values are all missing (apart from the index). (:issue:`9382`)
37+
38+
Previous behavior:
39+
40+
.. code-block:: python
41+
42+
In [1]:
43+
df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
44+
df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
45+
46+
pd.read_hdf('file.h5', 'df_with_missing')
47+
48+
Out[1]:
49+
col1 col2
50+
0 0 1
51+
2 2 3
52+
53+
54+
New behavior:
55+
56+
.. ipython:: python
57+
:suppress:
58+
:okexcept:
59+
60+
import os
61+
os.remove('file.h5')
62+
63+
.. ipython:: python
64+
65+
df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
66+
df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
67+
68+
pd.read_hdf('file.h5', 'df_with_missing')
69+
70+
.. ipython:: python
71+
:suppress:
72+
73+
import os
74+
os.remove('file.h5')
75+
76+
3677
.. _whatsnew_0170.api_breaking.other:
3778

3879
Other API Changes

pandas/io/pytables.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ class DuplicateWarning(Warning):
220220
"""
221221

222222
with config.config_prefix('io.hdf'):
223-
config.register_option('dropna_table', True, dropna_doc,
223+
config.register_option('dropna_table', False, dropna_doc,
224224
validator=config.is_bool)
225225
config.register_option(
226226
'default_format', None, format_doc,
@@ -802,7 +802,7 @@ def put(self, key, value, format=None, append=False, **kwargs):
802802
This will force Table format, append the input data to the
803803
existing.
804804
encoding : default None, provide an encoding for strings
805-
dropna : boolean, default True, do not write an ALL nan row to
805+
dropna : boolean, default False; if True, do not write an ALL nan row to
806806
the store settable by the option 'io.hdf.dropna_table'
807807
"""
808808
if format is None:
@@ -884,7 +884,7 @@ def append(self, key, value, format=None, append=True, columns=None,
884884
chunksize : size to chunk the writing
885885
expectedrows : expected TOTAL row size of this table
886886
encoding : default None, provide an encoding for strings
887-
dropna : boolean, default True, do not write an ALL nan row to
887+
dropna : boolean, default False; if True, do not write an ALL nan row to
888888
the store settable by the option 'io.hdf.dropna_table'
889889
Notes
890890
-----
@@ -904,7 +904,7 @@ def append(self, key, value, format=None, append=True, columns=None,
904904
**kwargs)
905905

906906
def append_to_multiple(self, d, value, selector, data_columns=None,
907-
axes=None, dropna=True, **kwargs):
907+
axes=None, dropna=False, **kwargs):
908908
"""
909909
Append to multiple tables
910910
@@ -919,7 +919,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None,
919919
data_columns : list of columns to create as data columns, or True to
920920
use all columns
921921
dropna : if evaluates to True, drop rows from all tables if any single
922-
row in each table has all NaN
922+
row in each table has all NaN. Default False.
923923
924924
Notes
925925
-----
@@ -3742,7 +3742,7 @@ class AppendableTable(LegacyTable):
37423742

37433743
def write(self, obj, axes=None, append=False, complib=None,
37443744
complevel=None, fletcher32=None, min_itemsize=None,
3745-
chunksize=None, expectedrows=None, dropna=True, **kwargs):
3745+
chunksize=None, expectedrows=None, dropna=False, **kwargs):
37463746

37473747
if not append and self.is_exists:
37483748
self._handle.remove_node(self.group, 'table')
@@ -3779,7 +3779,7 @@ def write(self, obj, axes=None, append=False, complib=None,
37793779
# add the rows
37803780
self.write_data(chunksize, dropna=dropna)
37813781

3782-
def write_data(self, chunksize, dropna=True):
3782+
def write_data(self, chunksize, dropna=False):
37833783
""" we form the data into a 2-d including indexes,values,mask
37843784
write chunk-by-chunk """
37853785

pandas/io/tests/test_pytables.py

+9
Original file line numberDiff line numberDiff line change
@@ -4617,6 +4617,15 @@ def test_preserve_timedeltaindex_type(self):
46174617
store['df'] = df
46184618
assert_frame_equal(store['df'], df)
46194619

4620+
def test_all_missing_values(self):
4621+
# Test corresponding to Issue 9382
4622+
df_with_missing = DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, np.nan]})
4623+
4624+
with ensure_clean_path(self.path) as path:
4625+
df_with_missing.to_hdf(path, 'df_with_missing', format = 't')
4626+
reloaded = read_hdf(path, 'df_with_missing')
4627+
tm.assert_frame_equal(df_with_missing, reloaded)
4628+
46204629

46214630
def _test_sort(obj):
46224631
if isinstance(obj, DataFrame):

0 commit comments

Comments
 (0)