Skip to content

Commit 5498e60

Browse files
author
Nick Eubank
committed
Change pytable default for dropna to false (9382)
1 parent 676cb95 commit 5498e60

File tree

3 files changed

+67
-7
lines changed

3 files changed

+67
-7
lines changed

doc/source/whatsnew/v0.17.0.txt

+40
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,46 @@ Backwards incompatible API changes
3333

3434
.. _whatsnew_0170.api_breaking:
3535

36+
- The default behavior for HDF write functions with the "table" format is now to keep rows in which all values are missing (apart from the index). (:issue:`9382`)
37+
38+
Previously,
39+
40+
.. code-block:: python
41+
42+
In [1]:
43+
df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
44+
df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
45+
46+
pd.read_hdf('file.h5', 'df_with_missing')
47+
48+
Out [1]:
49+
col1 col2
50+
0 0 1
51+
2 2 3
52+
53+
54+
New behavior:
55+
56+
.. ipython:: python
57+
:suppress:
58+
:okexcept:
59+
60+
import os
61+
os.remove('file.h5')
62+
63+
.. ipython:: python
64+
65+
df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
66+
df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
67+
68+
pd.read_hdf('file.h5', 'df_with_missing')
69+
70+
.. ipython:: python
71+
:suppress:
72+
73+
os.remove('file.h5')
74+
75+
3676
.. _whatsnew_0170.api_breaking.other:
3777

3878
Other API Changes

pandas/io/pytables.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ class DuplicateWarning(Warning):
220220
"""
221221

222222
with config.config_prefix('io.hdf'):
223-
config.register_option('dropna_table', True, dropna_doc,
223+
config.register_option('dropna_table', False, dropna_doc,
224224
validator=config.is_bool)
225225
config.register_option(
226226
'default_format', None, format_doc,
@@ -802,7 +802,7 @@ def put(self, key, value, format=None, append=False, **kwargs):
802802
This will force Table format, append the input data to the
803803
existing.
804804
encoding : default None, provide an encoding for strings
805-
dropna : boolean, default True, do not write an ALL nan row to
805+
dropna : boolean, default False. If True, do not write an ALL nan row to
806806
the store settable by the option 'io.hdf.dropna_table'
807807
"""
808808
if format is None:
@@ -884,7 +884,7 @@ def append(self, key, value, format=None, append=True, columns=None,
884884
chunksize : size to chunk the writing
885885
expectedrows : expected TOTAL row size of this table
886886
encoding : default None, provide an encoding for strings
887-
dropna : boolean, default True, do not write an ALL nan row to
887+
dropna : boolean, default False. If True, do not write an ALL nan row to
888888
the store settable by the option 'io.hdf.dropna_table'
889889
Notes
890890
-----
@@ -904,7 +904,7 @@ def append(self, key, value, format=None, append=True, columns=None,
904904
**kwargs)
905905

906906
def append_to_multiple(self, d, value, selector, data_columns=None,
907-
axes=None, dropna=True, **kwargs):
907+
axes=None, dropna=False, **kwargs):
908908
"""
909909
Append to multiple tables
910910
@@ -919,7 +919,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None,
919919
data_columns : list of columns to create as data columns, or True to
920920
use all columns
921921
dropna : if evaluates to True, drop rows from all tables if any single
922-
row in each table has all NaN
922+
row in each table has all NaN. Default False.
923923
924924
Notes
925925
-----
@@ -3746,7 +3746,7 @@ class AppendableTable(LegacyTable):
37463746

37473747
def write(self, obj, axes=None, append=False, complib=None,
37483748
complevel=None, fletcher32=None, min_itemsize=None,
3749-
chunksize=None, expectedrows=None, dropna=True, **kwargs):
3749+
chunksize=None, expectedrows=None, dropna=False, **kwargs):
37503750

37513751
if not append and self.is_exists:
37523752
self._handle.remove_node(self.group, 'table')
@@ -3783,7 +3783,7 @@ def write(self, obj, axes=None, append=False, complib=None,
37833783
# add the rows
37843784
self.write_data(chunksize, dropna=dropna)
37853785

3786-
def write_data(self, chunksize, dropna=True):
3786+
def write_data(self, chunksize, dropna=False):
37873787
""" we form the data into a 2-d including indexes,values,mask
37883788
write chunk-by-chunk """
37893789

pandas/io/tests/test_pytables.py

+20
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,17 @@ def test_append_all_nans(self):
10371037
store.append('df2', df[10:], dropna=False)
10381038
tm.assert_frame_equal(store['df2'], df)
10391039

1040+
# Test to make sure the default is to keep (not drop) rows that are all NaN.
1041+
# Corresponding to Issue 9382
1042+
df_with_missing = DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, np.nan]})
1043+
1044+
with ensure_clean_path(self.path) as path:
1045+
df_with_missing.to_hdf(path, 'df_with_missing', format = 't')
1046+
reloaded = read_hdf(path, 'df_with_missing')
1047+
tm.assert_frame_equal(df_with_missing, reloaded)
1048+
1049+
1050+
10401051
def test_append_frame_column_oriented(self):
10411052

10421053
with ensure_clean_store(self.path) as store:
@@ -4640,6 +4651,15 @@ def test_colums_multiindex_modified(self):
46404651
df_loaded = read_hdf(path, 'df', columns=cols2load)
46414652
self.assertTrue(cols2load_original == cols2load)
46424653

4654+
def test_all_missing_values(self):
4655+
# Test corresponding to Issue 9382
4656+
df_with_missing = DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, np.nan]})
4657+
4658+
with ensure_clean_path(self.path) as path:
4659+
df_with_missing.to_hdf(path, 'df_with_missing', format = 't')
4660+
reloaded = read_hdf(path, 'df_with_missing')
4661+
tm.assert_frame_equal(df_with_missing, reloaded)
4662+
46434663

46444664
def _test_sort(obj):
46454665
if isinstance(obj, DataFrame):

0 commit comments

Comments
 (0)