From 9ab8c23e50cfadd45995cc3ce5885ebd32e9f68a Mon Sep 17 00:00:00 2001
From: Nick Eubank <nickeubank@users.noreply.github.com>
Date: Fri, 13 Feb 2015 11:39:51 -0800
Subject: [PATCH 1/9] Default values for dropna to "False" (issue 9382)

PLEASE REVIEW: This is my commit to a major project, and I would appreciate a quick once-over!

As per discussion in Issue 9382, changes the default of all HDF write functions so that rows with NA in all non-index columns are no longer dropped.
---
 pandas/io/pytables.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 05510f655f7be..7b695a82fc711 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -219,7 +219,7 @@ class DuplicateWarning(Warning):
 """
 
 with config.config_prefix('io.hdf'):
-    config.register_option('dropna_table', True, dropna_doc,
+    config.register_option('dropna_table', False, dropna_doc,
                            validator=config.is_bool)
     config.register_option(
         'default_format', None, format_doc,
@@ -801,8 +801,8 @@ def put(self, key, value, format=None, append=False, **kwargs):
             This will force Table format, append the input data to the
             existing.
         encoding : default None, provide an encoding for strings
-        dropna   : boolean, default True, do not write an ALL nan row to
-            the store settable by the option 'io.hdf.dropna_table'
+        dropna   : boolean, default False. If True, do not write an ALL nan
+            row to the store. Settable by the option 'io.hdf.dropna_table'
         """
         if format is None:
             format = get_option("io.hdf.default_format") or 'fixed'
@@ -883,8 +883,8 @@ def append(self, key, value, format=None, append=True, columns=None,
         chunksize    : size to chunk the writing
         expectedrows : expected TOTAL row size of this table
         encoding     : default None, provide an encoding for strings
-        dropna       : boolean, default True, do not write an ALL nan row to
-            the store settable by the option 'io.hdf.dropna_table'
+        dropna       : boolean, default False. If True, do not write an ALL nan
+            row to the store. Settable by the option 'io.hdf.dropna_table'
         Notes
         -----
         Does *not* check if data being appended overlaps with existing
@@ -903,7 +903,7 @@ def append(self, key, value, format=None, append=True, columns=None,
                              **kwargs)
 
     def append_to_multiple(self, d, value, selector, data_columns=None,
-                           axes=None, dropna=True, **kwargs):
+                           axes=None, dropna=False, **kwargs):
         """
         Append to multiple tables
 
@@ -918,7 +918,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None,
         data_columns : list of columns to create as data columns, or True to
             use all columns
         dropna : if evaluates to True, drop rows from all tables if any single
-                 row in each table has all NaN
+                 row in each table has all NaN. Default False.
 
         Notes
         -----
@@ -3740,7 +3740,7 @@ class AppendableTable(LegacyTable):
 
     def write(self, obj, axes=None, append=False, complib=None,
               complevel=None, fletcher32=None, min_itemsize=None,
-              chunksize=None, expectedrows=None, dropna=True, **kwargs):
+              chunksize=None, expectedrows=None, dropna=False, **kwargs):
 
         if not append and self.is_exists:
             self._handle.remove_node(self.group, 'table')
@@ -3777,7 +3777,7 @@ def write(self, obj, axes=None, append=False, complib=None,
         # add the rows
         self.write_data(chunksize, dropna=dropna)
 
-    def write_data(self, chunksize, dropna=True):
+    def write_data(self, chunksize, dropna=False):
         """ we form the data into a 2-d including indexes,values,mask
             write chunk-by-chunk """
 

From 1d7808c9e6b78d1c073d194eceee46675e0b9964 Mon Sep 17 00:00:00 2001
From: Nick Eubank <nickeubank@users.noreply.github.com>
Date: Fri, 13 Feb 2015 14:04:21 -0800
Subject: [PATCH 2/9] Update v0.16.0.txt

---
 doc/source/whatsnew/v0.16.0.txt | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
index bcae3327828bf..c0a89b5fe7b8f 100644
--- a/doc/source/whatsnew/v0.16.0.txt
+++ b/doc/source/whatsnew/v0.16.0.txt
@@ -130,6 +130,33 @@ methods (:issue:`9088`).
     d    7
     dtype: int64
 
+ - default behavior for HDF write functions is now to keep rows that are all missing except for index. (:issue:`9382`)
+ 
+ Previously,
+
+  .. ipython:: python
+    In [1]: myFile = HDFStore('file.hdf')
+            seriesWithMissingRow = pd.Series([0, np.nan, 2], index = ['user1', 'user2', 'user3'])
+            myFile.append('fileKey', seriesWithMissingRow, append = False)
+            myFile['fileKey']
+
+    Out[1]:
+           user1    0
+           user3    2
+           dtype: float64
+
+ New behavior:
+  .. ipython:: python
+    In [2]: myFile = HDFStore('file.hdf')
+            seriesWithMissingRow = pd.Series([0, np.nan, 2], index = ['user1', 'user2', 'user3'])
+            myFile.append('fileKey', seriesWithMissingRow, append = False)
+            myFile['fileKey']
+
+    Out[2]:
+            user1     0
+            user2   NaN
+            user3     2
+            dtype: float64
 
 
 Deprecations

From 66dfc6b4c8e49c765195554cffa7b01a4c1bbd88 Mon Sep 17 00:00:00 2001
From: Nick Eubank <nickeubank@users.noreply.github.com>
Date: Mon, 16 Feb 2015 19:19:10 -0800
Subject: [PATCH 3/9] Update v0.16.0.txt

---
 doc/source/whatsnew/v0.16.0.txt | 63 ++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
index c0a89b5fe7b8f..b161469f6c663 100644
--- a/doc/source/whatsnew/v0.16.0.txt
+++ b/doc/source/whatsnew/v0.16.0.txt
@@ -130,33 +130,54 @@ methods (:issue:`9088`).
     d    7
     dtype: int64
 
- - default behavior for HDF write functions is now to keep rows that are all missing except for index. (:issue:`9382`)
+- default behavior for HDF write functions with "table" format is now to keep rows that are all missing except for index. (:issue:`9382`)
  
  Previously,
+ 
+  .. code-block:: python
+    In [1]: 
+     df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
+     df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
+     
+     df_without_missing = pd.DataFrame({'col1':[0, -1, 2], 'col2':[1, -1, 3]})
+     df_without_missing.to_hdf('file.h5', 'df_without_missing')
+     
+     print(pd.read_hdf('file.h5', 'df_with_missing'))
+     print(pd.read_hdf('file.h5', 'df_without_missing'))
 
-  .. ipython:: python
-    In [1]: myFile = HDFStore('file.hdf')
-            seriesWithMissingRow = pd.Series([0, np.nan, 2], index = ['user1', 'user2', 'user3'])
-            myFile.append('fileKey', seriesWithMissingRow, append = False)
-            myFile['fileKey']
+   Out [1]: 
+         col1  col2
+     0     0     1
+     2     2     3
+        col1  col2
+     0     0     1
+     1    -1    -1
+     2     2     3
 
-    Out[1]:
-           user1    0
-           user3    2
-           dtype: float64
 
- New behavior:
-  .. ipython:: python
-    In [2]: myFile = HDFStore('file.hdf')
-            seriesWithMissingRow = pd.Series([0, np.nan, 2], index = ['user1', 'user2', 'user3'])
-            myFile.append('fileKey', seriesWithMissingRow, append = False)
-            myFile['fileKey']
 
-    Out[2]:
-            user1     0
-            user2   NaN
-            user3     2
-            dtype: float64
+New behavior: do 
+
+  .. code-block:: python
+    In [1]: 
+     df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
+     df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
+     
+     df_without_missing = pd.DataFrame({'col1':[0, -1, 2], 'col2':[1, -1, 3]})
+     df_without_missing.to_hdf('file.h5', 'df_without_missing')
+     
+     print(pd.read_hdf('file.h5', 'df_with_missing'))
+     print(pd.read_hdf('file.h5', 'df_without_missing'))
+
+   Out [2]: 
+          col1  col2
+     0     0     1
+     1   NaN   NaN
+     2     2     3
+         col1  col2
+     0     0     1
+     1    -1    -1
+     2     2     3
 
 
 Deprecations

From 3e2a718ad044a7ea6810645c8b51a07e1ae0dc82 Mon Sep 17 00:00:00 2001
From: Nick Eubank <nickeubank@users.noreply.github.com>
Date: Mon, 16 Feb 2015 19:30:34 -0800
Subject: [PATCH 4/9] Test for change of default setting for dropna

---
 pandas/io/tests/test_pytables.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index e95d46f66f17f..ff6b49ccfb622 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4678,6 +4678,13 @@ def test_duplicate_column_name(self):
             other = read_hdf(path, 'df')
             tm.assert_frame_equal(df, other)
 
+    def test_all_missing_values(self):
+        df_with_missing = pd.DataFrame({'col1':[np.nan]})
+        
+        with ensure_clean_path(self.path) as path:
+            df_with_missing.to_hdf(path, 'df_with_missing', format = 't')
+            reloaded = pd.read_hdf(path, 'df_with_missing')
+            tm.assert_frame_equal(df_with_missing, reloaded)
 
 def _test_sort(obj):
     if isinstance(obj, DataFrame):

From de022a92057ec652f09fe0af10b436a37128194d Mon Sep 17 00:00:00 2001
From: Nick Eubank <nickeubank@users.noreply.github.com>
Date: Tue, 17 Feb 2015 11:56:47 -0800
Subject: [PATCH 5/9] dropped pd. prefix for pandas operations.

---
 pandas/io/tests/test_pytables.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index ff6b49ccfb622..1a825df1a626a 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4679,11 +4679,11 @@ def test_duplicate_column_name(self):
             tm.assert_frame_equal(df, other)
 
     def test_all_missing_values(self):
-        df_with_missing = pd.DataFrame({'col1':[np.nan]})
+        df_with_missing = DataFrame({'col1':[np.nan]})
         
         with ensure_clean_path(self.path) as path:
             df_with_missing.to_hdf(path, 'df_with_missing', format = 't')
-            reloaded = pd.read_hdf(path, 'df_with_missing')
+            reloaded = read_hdf(path, 'df_with_missing')
             tm.assert_frame_equal(df_with_missing, reloaded)
 
 def _test_sort(obj):

From 5f2eae8e239dfee150ff00fd679b2eed9412e72a Mon Sep 17 00:00:00 2001
From: Nick Eubank <nickeubank@users.noreply.github.com>
Date: Tue, 17 Feb 2015 12:03:13 -0800
Subject: [PATCH 6/9] More complicated data frame object.

---
 pandas/io/tests/test_pytables.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 1a825df1a626a..88261bf458cdc 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4679,7 +4679,7 @@ def test_duplicate_column_name(self):
             tm.assert_frame_equal(df, other)
 
     def test_all_missing_values(self):
-        df_with_missing = DataFrame({'col1':[np.nan]})
+        df_with_missing = DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, np.nan]})
         
         with ensure_clean_path(self.path) as path:
             df_with_missing.to_hdf(path, 'df_with_missing', format = 't')

From 892835b285a2a0ecd43791973cf7afb565988a36 Mon Sep 17 00:00:00 2001
From: Nick Eubank <nickeubank@users.noreply.github.com>
Date: Tue, 17 Feb 2015 16:54:13 -0800
Subject: [PATCH 7/9] add issue number in comment

---
 pandas/io/tests/test_pytables.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 88261bf458cdc..736d8a5b2bb90 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4679,6 +4679,7 @@ def test_duplicate_column_name(self):
             tm.assert_frame_equal(df, other)
 
     def test_all_missing_values(self):
+        # Test corresponding to Issue 9382
         df_with_missing = DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, np.nan]})
         
         with ensure_clean_path(self.path) as path:

From 137c4c0b6545e4244d51e12ce7409129fc622e60 Mon Sep 17 00:00:00 2001
From: Nick Eubank <nickeubank@users.noreply.github.com>
Date: Tue, 17 Feb 2015 16:58:18 -0800
Subject: [PATCH 8/9] Updated to reflect suggested changes by Jeff

---
 doc/source/whatsnew/v0.16.0.txt | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
index b161469f6c663..1e08ece251fdc 100644
--- a/doc/source/whatsnew/v0.16.0.txt
+++ b/doc/source/whatsnew/v0.16.0.txt
@@ -142,8 +142,8 @@ methods (:issue:`9088`).
      df_without_missing = pd.DataFrame({'col1':[0, -1, 2], 'col2':[1, -1, 3]})
      df_without_missing.to_hdf('file.h5', 'df_without_missing')
      
-     print(pd.read_hdf('file.h5', 'df_with_missing'))
-     print(pd.read_hdf('file.h5', 'df_without_missing'))
+     pd.read_hdf('file.h5', 'df_with_missing')
+     pd.read_hdf('file.h5', 'df_without_missing')
 
    Out [1]: 
          col1  col2
@@ -156,28 +156,17 @@ methods (:issue:`9088`).
 
 
 
-New behavior: do 
+New behavior: 
 
-  .. code-block:: python
-    In [1]: 
+  .. ipython:: python
      df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
      df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
      
      df_without_missing = pd.DataFrame({'col1':[0, -1, 2], 'col2':[1, -1, 3]})
      df_without_missing.to_hdf('file.h5', 'df_without_missing')
      
-     print(pd.read_hdf('file.h5', 'df_with_missing'))
-     print(pd.read_hdf('file.h5', 'df_without_missing'))
-
-   Out [2]: 
-          col1  col2
-     0     0     1
-     1   NaN   NaN
-     2     2     3
-         col1  col2
-     0     0     1
-     1    -1    -1
-     2     2     3
+     pd.read_hdf('file.h5', 'df_with_missing')
+     pd.read_hdf('file.h5', 'df_without_missing')
 
 
 Deprecations

From 1a119d24cd6a70124fd796566bb676ee4e4b5636 Mon Sep 17 00:00:00 2001
From: Nick Eubank <nickeubank@gmail.com>
Date: Thu, 26 Mar 2015 19:32:02 -0700
Subject: [PATCH 9/9] moved docs to whatsnew 16.1 from 16.0

---
 doc/source/whatsnew/v0.16.1.txt | 40 +++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index 3c3742c968642..012a35d9d2f6e 100644
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -29,6 +29,46 @@ API changes
 
 
 
+Backwards incompatible API changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- Default behavior for HDF write functions with "table" format is now to keep rows in which all values are missing (i.e. everything except the index is NaN). (:issue:`9382`)
+ 
+ Previously,
+ 
+  .. code-block:: python
+    In [1]: 
+     df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
+     df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
+     
+     df_without_missing = pd.DataFrame({'col1':[0, -1, 2], 'col2':[1, -1, 3]})
+     df_without_missing.to_hdf('file.h5', 'df_without_missing')
+     
+     pd.read_hdf('file.h5', 'df_with_missing')
+     pd.read_hdf('file.h5', 'df_without_missing')
+
+   Out [1]: 
+         col1  col2
+     0     0     1
+     2     2     3
+        col1  col2
+     0     0     1
+     1    -1    -1
+     2     2     3
+
+
+
+New behavior: 
+
+  .. ipython:: python
+     df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
+     df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
+     
+     df_without_missing = pd.DataFrame({'col1':[0, -1, 2], 'col2':[1, -1, 3]})
+     df_without_missing.to_hdf('file.h5', 'df_without_missing')
+     
+     pd.read_hdf('file.h5', 'df_with_missing')
+     pd.read_hdf('file.h5', 'df_without_missing')
+