diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7111d54d65815..5fedd57cd2bc8 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -492,6 +492,7 @@ I/O - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) - Bug in :class:`HDFStore` was dropping timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- Bug in :meth:`DataFrame.to_hdf` was not dropping rows in which all values are missing when ``dropna=True`` was passed (:issue:`35719`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 347ce6e853794..bf21a8fe2fc74 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -268,6 +268,7 @@ def to_hdf( data_columns=data_columns, errors=errors, encoding=encoding, + dropna=dropna, ) path_or_buf = stringify_path(path_or_buf) @@ -1051,6 +1052,7 @@ def put( encoding=None, errors: str = "strict", track_times: bool = True, + dropna: bool = False, ): """ Store object in HDFStore. @@ -1100,6 +1102,7 @@ def put( encoding=encoding, errors=errors, track_times=track_times, + dropna=dropna, ) def remove(self, key: str, where=None, start=None, stop=None): diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index f37b0aabd3aed..d76a5a6f64055 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1253,17 +1253,32 @@ def test_append_all_nans(self, setup_path): store.append("df2", df[10:], dropna=False) tm.assert_frame_equal(store["df2"], df) - # Test to make sure defaults are to not drop. 
- # Corresponding to Issue 9382 + def test_store_dropna(self, setup_path): df_with_missing = DataFrame( - {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]} + {"col1": [0.0, np.nan, 2.0], "col2": [1.0, np.nan, np.nan]}, + index=list("abc"), ) + df_without_missing = DataFrame( + {"col1": [0.0, 2.0], "col2": [1.0, np.nan]}, index=list("ac") + ) + + # Test to make sure the default is to not drop rows. + # Corresponding to Issue 9382 + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table") + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_with_missing, reloaded) with ensure_clean_path(setup_path) as path: - df_with_missing.to_hdf(path, "df_with_missing", format="table") - reloaded = read_hdf(path, "df_with_missing") + df_with_missing.to_hdf(path, "df", format="table", dropna=False) + reloaded = read_hdf(path, "df") tm.assert_frame_equal(df_with_missing, reloaded) + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table", dropna=True) + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_without_missing, reloaded) + def test_read_missing_key_close_store(self, setup_path): # GH 25766 with ensure_clean_path(setup_path) as path: