From 1e8fb8c6cea0a6306efbccf506da527559aec5d7 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 19:06:07 +0000 Subject: [PATCH 1/5] TST: add tests for to_hdf with dropna arg --- pandas/tests/io/pytables/test_store.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index f37b0aabd3aed..d76a5a6f64055 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1253,17 +1253,32 @@ def test_append_all_nans(self, setup_path): store.append("df2", df[10:], dropna=False) tm.assert_frame_equal(store["df2"], df) - # Test to make sure defaults are to not drop. - # Corresponding to Issue 9382 + def test_store_dropna(self, setup_path): df_with_missing = DataFrame( - {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]} + {"col1": [0.0, np.nan, 2.0], "col2": [1.0, np.nan, np.nan]}, + index=list("abc"), ) + df_without_missing = DataFrame( + {"col1": [0.0, 2.0], "col2": [1.0, np.nan]}, index=list("ac") + ) + + # Test to make sure defaults are to not drop. 
+ # Corresponding to Issue 9382 + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table") + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_with_missing, reloaded) with ensure_clean_path(setup_path) as path: - df_with_missing.to_hdf(path, "df_with_missing", format="table") - reloaded = read_hdf(path, "df_with_missing") + df_with_missing.to_hdf(path, "df", format="table", dropna=False) + reloaded = read_hdf(path, "df") tm.assert_frame_equal(df_with_missing, reloaded) + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table", dropna=True) + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_without_missing, reloaded) + def test_read_missing_key_close_store(self, setup_path): # GH 25766 with ensure_clean_path(setup_path) as path: From d6e768175ce6324daf9af1505a46908d24595c5e Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 19:06:14 +0000 Subject: [PATCH 2/5] BUG: add missing val handling to HDFStore.put --- pandas/io/pytables.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 347ce6e853794..18808545caf25 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -268,6 +268,7 @@ def to_hdf( data_columns=data_columns, errors=errors, encoding=encoding, + dropna=dropna, ) path_or_buf = stringify_path(path_or_buf) @@ -1051,6 +1052,7 @@ def put( encoding=None, errors: str = "strict", track_times: bool = True, + dropna: bool = Optional[None], ): """ Store object in HDFStore. 
@@ -1100,6 +1102,7 @@ def put( encoding=encoding, errors=errors, track_times=track_times, + dropna=dropna, ) def remove(self, key: str, where=None, start=None, stop=None): From d575b3af67b85568499d3c5655598a26217e29de Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 19:18:50 +0000 Subject: [PATCH 3/5] DOC: add whatsnew --- doc/source/whatsnew/v1.1.5.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index cf728d94b2a55..be81784dc663c 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -14,8 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- -- +- Fixed regression in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) .. --------------------------------------------------------------------------- From b47793128bd8c89cf13f7f1f70139bbca5459ab7 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 21:55:43 +0000 Subject: [PATCH 4/5] TYP: fix type hint to silence mypy complaint --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 18808545caf25..bf21a8fe2fc74 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1052,7 +1052,7 @@ def put( encoding=None, errors: str = "strict", track_times: bool = True, - dropna: bool = Optional[None], + dropna: bool = False, ): """ Store object in HDFStore. 
From 379032e15c63bdce559cd9138915a1efa0c6a33b Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 2 Nov 2020 01:47:10 +0000 Subject: [PATCH 5/5] DOC (feedback): move whatsnew --- doc/source/whatsnew/v1.1.5.rst | 3 ++- doc/source/whatsnew/v1.2.0.rst | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index be81784dc663c..cf728d94b2a55 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -14,7 +14,8 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- Fixed regression in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) +- +- .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 84f594acf5e4c..b695d4cdd4187 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -489,6 +489,7 @@ I/O - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) - Bug in :class:`HDFStore` was dropping timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- Bug in :meth:`DataFrame.to_hdf` was not dropping rows in which all values are missing when ``dropna=True`` (:issue:`35719`) Plotting ^^^^^^^^