From c52c237a5ab8127bc8ffeeaa0b9679d881ced21a Mon Sep 17 00:00:00 2001 From: Francis Cong Date: Thu, 13 Jan 2022 22:46:53 -0500 Subject: [PATCH 1/3] BUG: No error raised in to_stata() for -np.inf #45350 * Add an absolute function when checking infinity, which includes both np.inf and -np.inf. Also changed the error message a bit. * Add similar tests but replacing np.inf with -np.inf and change the correct error message accordingly --- pandas/io/stata.py | 5 +++-- pandas/tests/io/test_stata.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 4a50a3dabe5e7..41093cb398a9d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -625,10 +625,11 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame: if data[col].max() >= 2 ** 53 or data[col].min() <= -(2 ** 53): ws = precision_loss_doc.format("int64", "float64") elif dtype in (np.float32, np.float64): - value = data[col].max() + value = np.abs(data[col]).max() if np.isinf(value): raise ValueError( - f"Column {col} has a maximum value of infinity which is outside " + f"Column {col} has a maximum value of infinity " + "or a minimum value of -infinity which is outside " "the range supported by Stata." ) if dtype == np.float32 and value > float32_max: diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index f0fd391c2a9c4..e10ffd9c7fd19 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1475,8 +1475,19 @@ def test_out_of_range_double(self): df.loc[2, "ColumnTooBig"] = np.inf msg = ( - "Column ColumnTooBig has a maximum value of infinity which is outside " - "the range supported by Stata" + "Column ColumnTooBig has a maximum value of infinity " + "or a minimum value of -infinity which is outside " + "the range supported by Stata." 
+ ) + with pytest.raises(ValueError, match=msg): + with tm.ensure_clean() as path: + df.to_stata(path) + + df.loc[2, "ColumnTooBig"] = -np.inf + msg = ( + "Column ColumnTooBig has a maximum value of infinity " + "or a minimum value of -infinity which is outside " + "the range supported by Stata." ) with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: @@ -1509,8 +1520,19 @@ def test_out_of_range_float(self): original.loc[2, "ColumnTooBig"] = np.inf msg = ( - "Column ColumnTooBig has a maximum value of infinity which " - "is outside the range supported by Stata" + "Column ColumnTooBig has a maximum value of infinity " + "or a minimum value of -infinity which is outside " + "the range supported by Stata." + ) + with pytest.raises(ValueError, match=msg): + with tm.ensure_clean() as path: + original.to_stata(path) + + original.loc[2, "ColumnTooBig"] = -np.inf + msg = ( + "Column ColumnTooBig has a maximum value of infinity " + "or a minimum value of -infinity which is outside " + "the range supported by Stata." ) with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: From ada06644f28bca5cdea42d1d8a24749e933b8138 Mon Sep 17 00:00:00 2001 From: Francis Cong Date: Fri, 14 Jan 2022 00:00:07 -0500 Subject: [PATCH 2/3] BUG: Update #45350 * Change the fix to make it faster. * Group the two test for DataFrame containing np.inf or -np.inf into a separate test function. 
* Update doc/source/whatsnew/v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/io/stata.py | 9 ++++---- pandas/tests/io/test_stata.py | 42 ++++++---------------------------- 3 files changed, 12 insertions(+), 41 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9a6455d4d012f..852d24a3d1c4e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -171,7 +171,7 @@ MultiIndex I/O ^^^ -- +- Bug in :meth:`DataFrame.to_stata` where no error is raised if the :class:`DataFrame` contains ``-np.inf`` (:issue:`45350`) - Period diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 41093cb398a9d..e643404629a6d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -625,12 +625,11 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame: if data[col].max() >= 2 ** 53 or data[col].min() <= -(2 ** 53): ws = precision_loss_doc.format("int64", "float64") elif dtype in (np.float32, np.float64): - value = np.abs(data[col]).max() - if np.isinf(value): + value = data[col].max() + if np.isinf(data[col]).any(): raise ValueError( - f"Column {col} has a maximum value of infinity " - "or a minimum value of -infinity which is outside " - "the range supported by Stata." + f"Column {col} contains infinity or -infinity " + "which is outside the range supported by Stata." ) if dtype == np.float32 and value > float32_max: data[col] = data[col].astype(np.float64) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index e10ffd9c7fd19..4b6643cfa3903 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1473,26 +1473,6 @@ def test_out_of_range_double(self): with tm.ensure_clean() as path: df.to_stata(path) - df.loc[2, "ColumnTooBig"] = np.inf - msg = ( - "Column ColumnTooBig has a maximum value of infinity " - "or a minimum value of -infinity which is outside " - "the range supported by Stata." 
- ) - with pytest.raises(ValueError, match=msg): - with tm.ensure_clean() as path: - df.to_stata(path) - - df.loc[2, "ColumnTooBig"] = -np.inf - msg = ( - "Column ColumnTooBig has a maximum value of infinity " - "or a minimum value of -infinity which is outside " - "the range supported by Stata." - ) - with pytest.raises(ValueError, match=msg): - with tm.ensure_clean() as path: - df.to_stata(path) - def test_out_of_range_float(self): original = DataFrame( { @@ -1518,25 +1498,17 @@ def test_out_of_range_float(self): original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64) tm.assert_frame_equal(original, reread.set_index("index")) - original.loc[2, "ColumnTooBig"] = np.inf - msg = ( - "Column ColumnTooBig has a maximum value of infinity " - "or a minimum value of -infinity which is outside " - "the range supported by Stata." - ) - with pytest.raises(ValueError, match=msg): - with tm.ensure_clean() as path: - original.to_stata(path) - - original.loc[2, "ColumnTooBig"] = -np.inf + @pytest.mark.parametrize("infval", [np.inf, -np.inf]) + def test_inf(self, infval): + # GH 45350 + df = DataFrame({"WithoutInf": [0.0, 1.0], "WithInf": [2.0, infval]}) msg = ( - "Column ColumnTooBig has a maximum value of infinity " - "or a minimum value of -infinity which is outside " - "the range supported by Stata." + "Column WithInf contains infinity or -infinity " + "which is outside the range supported by Stata." ) with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: - original.to_stata(path) + df.to_stata(path) def test_path_pathlib(self): df = tm.makeDataFrame() From 3f195eac221473f53e3e8f52ebfc95c82ad2e0d9 Mon Sep 17 00:00:00 2001 From: Francis Cong Date: Fri, 14 Jan 2022 00:11:12 -0500 Subject: [PATCH 3/3] BUG: Update #45350 * Minor change in code style. 
--- pandas/io/stata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index e643404629a6d..e0a070f051534 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -625,12 +625,12 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame: if data[col].max() >= 2 ** 53 or data[col].min() <= -(2 ** 53): ws = precision_loss_doc.format("int64", "float64") elif dtype in (np.float32, np.float64): - value = data[col].max() if np.isinf(data[col]).any(): raise ValueError( f"Column {col} contains infinity or -infinity " "which is outside the range supported by Stata." ) + value = data[col].max() if dtype == np.float32 and value > float32_max: data[col] = data[col].astype(np.float64) elif dtype == np.float64: