From a9a702ec18d9dbba66f12c9366664a57fe1bfa85 Mon Sep 17 00:00:00 2001 From: Shawn Heide Date: Sat, 8 Oct 2016 11:46:27 -0700 Subject: [PATCH] BUG: astype falsely converts inf to integer, patch for Numpy (GH14265) (+2 squashed commit) Squashed commits: Update test messages to catch new ValueError Change sparse test for astype to assertRaises(ValueError) instead of regex version --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/sparse/tests/test_array.py | 2 +- pandas/tests/frame/test_dtypes.py | 14 +++++++++++--- pandas/tests/series/test_dtypes.py | 14 +++++++++++--- pandas/types/cast.py | 6 ++++-- 5 files changed, 29 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f534c67273560..8fdef39a3ae98 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -118,3 +118,5 @@ Performance Improvements Bug Fixes ~~~~~~~~~ + +- Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. 
Now raises an error with ``astype()`` for Series and DataFrames (:issue:`14265`) \ No newline at end of file diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 1c9b6119cf665..f210f70ad1940 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -361,7 +361,7 @@ def test_astype(self): arr.astype('i8') arr = SparseArray([0, np.nan, 0, 1], fill_value=0) - msg = "Cannot convert NA to integer" + msg = 'Cannot convert non-finite values \(NA or inf\) to integer' with tm.assertRaisesRegexp(ValueError, msg): arr.astype('i8') diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 817770b9da610..61030c262a44b 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -353,9 +353,17 @@ def test_astype_with_view(self): tf = self.frame.astype(np.float64) casted = tf.astype(np.int64, copy=False) # noqa - def test_astype_cast_nan_int(self): - df = DataFrame(data={"Values": [1.0, 2.0, 3.0, np.nan]}) - self.assertRaises(ValueError, df.astype, np.int64) + def test_astype_cast_nan_inf_int(self): + # GH14265, check nan and inf raise error when converting to int + types = [np.int32, np.int64] + values = [np.nan, np.inf] + msg = 'Cannot convert non-finite values \(NA or inf\) to integer' + + for this_type in types: + for this_val in values: + df = DataFrame([this_val]) + with tm.assertRaisesRegexp(ValueError, msg): + df.astype(this_type) def test_astype_str(self): # GH9757 diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 9a406dfa10c35..3eafbaf912797 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -42,9 +42,17 @@ def test_dtype(self): assert_series_equal(self.ts.get_ftype_counts(), Series( 1, ['float64:dense'])) - def test_astype_cast_nan_int(self): - df = Series([1.0, 2.0, 3.0, np.nan]) - self.assertRaises(ValueError, df.astype, np.int64) + def 
test_astype_cast_nan_inf_int(self): + # GH14265, check nan and inf raise error when converting to int + types = [np.int32, np.int64] + values = [np.nan, np.inf] + msg = 'Cannot convert non-finite values \(NA or inf\) to integer' + + for this_type in types: + for this_val in values: + s = Series([this_val]) + with self.assertRaisesRegexp(ValueError, msg): + s.astype(this_type) def test_astype_cast_object_int(self): arr = Series(["car", "house", "tree", "1"]) diff --git a/pandas/types/cast.py b/pandas/types/cast.py index a79862eb195b6..d4beab5655e5c 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -527,8 +527,10 @@ def _astype_nansafe(arr, dtype, copy=True): elif (np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer)): - if np.isnan(arr).any(): - raise ValueError('Cannot convert NA to integer') + if not np.isfinite(arr).all(): + raise ValueError('Cannot convert non-finite values (NA or inf) to ' + 'integer') + elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer): # work around NumPy brokenness, #1987 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)