diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt index ee0ee4259f86d..6a36adb915b3c 100644 --- a/doc/source/whatsnew/v0.23.5.txt +++ b/doc/source/whatsnew/v0.23.5.txt @@ -37,3 +37,7 @@ Bug Fixes - - + +**I/O** + +- Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index bcde32696c1ff..9d8d208d2d5c1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -95,7 +95,7 @@ def _ensure_data(values, dtype=None): values = _ensure_float64(values) return values, 'float64', 'float64' - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): # if we are trying to coerce to a dtype # and it is incompat this will fall thru to here return _ensure_object(values), 'object', 'object' @@ -429,7 +429,7 @@ def isin(comps, values): values = values.astype('int64', copy=False) comps = comps.astype('int64', copy=False) f = lambda x, y: htable.ismember_int64(x, y) - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): values = values.astype(object) comps = comps.astype(object) diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index d2c3f82e95c4d..cc224efd533b7 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -369,3 +369,14 @@ def test_no_na_filter_on_index(self): expected = DataFrame({"a": [1, 4], "c": [3, 6]}, index=Index([np.nan, 5.0], name="b")) tm.assert_frame_equal(out, expected) + + def test_inf_na_values_with_int_index(self): + # see gh-17128 + data = "idx,col1,col2\n1,3,4\n2,inf,-inf" + + # Don't fail with OverflowError with infs and integer index column + out = self.read_csv(StringIO(data), index_col=[0], + na_values=['inf', '-inf']) + expected = DataFrame({"col1": [3, np.nan], "col2": [4, np.nan]}, + index=Index([1, 2], name="idx")) + tm.assert_frame_equal(out, expected)