diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7054d93457264..b95f799750367 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1023,6 +1023,7 @@ I/O - Bug in :meth:`DataFrame.to_string` ignoring float formatter for extension arrays (:issue:`39336`) - Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`) - Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`) +- Bug in :func:`read_csv` unnecessarily overflowing on large integers when a column with an extension array dtype such as ``Int64`` contains ``NA`` (:issue:`32134`) - Bug in :meth:`DataFrame.to_dict` not converting ``NA`` to ``None`` (:issue:`50795`) - Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`) - Bug in :func:`read_xml` where file-like objects failed when iterparse is used (:issue:`50641`) diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 6ae2c3a2e2749..2d9a3ae63259d 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -285,7 +285,7 @@ def _from_sequence_of_strings( ) -> T: from pandas.core.tools.numeric import to_numeric - scalars = to_numeric(strings, errors="raise") + scalars = to_numeric(strings, errors="raise", use_nullable_dtypes=True) return cls._from_sequence(scalars, dtype=dtype, copy=copy) _HANDLED_TYPES = (np.ndarray, numbers.Number) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 8fd08122f0834..52b142d81cd5e 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -18,6 +18,7 @@ import pandas._testing as tm from pandas.core.arrays import ( ArrowStringArray, + IntegerArray, StringArray, ) @@ -527,3 +528,23 @@ def test_use_nullable_dtypes_pyarrow_backend(all_parsers, 
request): } ) tm.assert_frame_equal(result, expected) + + +def test_ea_int_avoid_overflow(all_parsers): + # GH#32134 + parser = all_parsers + data = """a,b +1,1 +,1 +1582218195625938945,1 +""" + result = parser.read_csv(StringIO(data), dtype={"a": "Int64"}) + expected = DataFrame( + { + "a": IntegerArray( + np.array([1, 1, 1582218195625938945]), np.array([False, True, False]) + ), + "b": 1, + } + ) + tm.assert_frame_equal(result, expected)