WIP: Maintain Int64 Precision on Construction #26272

Closed
wants to merge 3 commits into from
18 changes: 14 additions & 4 deletions pandas/core/arrays/integer.py
@@ -1,11 +1,12 @@
import copy
import sys
-from typing import Type
+from typing import Sequence, Type
import warnings

import numpy as np

from pandas._libs import lib
from pandas._typing import Dtype
from pandas.compat import set_function_name
from pandas.util._decorators import cache_readonly

@@ -304,9 +305,18 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
return integer_array(scalars, dtype=dtype, copy=copy)

@classmethod
-    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
-        scalars = to_numeric(strings, errors="raise")
-        return cls._from_sequence(scalars, dtype, copy)
+    def _from_sequence_of_strings(cls,
+                                  strings: Sequence[str],
+                                  dtype: Dtype = None,
+                                  copy: bool = False) -> 'IntegerArray':
+        # Mask the NA location before sending to to_numeric to prevent
+        # undesirable cast to float which may lose precision
+        mask = isna(strings)
+        masked_strings = np.where(mask, 0, strings)

Contributor:
I think we can do this instead in to_numeric itself.
Member Author:
Was thinking that too, but the downside is we'd have to return both an array of values and a mask from that method; otherwise we'd keep hitting the same issue (unless we have pd.to_numeric construct the Int64 array directly).

Contributor:
> (unless we have pd.to_numeric construct the Int64 array directly)

Yes, that is a desirable outcome.

+        scalars = to_numeric(masked_strings, errors="raise")
+
+        return IntegerArray(scalars, mask)

@classmethod
def _from_factorized(cls, values, original):
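The masking technique in the patch above can be illustrated outside the class. A minimal sketch, assuming a recent pandas where `pd.arrays.IntegerArray` is publicly exposed (the PR itself applies this inside `_from_sequence_of_strings`):

```python
import numpy as np
import pandas as pd

strings = np.array(["1556559573141592653", None], dtype=object)

# Naive path: converting data that contains NA goes through float64,
# which cannot represent this 19-digit integer exactly.
as_float = pd.to_numeric(pd.Series(strings), errors="coerce")
assert int(as_float.iloc[0]) != 1556559573141592653  # precision lost

# Patch's approach: mask the NA positions first so to_numeric only
# sees integer strings and stays in int64, then rebuild with the mask.
mask = pd.isna(strings)
masked_strings = np.where(mask, "0", strings)
scalars = pd.to_numeric(masked_strings, errors="raise")
arr = pd.arrays.IntegerArray(scalars, mask)
assert arr[0] == 1556559573141592653  # precision kept
```

The placeholder `0` written at masked positions is never observed, because the mask marks those slots as missing in the resulting IntegerArray.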
24 changes: 24 additions & 0 deletions pandas/tests/io/parser/test_dtypes.py
@@ -509,3 +509,27 @@ def test_numeric_dtype(all_parsers, dtype):

result = parser.read_csv(StringIO(data), header=None, dtype=dtype)
tm.assert_frame_equal(expected, result)


def test_intna_precision(all_parsers):
Member:
Going to need several more tests to check this behavior, but we can revisit this point after the actual implementation has been critiqued.

parser = all_parsers
data = "1556559573141592653\n1556559573141592654\n\n1556559573141592655"
dtype = 'Int64'

expected = DataFrame([
[1556559573141592653],
[1556559573141592654],
[0],
Member Author:
Just to clarify the below TODO: if you use np.nan, the precision is lost before the DataFrame gets constructed, hence why I've constructed this with just integers first and subsequently assigned nan here.

[1556559573141592655],
], dtype=dtype)
expected.iloc[2] = np.nan # TODO: fix general bug on df construction

result = parser.read_csv(StringIO(data), header=None, dtype=dtype,
skip_blank_lines=False)

tm.assert_frame_equal(result, expected)
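The construction workaround described in the comment above can be shown in isolation. A small sketch, mirroring the test's values; the NA-assignment behavior is as observed on current pandas:

```python
import numpy as np
import pandas as pd

# Build the frame from integers only, so construction never round-trips
# through float64, then assign the missing value afterwards.
expected = pd.DataFrame(
    [[1556559573141592653],
     [1556559573141592654],
     [0],
     [1556559573141592655]],
    dtype="Int64",
)
expected.iloc[2] = np.nan  # stored as missing in the Int64 column

assert expected.iloc[0, 0] == 1556559573141592653  # precision intact
assert pd.isna(expected.iloc[2, 0])
```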

# See why tm.assert_frame_equal doesn't fail...
Member:
Not sure I follow what's going on here exactly?

Member Author:
I didn't dig through the assert_* methods, but I'm pretty sure there is internal casting to float64 going on, so you can lose precision pretty easily.

Here's an example that passes without issue:

>>> ser1 = pd.Series([1556559573141592653, 1556559573141592654], dtype='Int64')
>>> ser2 = pd.Series([1556559573141592654, 1556559573141592654], dtype='Int64')
>>> tm.assert_series_equal(ser1, ser2)  # should fail but doesn't

Member Author:
Totally. I think quite a few more need to go into test_integers for construction.

When I started this I wasn't expecting the precision issue with DataFrame construction and the assert methods. It ended up being a Pandora's box pretty quickly, so I'm posting this for discussion to see how we want to tackle the various components.

Member (@gfyoung, May 3, 2019):
Hmm... that's awkward. We should open an issue about this. Our handling of large numbers is definitely more bug-prone across the codebase, relatively speaking.

Member:
FWIW, I can get assert_series_equal to work if check_exact=True is passed. My guess is that the float conversion happens due to defaulting to checking equality up to a given precision:

In [1]: import pandas as pd; pd.__version__
Out[1]: '0.25.0.dev0+476.g9feb3ad92c'

In [2]: ser1 = pd.Series([1556559573141592653, 1556559573141592654], dtype='Int64')

In [3]: ser2 = pd.Series([1556559573141592654, 1556559573141592654], dtype='Int64')

In [4]: pd.util.testing.assert_series_equal(ser1, ser2)

In [5]: pd.util.testing.assert_series_equal(ser1, ser2, check_exact=True)
---------------------------------------------------------------------------
AssertionError: Series are different

Series values are different (50.0 %)
[left]:  [1556559573141592653, 1556559573141592654]
[right]: [1556559573141592654, 1556559573141592654]

I guess it could make sense to default to exact checks for integer dtypes instead of doing precision checks, but it doesn't really seem like a clean solution since it'd require implementing dtype-specific behavior.

assert result.iloc[0] == expected.iloc[0]
assert result.iloc[1] == expected.iloc[1]
assert result.iloc[3] == expected.iloc[3]