From f42b65ad7fa3345f03e6cb45907de08cee60cccf Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 29 Dec 2020 15:33:18 -0500
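Subject: [PATCH 1/8] BUG: fix out-of-bounds array access in precise_xstrtod for very negative or overly long exponents (GH#38753)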

---
 pandas/_libs/src/parser/tokenizer.c   | 11 +++++++----
 pandas/tests/io/parser/test_common.py | 24 ++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
index 965fece370721..1b229171ea879 100644
--- a/pandas/_libs/src/parser/tokenizer.c
+++ b/pandas/_libs/src/parser/tokenizer.c
@@ -1726,7 +1726,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
         // Process string of digits.
         num_digits = 0;
         n = 0;
-        while (isdigit_ascii(*p)) {
+        while (num_digits < max_digits && isdigit_ascii(*p)) {
             n = n * 10 + (*p - '0');
             num_digits++;
             p++;
@@ -1747,10 +1747,13 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
     } else if (exponent > 0) {
         number *= e[exponent];
     } else if (exponent < -308) {  // Subnormal
-        if (exponent < -616)       // Prevent invalid array access.
+        if (exponent < -616) {  // Prevent invalid array access.
             number = 0.;
-        number /= e[-308 - exponent];
-        number /= e[308];
+        } else {
+            number /= e[-308 - exponent];
+            number /= e[308];
+        }
+
     } else {
         number /= e[-exponent];
     }
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index ce3557e098bfd..e85ebd22ffe81 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -1351,6 +1351,30 @@ def test_numeric_range_too_wide(all_parsers, exp_data):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("neg_exp", [-617, -100000, -99999999999999999])
+def test_very_negative_exponent(all_parsers, neg_exp):
+    # GH#38753
+    parser = all_parsers
+    data = f"data\n10E{neg_exp}"
+    for precision in parser.float_precision_choices:
+        result = parser.read_csv(StringIO(data), float_precision=precision)
+        expected = DataFrame({"data": [0.0]})
+        tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
+def test_too_many_exponent_digits(all_parsers, exp):
+    # GH#38753
+    parser = all_parsers
+    data = f"data\n10E{exp}"
+    for precision in parser.float_precision_choices:
+        if precision == "round_trip":
+            continue
+        result = parser.read_csv(StringIO(data), float_precision=precision)
+        expected = DataFrame({"data": [f"10E{exp}"]})
+        tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("iterator", [True, False])
 def test_empty_with_nrows_chunksize(all_parsers, iterator):
     # see gh-9535

From bb77ab213b5dcf084cb5441e30cf767246d30bc2 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 29 Dec 2020 15:52:16 -0500
Subject: [PATCH 2/8] DOC: add whatsnew entry for precise_xstrtod segfault fix (GH#38753)

---
 doc/source/whatsnew/v1.2.1.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
index 649b17e255f3d..b0dd7175680f9 100644
--- a/doc/source/whatsnew/v1.2.1.rst
+++ b/doc/source/whatsnew/v1.2.1.rst
@@ -17,6 +17,7 @@ Fixed regressions
 - The deprecated attributes ``_AXIS_NAMES`` and ``_AXIS_NUMBERS`` of :class:`DataFrame` and :class:`Series` will no longer show up in ``dir`` or ``inspect.getmembers`` calls (:issue:`38740`)
 - :meth:`to_csv` created corrupted zip files when there were more rows than ``chunksize`` (issue:`38714`)
 - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
+- Bug in :meth:`read_csv` with `float_precision="high"` caused segfault or wrong parsing of long exponents strings (:issue:`38753`)
 -
 
 .. ---------------------------------------------------------------------------

From 3a0ac2e5c86ab017bcfa80bb2fe53ac5af01ac85 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 29 Dec 2020 15:57:49 -0500
Subject: [PATCH 3/8] Fix typo

---
 doc/source/whatsnew/v1.2.1.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
index b0dd7175680f9..85d2aa0ecd262 100644
--- a/doc/source/whatsnew/v1.2.1.rst
+++ b/doc/source/whatsnew/v1.2.1.rst
@@ -17,7 +17,7 @@ Fixed regressions
 - The deprecated attributes ``_AXIS_NAMES`` and ``_AXIS_NUMBERS`` of :class:`DataFrame` and :class:`Series` will no longer show up in ``dir`` or ``inspect.getmembers`` calls (:issue:`38740`)
 - :meth:`to_csv` created corrupted zip files when there were more rows than ``chunksize`` (issue:`38714`)
 - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
-- Bug in :meth:`read_csv` with `float_precision="high"` caused segfault or wrong parsing of long exponents strings (:issue:`38753`)
+- Bug in :meth:`read_csv` with `float_precision="high"` caused segfault or wrong parsing of long exponent strings (:issue:`38753`)
 -
 
 .. ---------------------------------------------------------------------------

From 7b0cc6226eaeb84c905d2c11f138b65150a44f93 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 29 Dec 2020 16:08:35 -0500
Subject: [PATCH 4/8] Fix whatsnew

---
 doc/source/whatsnew/v1.2.1.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
index 85d2aa0ecd262..2e9c5ed8e6839 100644
--- a/doc/source/whatsnew/v1.2.1.rst
+++ b/doc/source/whatsnew/v1.2.1.rst
@@ -17,7 +17,7 @@ Fixed regressions
 - The deprecated attributes ``_AXIS_NAMES`` and ``_AXIS_NUMBERS`` of :class:`DataFrame` and :class:`Series` will no longer show up in ``dir`` or ``inspect.getmembers`` calls (:issue:`38740`)
 - :meth:`to_csv` created corrupted zip files when there were more rows than ``chunksize`` (issue:`38714`)
 - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
-- Bug in :meth:`read_csv` with `float_precision="high"` caused segfault or wrong parsing of long exponent strings (:issue:`38753`)
+- Bug in :meth:`read_csv` with ``float_precision``="high" caused segfault or wrong parsing of long exponent strings (:issue:`38753`)
 -
 
 .. ---------------------------------------------------------------------------

From 39dd79c0b677fd1342818d675e2d5ccfe1e970fc Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 29 Dec 2020 20:42:29 -0500
Subject: [PATCH 5/8] Fix quotes

---
 doc/source/whatsnew/v1.2.1.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
index dfe7823f08c6e..5e6ef82f78ce1 100644
--- a/doc/source/whatsnew/v1.2.1.rst
+++ b/doc/source/whatsnew/v1.2.1.rst
@@ -19,7 +19,7 @@ Fixed regressions
 - Fixed a regression in ``groupby().rolling()`` where :class:`MultiIndex` levels were dropped (:issue:`38523`)
 - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
 - Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`)
-- Bug in :meth:`read_csv` with ``float_precision``="high" caused segfault or wrong parsing of long exponent strings (:issue:`38753`)
+- Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings (:issue:`38753`)
 
 .. ---------------------------------------------------------------------------
 

From 58bbed5b72a1f4396d5669830354e9d0df8985ba Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 29 Dec 2020 21:04:37 -0500
Subject: [PATCH 6/8] xfail inconsistent test

---
 pandas/tests/io/parser/test_common.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index c7ef653533357..6bc778f50937e 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -15,6 +15,7 @@
 import pytest
 
 from pandas._libs.tslib import Timestamp
+from pandas.compat import is_platform_linux
 from pandas.errors import DtypeWarning, EmptyDataError, ParserError
 import pandas.util._test_decorators as td
 
@@ -1361,12 +1362,16 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
 
 
 @pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
-def test_too_many_exponent_digits(all_parsers_all_precisions, exp):
+def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
     # GH#38753
     parser, precision = all_parsers_all_precisions
     data = f"data\n10E{exp}"
     result = parser.read_csv(StringIO(data), float_precision=precision)
     if precision == "round_trip":
+        if exp == 999999999999999999 and is_platform_linux():
+            mark = pytest.mark.xfail(reason="On Linux gives object result")
+            request.node.add_marker(mark)
+
         value = np.inf if exp > 0 else 0.0
         expected = DataFrame({"data": [value]})
     else:

From 0f5f1033cea21383bf33173b67eb43cbe97d4169 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Wed, 30 Dec 2020 10:46:47 -0500
Subject: [PATCH 7/8] Add issue number to xfail

---
 pandas/tests/io/parser/test_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index 6bc778f50937e..31f1581a6184b 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -1369,7 +1369,7 @@ def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
     result = parser.read_csv(StringIO(data), float_precision=precision)
     if precision == "round_trip":
         if exp == 999999999999999999 and is_platform_linux():
-            mark = pytest.mark.xfail(reason="On Linux gives object result")
+            mark = pytest.mark.xfail(reason="GH38794, on Linux gives object result")
             request.node.add_marker(mark)
 
         value = np.inf if exp > 0 else 0.0

From 0a48ed843efdc03a40b2611b255473617fdaeb26 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Wed, 30 Dec 2020 10:49:29 -0500
Subject: [PATCH 8/8] Keep extra line

---
 doc/source/whatsnew/v1.2.1.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
index 97ea9b11b884e..3ecea674fd34c 100644
--- a/doc/source/whatsnew/v1.2.1.rst
+++ b/doc/source/whatsnew/v1.2.1.rst
@@ -21,6 +21,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`)
 - Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`)
 - Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings (:issue:`38753`)
+-
 
 .. ---------------------------------------------------------------------------