From f49d007d9d093926a25d4e27aa3e29bb2c020bda Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 12 Dec 2020 01:44:32 +0100
Subject: [PATCH 01/10] BUG: read_csv not recognizing numbers appropriately
 when decimal is set

---
 doc/source/whatsnew/v1.3.0.rst                |  2 +-
 pandas/io/parsers.py                          | 11 +++-
 .../io/parser/test_python_parser_only.py      | 55 +++++++++++++++++++
 3 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 90f611c55e710..1db88fec0a326 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -146,7 +146,7 @@ MultiIndex
 I/O
 ^^^
 
--
+- Bug in :func:`read_csv` not recognizing scientific notation if decimal is set (:issue:`31920`)
 -
 
 Period
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 5b623c360c3ef..4f49bb34c2aa0 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2346,9 +2346,14 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
             raise ValueError("Only length-1 decimal markers supported")
 
         if self.thousands is None:
-            self.nonnum = re.compile(fr"[^-^0-9^{self.decimal}]+")
+            regex = fr"^\-?[0-9]+({self.decimal}[0-9]*)?((E|e)\-?[0-9]*)?$"
+            self.nonnum = re.compile(regex)
         else:
-            self.nonnum = re.compile(fr"[^-^0-9^{self.thousands}^{self.decimal}]+")
+            thousands = self.thousands
+            if "." == thousands:
+                thousands = fr"\{thousands}"
+            regex = fr"^\-?[0-9]+({thousands}|[0-9])*({self.decimal}[0-9]*)?((E|e)\-?[0-9]*)?$"
+            self.nonnum = re.compile(regex)
 
     def _set_no_thousands_columns(self):
         # Create a set of column ids that are not to be stripped of thousands
@@ -3044,7 +3049,7 @@ def _search_replace_num_columns(self, lines, search, replace):
                     not isinstance(x, str)
                     or search not in x
                     or (self._no_thousands_columns and i in self._no_thousands_columns)
-                    or self.nonnum.search(x.strip())
+                    or not self.nonnum.search(x.strip())
                 ):
                     rl.append(x)
                 else:
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 016fae4f4a6f5..29fbf0ece7d26 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -314,3 +314,58 @@ def test_malformed_skipfooter(python_parser_only):
     msg = "Expected 3 fields in line 4, saw 5"
     with pytest.raises(ParserError, match=msg):
         parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
+
+
+@pytest.mark.parametrize("thousands", [None, "."])
+@pytest.mark.parametrize(
+    "value, result_value",
+    [
+        ("1,2", 1.2),
+        ("1,2e-1", 0.12),
+        ("1,2E-1", 0.12),
+        ("1,2e-10", 0.0000000012),
+        ("1,2e1", 12.0),
+        ("1,2E1", 12.0),
+        ("-1,2e-1", -0.12),
+        ("0,2", 0.2),
+    ],
+)
+def test_decimal_and_exponential(python_parser_only, thousands, value, result_value):
+    # GH#31920
+    data = StringIO(
+        f"""a	b
+    1,1	{value}
+    """
+    )
+    result = python_parser_only.read_csv(
+        data, "\t", decimal=",", engine="python", thousands=thousands
+    )
+    expected = DataFrame({"a": [1.1], "b": [result_value]})
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("thousands", [None, "."])
+@pytest.mark.parametrize(
+    "value",
+    [
+        "e11,2",
+        "1e11,2",
+        "1,2,2",
+        "1,2.1",
+        "1,2e-10e1",
+        "--1,2",
+        "1a.2,1",
+    ],
+)
+def test_decimal_and_exponential_erroneous(python_parser_only, thousands, value):
+    # GH#31920
+    data = StringIO(
+        f"""a	b
+    1,1	{value}
+    """
+    )
+    result = python_parser_only.read_csv(
+        data, "\t", decimal=",", engine="python", thousands=thousands
+    )
+    expected = DataFrame({"a": [1.1], "b": [value]})
+    tm.assert_frame_equal(result, expected)

From cc7dd1b909f7b7026a5a1a411c2995b3d26059fe Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 12 Dec 2020 01:50:35 +0100
Subject: [PATCH 02/10] Add corner case

---
 pandas/io/parsers.py                              | 4 ++--
 pandas/tests/io/parser/test_python_parser_only.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 4f49bb34c2aa0..f8d6cd31b97c9 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2346,13 +2346,13 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
             raise ValueError("Only length-1 decimal markers supported")
 
         if self.thousands is None:
-            regex = fr"^\-?[0-9]+({self.decimal}[0-9]*)?((E|e)\-?[0-9]*)?$"
+            regex = fr"^\-?[0-9]*({self.decimal}[0-9]*)?([0-9](E|e)\-?[0-9]*)?$"
             self.nonnum = re.compile(regex)
         else:
             thousands = self.thousands
             if "." == thousands:
                 thousands = fr"\{thousands}"
-            regex = fr"^\-?[0-9]+({thousands}|[0-9])*({self.decimal}[0-9]*)?((E|e)\-?[0-9]*)?$"
+            regex = fr"^\-?([0-9]|{thousands})*({self.decimal}[0-9]*)?([0-9](E|e)\-?[0-9]*)?$"
             self.nonnum = re.compile(regex)
 
     def _set_no_thousands_columns(self):
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 29fbf0ece7d26..c6aed0f34ed96 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -328,6 +328,7 @@ def test_malformed_skipfooter(python_parser_only):
         ("1,2E1", 12.0),
         ("-1,2e-1", -0.12),
         ("0,2", 0.2),
+        (",2", 0.2),
     ],
 )
 def test_decimal_and_exponential(python_parser_only, thousands, value, result_value):

From 189ca80fc0e146ac17efef27b4c40e0f51a6074e Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 12 Dec 2020 01:53:17 +0100
Subject: [PATCH 03/10] Add another corner case

---
 pandas/io/parsers.py                              | 2 +-
 pandas/tests/io/parser/test_python_parser_only.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index f8d6cd31b97c9..18ab98a41f708 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2352,7 +2352,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
             thousands = self.thousands
             if "." == thousands:
                 thousands = fr"\{thousands}"
-            regex = fr"^\-?([0-9]|{thousands})*({self.decimal}[0-9]*)?([0-9](E|e)\-?[0-9]*)?$"
+            regex = fr"^\-?([0-9]+{thousands}|[0-9])*({self.decimal}[0-9]*)?([0-9](E|e)\-?[0-9]*)?$"
             self.nonnum = re.compile(regex)
 
     def _set_no_thousands_columns(self):
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index c6aed0f34ed96..ce3f64f1ba8b0 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -356,6 +356,7 @@ def test_decimal_and_exponential(python_parser_only, thousands, value, result_va
         "1,2e-10e1",
         "--1,2",
         "1a.2,1",
+        "1..2,3"
     ],
 )
 def test_decimal_and_exponential_erroneous(python_parser_only, thousands, value):

From 567423f7f539191caeb65a035ce274a1d149d66f Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 12 Dec 2020 01:54:40 +0100
Subject: [PATCH 04/10] Run black

---
 pandas/tests/io/parser/test_python_parser_only.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index ce3f64f1ba8b0..a15a0c6292be3 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -348,16 +348,7 @@ def test_decimal_and_exponential(python_parser_only, thousands, value, result_va
 @pytest.mark.parametrize("thousands", [None, "."])
 @pytest.mark.parametrize(
     "value",
-    [
-        "e11,2",
-        "1e11,2",
-        "1,2,2",
-        "1,2.1",
-        "1,2e-10e1",
-        "--1,2",
-        "1a.2,1",
-        "1..2,3"
-    ],
+    ["e11,2", "1e11,2", "1,2,2", "1,2.1", "1,2e-10e1", "--1,2", "1a.2,1", "1..2,3"],
 )
 def test_decimal_and_exponential_erroneous(python_parser_only, thousands, value):
     # GH#31920

From ba163cfd0247663a7c6df0e7e71a04ae219f49fb Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 12 Dec 2020 01:55:21 +0100
Subject: [PATCH 05/10] Fix pep8 issue

---
 pandas/io/parsers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 18ab98a41f708..993f30403155f 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2352,7 +2352,8 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
             thousands = self.thousands
             if "." == thousands:
                 thousands = fr"\{thousands}"
-            regex = fr"^\-?([0-9]+{thousands}|[0-9])*({self.decimal}[0-9]*)?([0-9](E|e)\-?[0-9]*)?$"
+            regex = fr"^\-?([0-9]+{thousands}|[0-9])*({self.decimal}[0-9]*)?"\
+                    fr"([0-9](E|e)\-?[0-9]*)?$"
             self.nonnum = re.compile(regex)
 
     def _set_no_thousands_columns(self):

From a5e568be5cd07ca1b46c0e38986c9957e6924bbe Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 12 Dec 2020 02:08:15 +0100
Subject: [PATCH 06/10] Fix pep8

---
 pandas/io/parsers.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 993f30403155f..84516398d586b 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2352,8 +2352,10 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
             thousands = self.thousands
             if "." == thousands:
                 thousands = fr"\{thousands}"
-            regex = fr"^\-?([0-9]+{thousands}|[0-9])*({self.decimal}[0-9]*)?"\
-                    fr"([0-9](E|e)\-?[0-9]*)?$"
+            regex = (
+                fr"^\-?([0-9]+{thousands}|[0-9])*({self.decimal}[0-9]*)?"
+                fr"([0-9](E|e)\-?[0-9]*)?$"
+            )
             self.nonnum = re.compile(regex)
 
     def _set_no_thousands_columns(self):

From 514f45ae4e9ef3c4faac0664f9257507977bf494 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 19 Dec 2020 19:40:45 +0100
Subject: [PATCH 07/10] Refactor code

---
 pandas/io/parsers.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 84516398d586b..51ae84454eb20 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2345,18 +2345,16 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
         if len(self.decimal) != 1:
             raise ValueError("Only length-1 decimal markers supported")
 
+        decimal = re.escape(self.decimal)
         if self.thousands is None:
-            regex = fr"^\-?[0-9]*({self.decimal}[0-9]*)?([0-9](E|e)\-?[0-9]*)?$"
-            self.nonnum = re.compile(regex)
+            regex = fr"^\-?[0-9]*({decimal}[0-9]*)?([0-9](E|e)\-?[0-9]*)?$"
         else:
-            thousands = self.thousands
-            if "." == thousands:
-                thousands = fr"\{thousands}"
+            thousands = re.escape(self.thousands)
             regex = (
-                fr"^\-?([0-9]+{thousands}|[0-9])*({self.decimal}[0-9]*)?"
+                fr"^\-?([0-9]+{thousands}|[0-9])*({decimal}[0-9]*)?"
                 fr"([0-9](E|e)\-?[0-9]*)?$"
             )
-            self.nonnum = re.compile(regex)
+        self.num = re.compile(regex)
 
     def _set_no_thousands_columns(self):
         # Create a set of column ids that are not to be stripped of thousands
@@ -3052,7 +3050,7 @@ def _search_replace_num_columns(self, lines, search, replace):
                     not isinstance(x, str)
                     or search not in x
                     or (self._no_thousands_columns and i in self._no_thousands_columns)
-                    or not self.nonnum.search(x.strip())
+                    or not self.num.search(x.strip())
                 ):
                     rl.append(x)
                 else:

From a0eced563c644ba3f302448056ae570826997056 Mon Sep 17 00:00:00 2001
From: patrick <61934744+phofl@users.noreply.github.com>
Date: Tue, 29 Dec 2020 21:58:21 +0100
Subject: [PATCH 08/10] Update
 pandas/tests/io/parser/test_python_parser_only.py

Co-authored-by: gfyoung <gfyoung17+GitHub@gmail.com>
---
 pandas/tests/io/parser/test_python_parser_only.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index a15a0c6292be3..4ca681d253d34 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -358,7 +358,7 @@ def test_decimal_and_exponential_erroneous(python_parser_only, thousands, value)
     """
     )
     result = python_parser_only.read_csv(
-        data, "\t", decimal=",", engine="python", thousands=thousands
+        data, "\t", decimal=",", thousands=thousands
     )
     expected = DataFrame({"a": [1.1], "b": [value]})
     tm.assert_frame_equal(result, expected)

From a611dad916f82af289232610697f6711e0e84a5d Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Tue, 29 Dec 2020 22:07:19 +0100
Subject: [PATCH 09/10] Fix black bug from autocommit

---
 pandas/tests/io/parser/test_python_parser_only.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index bab3b5996f132..04d5413abfafc 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -348,8 +348,6 @@ def test_decimal_and_exponential_erroneous(python_parser_only, thousands, value)
     1,1	{value}
     """
     )
-    result = python_parser_only.read_csv(
-        data, "\t", decimal=",", thousands=thousands
-    )
+    result = python_parser_only.read_csv(data, "\t", decimal=",", thousands=thousands)
     expected = DataFrame({"a": [1.1], "b": [value]})
     tm.assert_frame_equal(result, expected)

From 9ec9954d41ffc65e06db3572d656857e9d4e89f7 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Fri, 1 Jan 2021 23:36:24 +0100
Subject: [PATCH 10/10] Modify whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index a80b18f977378..c8d917a67f37b 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -255,7 +255,7 @@ I/O
 ^^^
 
 - Bug in :meth:`Index.__repr__` when ``display.max_seq_items=1`` (:issue:`38415`)
-- Bug in :func:`read_csv` not recognizing scientific notation if decimal is set (:issue:`31920`)
+- Bug in :func:`read_csv` not recognizing scientific notation if decimal is set for ``engine="python"`` (:issue:`31920`)
 - Bug in :func:`read_csv` interpreting ``NA`` value as comment, when ``NA`` does contain the comment string fixed for ``engine="python"`` (:issue:`34002`)
 - Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`)
 - Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)