From d8decae5f65c3ca576deb3d2fb9a843e73fe7fd1 Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sat, 18 Jun 2016 16:55:12 +0100 Subject: [PATCH 01/11] #13398 Change the way of reading back to readline (consistent with the test before entering the function) --- pandas/io/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9baff67845dac..dc9455289b757 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1868,7 +1868,7 @@ class MyDialect(csv.Dialect): else: def _read(): - line = next(f) + line = f.readline() pat = re.compile(sep) yield pat.split(line.strip()) for line in f: From 5af84654e1fc578f3b64a65cfe0c2c1406df1f8f Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sat, 18 Jun 2016 17:32:15 +0100 Subject: [PATCH 02/11] Adding a test with Python engine --- pandas/io/tests/test_common.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py index 46c34abf5aeb7..095accc258fb0 100644 --- a/pandas/io/tests/test_common.py +++ b/pandas/io/tests/test_common.py @@ -11,7 +11,7 @@ from pandas.io import common from pandas.compat import is_platform_windows, StringIO -from pandas import read_csv, concat +from pandas import read_csv, read_table, concat try: from pathlib import Path @@ -90,6 +90,15 @@ def test_iterator(self): expected.index = [0 for i in range(len(expected))] tm.assert_frame_equal(concat(it), expected.iloc[1:]) + def test_temporary_file(self): + from tempfile import TemporaryFile + new_file = TemporaryFile("w+") + new_file.write("0 0") + new_file.flush() + new_file.seek(0) + + dataframe = read_table(new_file, sep=r"\s+", header=None, engine="python") + class TestMMapWrapper(tm.TestCase): From 119fb65da9484b7d1d43fb2f223febc338ac47c4 Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sat, 18 Jun 2016 17:41:24 +0100 Subject: [PATCH 03/11] Added reference to original issue in the test 
+ test the result itself (assuming that previous test is OK) --- pandas/io/tests/test_common.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py index 095accc258fb0..1048286f01585 100644 --- a/pandas/io/tests/test_common.py +++ b/pandas/io/tests/test_common.py @@ -11,7 +11,7 @@ from pandas.io import common from pandas.compat import is_platform_windows, StringIO -from pandas import read_csv, read_table, concat +from pandas import read_csv, concat try: from pathlib import Path @@ -33,7 +33,8 @@ class TestCommonIOCapabilities(tm.TestCase): foo2,12,13,14,15 bar2,12,13,14,15 """ - + data2 = data1.replace(",", " ") + def test_expand_user(self): filename = '~/sometest' expanded_name = common._expand_user(filename) @@ -90,14 +91,17 @@ def test_iterator(self): expected.index = [0 for i in range(len(expected))] tm.assert_frame_equal(concat(it), expected.iloc[1:]) + #13398 def test_temporary_file(self): from tempfile import TemporaryFile new_file = TemporaryFile("w+") - new_file.write("0 0") + new_file.write(self.data2) new_file.flush() new_file.seek(0) - dataframe = read_table(new_file, sep=r"\s+", header=None, engine="python") + result = read_csv(new_file, sep=r"\s+", engine="python") + expected = read_csv(StringIO(self.data1)) + tm.assert_frame_equal(result, expected) class TestMMapWrapper(tm.TestCase): From 98e476e23979dd3b038d473a3653cab2353e3df6 Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sat, 18 Jun 2016 17:42:47 +0100 Subject: [PATCH 04/11] Using same way of referencing as just above, consistency. 
--- pandas/io/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py index 1048286f01585..c5b07988fa374 100644 --- a/pandas/io/tests/test_common.py +++ b/pandas/io/tests/test_common.py @@ -91,8 +91,8 @@ def test_iterator(self): expected.index = [0 for i in range(len(expected))] tm.assert_frame_equal(concat(it), expected.iloc[1:]) - #13398 def test_temporary_file(self): + # GH13398 from tempfile import TemporaryFile new_file = TemporaryFile("w+") new_file.write(self.data2) From fd20aaf34c4d4016e387ed70e78e09e832814370 Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sat, 18 Jun 2016 18:05:08 +0100 Subject: [PATCH 05/11] Moved the test to the Python parser test file --- pandas/io/tests/parser/python_parser_only.py | 23 ++++++++++++++++++++ pandas/io/tests/test_common.py | 13 ----------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index a08cb36c13f80..f08f81ffe8f4d 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -171,3 +171,26 @@ def test_read_table_buglet_4x_multiindex(self): columns=list('abcABC'), index=list('abc')) actual = self.read_table(StringIO(data), sep='\s+') tm.assert_frame_equal(actual, expected) + + def test_temporary_file(self): + # GH13398 + data1 = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + data2 = data1.replace(",", " ") + + from tempfile import TemporaryFile + new_file = TemporaryFile("w+") + new_file.write(data2) + new_file.flush() + new_file.seek(0) + + result = self.read_csv(new_file, sep=r"\s+", engine="python") + expected = self.read_csv(StringIO(data1)) + tm.assert_frame_equal(result, expected) + \ No newline at end of file diff --git a/pandas/io/tests/test_common.py 
b/pandas/io/tests/test_common.py index c5b07988fa374..40c551656f483 100644 --- a/pandas/io/tests/test_common.py +++ b/pandas/io/tests/test_common.py @@ -33,7 +33,6 @@ class TestCommonIOCapabilities(tm.TestCase): foo2,12,13,14,15 bar2,12,13,14,15 """ - data2 = data1.replace(",", " ") def test_expand_user(self): filename = '~/sometest' @@ -91,18 +90,6 @@ def test_iterator(self): expected.index = [0 for i in range(len(expected))] tm.assert_frame_equal(concat(it), expected.iloc[1:]) - def test_temporary_file(self): - # GH13398 - from tempfile import TemporaryFile - new_file = TemporaryFile("w+") - new_file.write(self.data2) - new_file.flush() - new_file.seek(0) - - result = read_csv(new_file, sep=r"\s+", engine="python") - expected = read_csv(StringIO(self.data1)) - tm.assert_frame_equal(result, expected) - class TestMMapWrapper(tm.TestCase): From d8ceb5709d4a2101562f41a53be2ff2560487b38 Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sat, 18 Jun 2016 23:22:48 +0100 Subject: [PATCH 06/11] lint changes --- pandas/io/tests/parser/python_parser_only.py | 1 - pandas/io/tests/test_common.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index f08f81ffe8f4d..3296b7283d57a 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -193,4 +193,3 @@ def test_temporary_file(self): result = self.read_csv(new_file, sep=r"\s+", engine="python") expected = self.read_csv(StringIO(data1)) tm.assert_frame_equal(result, expected) - \ No newline at end of file diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py index 40c551656f483..46c34abf5aeb7 100644 --- a/pandas/io/tests/test_common.py +++ b/pandas/io/tests/test_common.py @@ -33,7 +33,7 @@ class TestCommonIOCapabilities(tm.TestCase): foo2,12,13,14,15 bar2,12,13,14,15 """ - + def test_expand_user(self): filename = '~/sometest' expanded_name = 
common._expand_user(filename) From 1c33fb501b03436976ccacea2678eeaf4d38a1c0 Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sun, 19 Jun 2016 21:09:05 +0100 Subject: [PATCH 07/11] Simplified test and added what's new note. --- doc/source/whatsnew/v0.18.2.txt | 1 + pandas/io/tests/parser/python_parser_only.py | 14 ++++---------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 8a14765aa6df2..f895de1a8e982 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -493,6 +493,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` in which the ``nrows`` argument was not properly validated for both engines (:issue:`10476`) - Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`) - Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a tempfile.TemporaryFile on Windows with Python 3 a file with the separator expressed as a regex (:issue:`13398`) - Bug in ``pd.read_csv()`` that prevents ``usecols`` kwarg from accepting single-byte unicode strings (:issue:`13219`) - Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`) - Bug in ``pd.read_csv()`` with ``engine=='c'`` in which null ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`) diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index 3296b7283d57a..369276ef3d8f2 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -174,14 +174,7 @@ def test_read_table_buglet_4x_multiindex(self): def test_temporary_file(self): # GH13398 - data1 = """index,A,B,C,D -foo,2,3,4,5 -bar,7,8,9,10 -baz,12,13,14,15 
-qux,12,13,14,15 -foo2,12,13,14,15 -bar2,12,13,14,15 -""" + data1 = "0,0" data2 = data1.replace(",", " ") from tempfile import TemporaryFile @@ -190,6 +183,7 @@ def test_temporary_file(self): new_file.flush() new_file.seek(0) - result = self.read_csv(new_file, sep=r"\s+", engine="python") - expected = self.read_csv(StringIO(data1)) + result = self.read_csv(new_file, sep=r"\s+", header=None) + expected = DataFrame([[0, 0]]) tm.assert_frame_equal(result, expected) + From aa3f0aad33b186cfc7924b98f3f2427af70d7e20 Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sun, 19 Jun 2016 21:36:07 +0100 Subject: [PATCH 08/11] lint change --- pandas/io/tests/parser/python_parser_only.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index 369276ef3d8f2..2d883ace0b93b 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -186,4 +186,3 @@ def test_temporary_file(self): result = self.read_csv(new_file, sep=r"\s+", header=None) expected = DataFrame([[0, 0]]) tm.assert_frame_equal(result, expected) - From 587162513d88839e17901c0122a190413fea7942 Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sun, 19 Jun 2016 22:18:45 +0100 Subject: [PATCH 09/11] Grammar --- doc/source/whatsnew/v0.18.2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index f895de1a8e982..9a4d39b4e3390 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -493,7 +493,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` in which the ``nrows`` argument was not properly validated for both engines (:issue:`10476`) - Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`) - Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being 
parsed (:issue:`13320`) -- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a tempfile.TemporaryFile on Windows with Python 3 a file with the separator expressed as a regex (:issue:`13398`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a ``tempfile.TemporaryFile`` on Windows with Python 3 using a regex separator (:issue:`13398`) - Bug in ``pd.read_csv()`` that prevents ``usecols`` kwarg from accepting single-byte unicode strings (:issue:`13219`) - Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`) - Bug in ``pd.read_csv()`` with ``engine=='c'`` in which null ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`) From 0d54151b991e2c5961bc57147de498666aea607f Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sun, 19 Jun 2016 22:19:59 +0100 Subject: [PATCH 10/11] Simplified --- pandas/io/tests/parser/python_parser_only.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index 2d883ace0b93b..7501c8b656013 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -174,12 +174,11 @@ def test_read_table_buglet_4x_multiindex(self): def test_temporary_file(self): # GH13398 - data1 = "0,0" - data2 = data1.replace(",", " ") + data1 = "0 0" from tempfile import TemporaryFile new_file = TemporaryFile("w+") - new_file.write(data2) + new_file.write(data1) new_file.flush() new_file.seek(0) From 8b52631454c9d63e8bf2f633221e54ec902a412e Mon Sep 17 00:00:00 2001 From: Matthieu Brucher Date: Sun, 19 Jun 2016 22:30:07 +0100 Subject: [PATCH 11/11] Yet another small update for more general regex --- pandas/io/tests/parser/python_parser_only.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/tests/parser/python_parser_only.py 
b/pandas/io/tests/parser/python_parser_only.py index 7501c8b656013..6f0ea75c4da93 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -182,6 +182,6 @@ def test_temporary_file(self): new_file.flush() new_file.seek(0) - result = self.read_csv(new_file, sep=r"\s+", header=None) + result = self.read_csv(new_file, sep=r"\s*", header=None) expected = DataFrame([[0, 0]]) tm.assert_frame_equal(result, expected)