From fe36220fd973c4218c3c0216b6c449a775496233 Mon Sep 17 00:00:00 2001 From: Damien Garaud Date: Wed, 23 Jan 2013 23:19:29 +0100 Subject: [PATCH 1/2] BUG: Add test to read_csv with usecols and regexp separator. #2733 --- pandas/io/tests/test_parsers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 6ecfca10c12c3..74ccf7326fe53 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -1868,6 +1868,16 @@ def test_usecols_implicit_index_col(self): tm.assert_frame_equal(result, expected) + def test_usecols_regexp_separator(self): + # #2733 + data = 'a b c\n4 apple bat 5.7\n8 orange cow 10' + + result = self.read_csv(StringIO(data), sep='\s+', usecols=('a', 'b')) + expected = DataFrame({'a': ['apple', 'orange'], + 'b': ['bat', 'cow']}, index=[4, 8]) + + tm.assert_frame_equal(result, expected) + def test_pure_python_failover(self): data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo" From 78deb355455a10b81cf539938539df656c340316 Mon Sep 17 00:00:00 2001 From: Damien Garaud Date: Thu, 24 Jan 2013 17:08:21 +0100 Subject: [PATCH 2/2] Replace the regexp separator by a whitespace delimiter with the same data text. --- pandas/io/tests/test_parsers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 74ccf7326fe53..6e380bdd64099 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -1868,11 +1868,12 @@ def test_usecols_implicit_index_col(self): tm.assert_frame_equal(result, expected) - def test_usecols_regexp_separator(self): + def test_usecols_with_whitespace(self): # #2733 data = 'a b c\n4 apple bat 5.7\n8 orange cow 10' - result = self.read_csv(StringIO(data), sep='\s+', usecols=('a', 'b')) + result = self.read_csv(StringIO(data), delim_whitespace=True, + usecols=('a', 'b')) expected = DataFrame({'a': ['apple', 'orange'], 'b': ['bat', 'cow']}, index=[4, 8])