BUG: fix issue #4678 (thousands separator conflicting with date parsing) in the Python parser

guyrt · guyrt · commit fedb26dc5856 · 2013-09-23T20:04:30.000-04:00
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1277,6 +1277,7 @@ def __init__(self, f, **kwds):
             self._make_reader(f)
         else:
             self.data = f
+
         self.columns = self._infer_columns()
 
         # we are processing a multi index column
@@ -1300,6 +1301,38 @@ def __init__(self, f, **kwds):
                 self.index_names = index_names
         self._first_chunk = True
 
+        if self.parse_dates:
+            self._no_thousands_columns = self._set_no_thousands_columns()
+        else:
+            self._no_thousands_columns = None
+
+    def _set_no_thousands_columns(self):
+        # Collect the positions of the parse_dates columns, which must not be stripped of thousands separators.
+        noconvert_columns = set()
+
+        def _set(x):
+            if com.is_integer(x):
+                noconvert_columns.add(x)
+            else:
+                noconvert_columns.add(self.columns.index(x))
+
+        if isinstance(self.parse_dates, list):
+            for val in self.parse_dates:
+                if isinstance(val, list):
+                    for k in val:
+                        _set(k)
+                else:
+                    _set(val)
+
+        elif isinstance(self.parse_dates, dict):
+            for val in self.parse_dates.values():
+                if isinstance(val, list):
+                    for k in val:
+                        _set(k)
+                else:
+                    _set(val)
+        return noconvert_columns
+
     def _make_reader(self, f):
         sep = self.delimiter
 
@@ -1508,7 +1541,6 @@ def _next_line(self):
             line = next(self.data)
 
         line = self._check_comments([line])[0]
-        line = self._check_thousands([line])[0]
 
         self.pos += 1
         self.buf.append(line)
@@ -1540,9 +1572,10 @@ def _check_thousands(self, lines):
         ret = []
         for l in lines:
             rl = []
-            for x in l:
+            for i, x in enumerate(l):
                 if (not isinstance(x, compat.string_types) or
                     self.thousands not in x or
+                    (self._no_thousands_columns and i in self._no_thousands_columns) or
                         nonnum.search(x.strip())):
                     rl.append(x)
                 else:
@@ -1616,7 +1649,6 @@ def _rows_to_cols(self, content):
             raise AssertionError()
 
         if col_len != zip_len and self.index_col is not False:
-            row_num = -1
             i = 0
             for (i, l) in enumerate(content):
                 if len(l) != col_len:
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -1952,9 +1952,6 @@ def test_1000_sep_with_decimal(self):
         df = self.read_table(StringIO(data), sep='|', thousands=',')
         tm.assert_frame_equal(df, expected)
 
-    def test_separator_date_conflict(self):
-        raise nose.SkipTest("Not supported in Python parser.")
-
     def test_comment_fwf(self):
         data = """
   1   2.   4  #hello world