BUG: Conflict between thousands sep and date parser. #4945


Merged 2 commits on Sep 26, 2013
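For context, a minimal reproduction of the reported conflict (a hypothetical standalone script, not part of this PR, reusing the data from the test added below). Before this change, the thousands separator was stripped from every numeric-looking field before date parsing, so with thousands='-' a date such as '06-02-2013' was mangled into '06022013'; after this change, columns consumed by parse_dates keep their raw text.

from io import StringIO
import pandas as pd

# Day/month/year date, a time column, and a number that uses '-' as
# the thousands separator (the combination reported in GH4678).
data = '06-02-2013;13:00;1-000.215'

df = pd.read_csv(StringIO(data), sep=';', thousands='-',
                 parse_dates={'Date': [0, 1]}, header=None)

# Expected output (matching the new test): columns 0 and 1 are combined
# and parsed as a datetime, and only column 2 loses its '-' separator.
#                  Date         2
# 0 2013-06-02 13:00:00  1000.215
print(df)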
1 change: 1 addition & 0 deletions doc/source/release.rst
@@ -447,6 +447,7 @@ Bug Fixes
- Fixed a bug in ``convert_objects`` for > 2 ndims (:issue:`4937`)
- Fixed a bug in DataFrame/Panel cache insertion and subsequent indexing (:issue:`4939`)
- Fixed string methods for ``FrozenNDArray`` and ``FrozenList`` (:issue:`4929`)
- Fixed conflict between thousands separator and date parser in csv_parser (:issue:`4678`)

pandas 0.12.0
-------------
2 changes: 1 addition & 1 deletion pandas/io/date_converters.py
@@ -26,7 +26,7 @@ def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col,
minute_col = _maybe_cast(minute_col)
second_col = _maybe_cast(second_col)
return lib.try_parse_datetime_components(year_col, month_col, day_col,
hour_col, minute_col, second_col)
hour_col, minute_col, second_col)


def generic_parser(parse_func, *cols):
46 changes: 43 additions & 3 deletions pandas/io/parsers.py
@@ -1020,6 +1020,14 @@ def _set(x):
else:
_set(val)

elif isinstance(self.parse_dates, dict):
for val in self.parse_dates.values():
if isinstance(val, list):
for k in val:
_set(k)
else:
_set(val)

def set_error_bad_lines(self, status):
self._reader.set_error_bad_lines(int(status))

@@ -1269,6 +1277,7 @@ def __init__(self, f, **kwds):
self._make_reader(f)
else:
self.data = f

self.columns = self._infer_columns()

# we are processing a multi index column
@@ -1292,6 +1301,38 @@ def __init__(self, f, **kwds):
self.index_names = index_names
self._first_chunk = True

if self.parse_dates:
self._no_thousands_columns = self._set_no_thousands_columns()
else:
self._no_thousands_columns = None

def _set_no_thousands_columns(self):
# Create a set of column ids that are not to be stripped of thousands operators.
noconvert_columns = set()

def _set(x):
if com.is_integer(x):
noconvert_columns.add(x)
else:
noconvert_columns.add(self.columns.index(x))

if isinstance(self.parse_dates, list):
for val in self.parse_dates:
if isinstance(val, list):
for k in val:
_set(k)
else:
_set(val)

elif isinstance(self.parse_dates, dict):
for val in self.parse_dates.values():
if isinstance(val, list):
for k in val:
_set(k)
else:
_set(val)
return noconvert_columns
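
# Editor's illustration (not part of this diff): a standalone sketch of what
# the helper above computes, assuming the parse_dates forms handled here
# (a list of columns / column lists, or a dict mapping new names to them).
def no_thousands_columns(parse_dates, columns):
    # Columns that feed the date parser must keep their raw text, so the
    # thousands separator is never stripped from them.
    noconvert = set()

    def _set(x):
        # Accept either a positional index or a column label.
        noconvert.add(x if isinstance(x, int) else columns.index(x))

    if isinstance(parse_dates, dict):
        values = parse_dates.values()
    elif isinstance(parse_dates, list):
        values = parse_dates
    else:
        values = []
    for val in values:
        if isinstance(val, list):
            for k in val:
                _set(k)
        else:
            _set(val)
    return noconvert

# With the dict used in the new test, no_thousands_columns({'Date': [0, 1]},
# columns=[0, 1, 2]) returns {0, 1}; _check_thousands below then skips those
# two columns and only cleans column 2.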

def _make_reader(self, f):
sep = self.delimiter

@@ -1500,7 +1541,6 @@ def _next_line(self):
line = next(self.data)

line = self._check_comments([line])[0]
line = self._check_thousands([line])[0]

self.pos += 1
self.buf.append(line)
@@ -1532,9 +1572,10 @@ def _check_thousands(self, lines):
ret = []
for l in lines:
rl = []
for x in l:
for i, x in enumerate(l):
if (not isinstance(x, compat.string_types) or
self.thousands not in x or
(self._no_thousands_columns and i in self._no_thousands_columns) or
nonnum.search(x.strip())):
rl.append(x)
else:
@@ -1608,7 +1649,6 @@ def _rows_to_cols(self, content):
raise AssertionError()

if col_len != zip_len and self.index_col is not False:
row_num = -1
i = 0
for (i, l) in enumerate(content):
if len(l) != col_len:
12 changes: 12 additions & 0 deletions pandas/io/tests/test_parsers.py
@@ -233,6 +233,18 @@ def test_1000_sep_with_decimal(self):
df = self.read_table(StringIO(data_with_odd_sep), sep='|', thousands='.', decimal=',')
tm.assert_frame_equal(df, expected)

def test_separator_date_conflict(self):
Contributor: does this run for both PythonParser and the C parser? (A manual check against both engines is sketched after this test.)

# Regression test for issue #4678: make sure thousands separator and
# date parsing do not conflict.
data = '06-02-2013;13:00;1-000.215'
expected = DataFrame(
[[datetime(2013, 6, 2, 13, 0, 0), 1000.215]],
columns=['Date', 2]
)

df = self.read_csv(StringIO(data), sep=';', thousands='-', parse_dates={'Date': [0, 1]}, header=None)
tm.assert_frame_equal(df, expected)
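
One hypothetical way to check the question above by hand, assuming the engine keyword that read_csv exposes in later pandas releases (not part of this PR):

from io import StringIO
from datetime import datetime
import pandas as pd
from pandas.testing import assert_frame_equal

data = '06-02-2013;13:00;1-000.215'
expected = pd.DataFrame([[datetime(2013, 6, 2, 13, 0, 0), 1000.215]],
                        columns=['Date', 2])

# Run the same read through both the C parser and the pure-Python parser.
for engine in ('c', 'python'):
    result = pd.read_csv(StringIO(data), sep=';', thousands='-',
                         parse_dates={'Date': [0, 1]}, header=None,
                         engine=engine)
    assert_frame_equal(result, expected)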

def test_squeeze(self):
data = """\
a,1
10 changes: 6 additions & 4 deletions pandas/src/inference.pyx
@@ -708,20 +708,22 @@ def try_parse_datetime_components(ndarray[object] years,
Py_ssize_t i, n
ndarray[object] result
int secs
double float_secs
double micros

from datetime import datetime

n = len(years)
if (len(months) != n and len(days) != n and len(hours) != n and
len(minutes) != n and len(seconds) != n):
if (len(months) != n or len(days) != n or len(hours) != n or
len(minutes) != n or len(seconds) != n):
raise ValueError('Length of all datetime components must be equal')
result = np.empty(n, dtype='O')

for i from 0 <= i < n:
secs = int(seconds[i])
float_secs = float(seconds[i])
secs = int(float_secs)

micros = seconds[i] - secs
micros = float_secs - secs
if micros > 0:
micros = micros * 1000000
