Skip to content

Commit c6bf2eb

Browse files
committed
BUG: Conflict between thousands sep and date parser.
Fixes an issue where the thousands separator could conflict with date parsing. The fix applies only to the C parser; the Python parser is not covered. Closes issue #4678.
1 parent 1aa4ed4 commit c6bf2eb

File tree

5 files changed

+31
-5
lines changed

5 files changed

+31
-5
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ Bug Fixes
447447
- Fixed a bug in ``convert_objects`` for > 2 ndims (:issue:`4937`)
448448
- Fixed a bug in DataFrame/Panel cache insertion and subsequent indexing (:issue:`4939`)
449449
- Fixed string methods for ``FrozenNDArray`` and ``FrozenList`` (:issue:`4929`)
450+
- Fixed conflict between thousands separator and date parser in csv_parser (:issue:`4678`)
450451

451452
pandas 0.12.0
452453
-------------

pandas/io/date_converters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col,
2626
minute_col = _maybe_cast(minute_col)
2727
second_col = _maybe_cast(second_col)
2828
return lib.try_parse_datetime_components(year_col, month_col, day_col,
29-
hour_col, minute_col, second_col)
29+
hour_col, minute_col, second_col)
3030

3131

3232
def generic_parser(parse_func, *cols):

pandas/io/parsers.py

+8
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,14 @@ def _set(x):
10201020
else:
10211021
_set(val)
10221022

1023+
elif isinstance(self.parse_dates, dict):
1024+
for val in self.parse_dates.values():
1025+
if isinstance(val, list):
1026+
for k in val:
1027+
_set(k)
1028+
else:
1029+
_set(val)
1030+
10231031
def set_error_bad_lines(self, status):
10241032
self._reader.set_error_bad_lines(int(status))
10251033

pandas/io/tests/test_parsers.py

+15
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,18 @@ def test_1000_sep_with_decimal(self):
233233
df = self.read_table(StringIO(data_with_odd_sep), sep='|', thousands='.', decimal=',')
234234
tm.assert_frame_equal(df, expected)
235235

236+
def test_separator_date_conflict(self):
237+
# Regression test for issue #4678: make sure thousands separator and
238+
# date parsing do not conflict.
239+
data = '06-02-2013;13:00;1-000.215'
240+
expected = DataFrame(
241+
[[datetime(2013, 6, 2, 13, 0, 0), 1000.215]],
242+
columns=['Date', 2]
243+
)
244+
245+
df = self.read_csv(StringIO(data), sep=';', thousands='-', parse_dates={'Date': [0, 1]}, header=None)
246+
tm.assert_frame_equal(df, expected)
247+
236248
def test_squeeze(self):
237249
data = """\
238250
a,1
@@ -1940,6 +1952,9 @@ def test_1000_sep_with_decimal(self):
19401952
df = self.read_table(StringIO(data), sep='|', thousands=',')
19411953
tm.assert_frame_equal(df, expected)
19421954

1955+
def test_separator_date_conflict(self):
1956+
raise nose.SkipTest("Not supported in Python parser.")
1957+
19431958
def test_comment_fwf(self):
19441959
data = """
19451960
1 2. 4 #hello world

pandas/src/inference.pyx

+6-4
Original file line numberDiff line numberDiff line change
@@ -708,20 +708,22 @@ def try_parse_datetime_components(ndarray[object] years,
708708
Py_ssize_t i, n
709709
ndarray[object] result
710710
int secs
711+
double float_secs
711712
double micros
712713

713714
from datetime import datetime
714715

715716
n = len(years)
716-
if (len(months) != n and len(days) != n and len(hours) != n and
717-
len(minutes) != n and len(seconds) != n):
717+
if (len(months) != n or len(days) != n or len(hours) != n or
718+
len(minutes) != n or len(seconds) != n):
718719
raise ValueError('Length of all datetime components must be equal')
719720
result = np.empty(n, dtype='O')
720721

721722
for i from 0 <= i < n:
722-
secs = int(seconds[i])
723+
float_secs = float(seconds[i])
724+
secs = int(float_secs)
723725

724-
micros = seconds[i] - secs
726+
micros = float_secs - secs
725727
if micros > 0:
726728
micros = micros * 1000000
727729

0 commit comments

Comments
 (0)