Skip to content

Commit fedb26d

Browse files
committed
BUG: fix issue #4678 for Python parser
1 parent c6bf2eb commit fedb26d

File tree

2 files changed

+35
-6
lines changed

2 files changed

+35
-6
lines changed

pandas/io/parsers.py

+35-3
Original file line numberDiff line numberDiff line change
@@ -1277,6 +1277,7 @@ def __init__(self, f, **kwds):
12771277
self._make_reader(f)
12781278
else:
12791279
self.data = f
1280+
12801281
self.columns = self._infer_columns()
12811282

12821283
# we are processing a multi index column
@@ -1300,6 +1301,38 @@ def __init__(self, f, **kwds):
13001301
self.index_names = index_names
13011302
self._first_chunk = True
13021303

1304+
if self.parse_dates:
1305+
self._no_thousands_columns = self._set_no_thousands_columns()
1306+
else:
1307+
self._no_thousands_columns = None
1308+
1309+
def _set_no_thousands_columns(self):
    """Build the set of column positions exempt from thousands stripping.

    The positions are taken from ``self.parse_dates``:

    * a list: each element is a single column reference or a list of
      column references;
    * a dict: each value is a single column reference or a list of
      column references (the keys are not consulted);
    * anything else: no positions are collected.

    A reference for which ``com.is_integer`` is true is stored as-is;
    any other reference is translated to a position with
    ``self.columns.index``.

    Returns
    -------
    set
        Column positions whose values must keep their thousands
        separators.
    """
    date_spec = self.parse_dates
    if isinstance(date_spec, list):
        entries = date_spec
    elif isinstance(date_spec, dict):
        entries = date_spec.values()
    else:
        # Neither a list nor a dict: nothing to exempt.
        entries = ()

    exempt = set()
    for entry in entries:
        # Treat a lone reference as a one-element group so both shapes
        # share the same inner loop.
        group = entry if isinstance(entry, list) else [entry]
        for ref in group:
            if com.is_integer(ref):
                exempt.add(ref)
            else:
                exempt.add(self.columns.index(ref))
    return exempt
1335+
13031336
def _make_reader(self, f):
13041337
sep = self.delimiter
13051338

@@ -1508,7 +1541,6 @@ def _next_line(self):
15081541
line = next(self.data)
15091542

15101543
line = self._check_comments([line])[0]
1511-
line = self._check_thousands([line])[0]
15121544

15131545
self.pos += 1
15141546
self.buf.append(line)
@@ -1540,9 +1572,10 @@ def _check_thousands(self, lines):
15401572
ret = []
15411573
for l in lines:
15421574
rl = []
1543-
for x in l:
1575+
for i, x in enumerate(l):
15441576
if (not isinstance(x, compat.string_types) or
15451577
self.thousands not in x or
1578+
(self._no_thousands_columns and i in self._no_thousands_columns) or
15461579
nonnum.search(x.strip())):
15471580
rl.append(x)
15481581
else:
@@ -1616,7 +1649,6 @@ def _rows_to_cols(self, content):
16161649
raise AssertionError()
16171650

16181651
if col_len != zip_len and self.index_col is not False:
1619-
row_num = -1
16201652
i = 0
16211653
for (i, l) in enumerate(content):
16221654
if len(l) != col_len:

pandas/io/tests/test_parsers.py

-3
Original file line numberDiff line numberDiff line change
@@ -1952,9 +1952,6 @@ def test_1000_sep_with_decimal(self):
19521952
df = self.read_table(StringIO(data), sep='|', thousands=',')
19531953
tm.assert_frame_equal(df, expected)
19541954

1955-
def test_separator_date_conflict(self):
1956-
raise nose.SkipTest("Not supported in Python parser.")
1957-
19581955
def test_comment_fwf(self):
19591956
data = """
19601957
1 2. 4 #hello world

0 commit comments

Comments
 (0)