-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: support decimal option in PythonParser #12933 #13189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
dc7acd1
b560fda
1472d80
803356e
f71509d
d821052
49613fe
dc8ca62
9f42d0c
465272e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -348,6 +348,7 @@ def _read(filepath_or_buffer, kwds): | |
'keep_default_na': True, | ||
'thousands': None, | ||
'comment': None, | ||
'decimal': b'.', | ||
|
||
# 'engine': 'c', | ||
'parse_dates': False, | ||
|
@@ -383,7 +384,6 @@ def _read(filepath_or_buffer, kwds): | |
'error_bad_lines': True, | ||
'warn_bad_lines': True, | ||
'dtype': None, | ||
'decimal': b'.', | ||
'float_precision': None | ||
} | ||
|
||
|
@@ -404,7 +404,6 @@ def _read(filepath_or_buffer, kwds): | |
'error_bad_lines', | ||
'warn_bad_lines', | ||
'dtype', | ||
'decimal', | ||
'float_precision', | ||
]) | ||
|
||
|
@@ -1582,6 +1581,7 @@ def __init__(self, f, **kwds): | |
self.converters = kwds['converters'] | ||
|
||
self.thousands = kwds['thousands'] | ||
self.decimal = kwds['decimal'] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. pls check / update There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. hmm, might not be the case, but pls check. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jreback I can't find any reference in |
||
self.comment = kwds['comment'] | ||
self._comment_lines = [] | ||
|
||
|
@@ -1639,6 +1639,9 @@ def __init__(self, f, **kwds): | |
else: | ||
self._no_thousands_columns = None | ||
|
||
if len(self.decimal) != 1: | ||
raise ValueError('Only length-1 decimal markers supported') | ||
|
||
def _set_no_thousands_columns(self): | ||
# Create a set of column ids that are not to be stripped of thousands | ||
# operators. | ||
|
@@ -2050,22 +2053,42 @@ def _check_empty(self, lines): | |
def _check_thousands(self, lines): | ||
if self.thousands is None: | ||
return lines | ||
nonnum = re.compile('[^-^0-9^%s^.]+' % self.thousands) | ||
nonnum = re.compile('[^-^0-9^%s^%s]+' % (self.thousands, self.decimal)) | ||
return self._search_replace_num_columns(lines=lines, | ||
search=self.thousands, | ||
replace='', | ||
nonnum=nonnum) | ||
|
||
def _search_replace_num_columns(self, lines, search, replace, nonnum): | ||
ret = [] | ||
for l in lines: | ||
rl = [] | ||
for i, x in enumerate(l): | ||
if (not isinstance(x, compat.string_types) or | ||
self.thousands not in x or | ||
search not in x or | ||
(self._no_thousands_columns and | ||
i in self._no_thousands_columns) or | ||
nonnum.search(x.strip())): | ||
rl.append(x) | ||
else: | ||
rl.append(x.replace(self.thousands, '')) | ||
rl.append(x.replace(search, replace)) | ||
ret.append(rl) | ||
return ret | ||
|
||
def _check_decimal(self, lines): | ||
if self.decimal == b'.': | ||
return lines | ||
|
||
if self.thousands is None: | ||
nonnum = re.compile('[^-^0-9^%s]+' % self.decimal) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. these should be created in init |
||
else: | ||
nonnum = re.compile('[^-^0-9^%s^%s]+' % (self.thousands, | ||
self.decimal)) | ||
return self._search_replace_num_columns(lines=lines, | ||
search=self.decimal, | ||
replace='.', | ||
nonnum=nonnum) | ||
|
||
def _clear_buffer(self): | ||
self.buf = [] | ||
|
||
|
@@ -2249,7 +2272,8 @@ def _get_lines(self, rows=None): | |
lines = self._check_comments(lines) | ||
if self.skip_blank_lines: | ||
lines = self._check_empty(lines) | ||
return self._check_thousands(lines) | ||
lines = self._check_thousands(lines) | ||
return self._check_decimal(lines) | ||
|
||
|
||
def _make_date_converter(date_parser=None, dayfirst=False, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
include the issue number