|
16 | 16 | from pandas.core.config import get_option
|
17 | 17 | from pandas.io.date_converters import generic_parser
|
18 | 18 | from pandas.io.common import get_filepath_or_buffer
|
| 19 | +from pandas.tseries import tools |
19 | 20 |
|
20 | 21 | from pandas.util.decorators import Appender
|
21 | 22 |
|
|
143 | 144 | warn_bad_lines: boolean, default True
|
144 | 145 | If error_bad_lines is False, and warn_bad_lines is True, a warning for each
|
145 | 146 | "bad line" will be output. (Only valid with C parser).
|
| 147 | +infer_datetime_format : boolean, default False |
| 148 | + If True and parse_dates is enabled for a column, attempt to infer |
| 149 | + the datetime format to speed up the processing. |
146 | 150 |
|
147 | 151 | Returns
|
148 | 152 | -------
|
@@ -262,6 +266,7 @@ def _read(filepath_or_buffer, kwds):
|
262 | 266 | 'compression': None,
|
263 | 267 | 'mangle_dupe_cols': True,
|
264 | 268 | 'tupleize_cols': False,
|
| 269 | + 'infer_datetime_format': False, |
265 | 270 | }
|
266 | 271 |
|
267 | 272 |
|
@@ -349,7 +354,8 @@ def parser_f(filepath_or_buffer,
|
349 | 354 | encoding=None,
|
350 | 355 | squeeze=False,
|
351 | 356 | mangle_dupe_cols=True,
|
352 |
| - tupleize_cols=False): |
| 357 | + tupleize_cols=False, |
| 358 | + infer_datetime_format=False): |
353 | 359 |
|
354 | 360 | # Alias sep -> delimiter.
|
355 | 361 | if delimiter is None:
|
@@ -408,7 +414,8 @@ def parser_f(filepath_or_buffer,
|
408 | 414 | low_memory=low_memory,
|
409 | 415 | buffer_lines=buffer_lines,
|
410 | 416 | mangle_dupe_cols=mangle_dupe_cols,
|
411 |
| - tupleize_cols=tupleize_cols) |
| 417 | + tupleize_cols=tupleize_cols, |
| 418 | + infer_datetime_format=infer_datetime_format) |
412 | 419 |
|
413 | 420 | return _read(filepath_or_buffer, kwds)
|
414 | 421 |
|
@@ -665,9 +672,13 @@ def __init__(self, kwds):
|
665 | 672 | self.true_values = kwds.get('true_values')
|
666 | 673 | self.false_values = kwds.get('false_values')
|
667 | 674 | self.tupleize_cols = kwds.get('tupleize_cols', False)
|
| 675 | + self.infer_datetime_format = kwds.pop('infer_datetime_format', False) |
668 | 676 |
|
669 |
| - self._date_conv = _make_date_converter(date_parser=self.date_parser, |
670 |
| - dayfirst=self.dayfirst) |
| 677 | + self._date_conv = _make_date_converter( |
| 678 | + date_parser=self.date_parser, |
| 679 | + dayfirst=self.dayfirst, |
| 680 | + infer_datetime_format=self.infer_datetime_format |
| 681 | + ) |
671 | 682 |
|
672 | 683 | # validate header options for mi
|
673 | 684 | self.header = kwds.get('header')
|
@@ -1178,6 +1189,10 @@ def TextParser(*args, **kwds):
|
1178 | 1189 | Encoding to use for UTF when reading/writing (ex. 'utf-8')
|
1179 | 1190 | squeeze : boolean, default False
|
1180 | 1191 | returns Series if only one column
|
| 1192 | + infer_datetime_format : boolean, default False |
| 1193 | + If True and `parse_dates` is enabled for a column, try to infer the |
| 1194 | + datetime format based on the first datetime string. If the format |
| 1195 | + can be inferred, there will often be a large parsing speed-up. |
1181 | 1196 | """
|
1182 | 1197 | kwds['engine'] = 'python'
|
1183 | 1198 | return TextFileReader(*args, **kwds)
|
@@ -1870,13 +1885,19 @@ def _get_lines(self, rows=None):
|
1870 | 1885 | return self._check_thousands(lines)
|
1871 | 1886 |
|
1872 | 1887 |
|
1873 |
| -def _make_date_converter(date_parser=None, dayfirst=False): |
| 1888 | +def _make_date_converter(date_parser=None, dayfirst=False, |
| 1889 | + infer_datetime_format=False): |
1874 | 1890 | def converter(*date_cols):
|
1875 | 1891 | if date_parser is None:
|
1876 | 1892 | strs = _concat_date_cols(date_cols)
|
1877 | 1893 | try:
|
1878 |
| - return tslib.array_to_datetime(com._ensure_object(strs), |
1879 |
| - utc=None, dayfirst=dayfirst) |
| 1894 | + return tools.to_datetime( |
| 1895 | + com._ensure_object(strs), |
| 1896 | + utc=None, |
| 1897 | + box=False, |
| 1898 | + dayfirst=dayfirst, |
| 1899 | + infer_datetime_format=infer_datetime_format |
| 1900 | + ) |
1880 | 1901 | except:
|
1881 | 1902 | return lib.try_parse_dates(strs, dayfirst=dayfirst)
|
1882 | 1903 | else:
|
|
0 commit comments