From 2de4236bd8e09bb82c641109a1d01c66d0d0efb8 Mon Sep 17 00:00:00 2001
From: Chang She
Date: Fri, 11 May 2012 13:16:43 -0400
Subject: [PATCH 1/4] ENH: convert multiple text file columns to a single date column #1186

---
 pandas/io/parsers.py            | 90 +++++++++++++++++++++++++++++++--
 pandas/io/tests/test_parsers.py | 40 +++++++++++++++
 2 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index e218fdce98380..2c92653bf19b2 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -24,6 +24,9 @@ def next(x):
 
 from pandas.util.decorators import Appender
 
+class DateConversionError(Exception):
+    pass
+
 _parser_params = """Also supports optionally iterating or breaking of the file
 into chunks.
 
@@ -51,6 +54,9 @@ def next(x):
 date_parser : function
     Function to use for converting strings to dates. Defaults to
     dateutil.parser
+date_conversion : list or dict, default None
+    Combines multiple text columns into a single date/datetime column.
+    Newly created columns are prepended to the output.
 dayfirst : boolean, default False
     DD/MM format dates, international and European format
 thousands : str, default None
@@ -186,6 +192,7 @@ def read_csv(filepath_or_buffer,
              parse_dates=False,
              dayfirst=False,
              date_parser=None,
+             date_conversion=None,
              nrows=None,
              iterator=False,
              chunksize=None,
@@ -216,6 +223,7 @@ def read_table(filepath_or_buffer,
                parse_dates=False,
                dayfirst=False,
                date_parser=None,
+               date_conversion=None,
                nrows=None,
                iterator=False,
                chunksize=None,
@@ -250,6 +258,7 @@ def read_fwf(filepath_or_buffer,
              parse_dates=False,
              dayfirst=False,
              date_parser=None,
+             date_conversion=None,
              nrows=None,
              iterator=False,
              chunksize=None,
@@ -351,6 +360,7 @@ class TextParser(object):
         Comment out remainder of line
     parse_dates : boolean, default False
     date_parser : function, default None
+    date_conversion : list or dict, default None
     skiprows : list of integers
         Row numbers to skip
     skip_footer : int
@@ -362,8 +372,8 @@ class TextParser(object):
     def __init__(self, f, delimiter=None, names=None, header=0,
                  index_col=None, na_values=None, thousands=None,
                  comment=None, parse_dates=False,
-                 date_parser=None, dayfirst=False, chunksize=None,
-                 skiprows=None, skip_footer=0, converters=None,
+                 date_parser=None, date_conversion=None, dayfirst=False,
+                 chunksize=None, skiprows=None, skip_footer=0, converters=None,
                  verbose=False, encoding=None):
         """
         Workhorse function for processing nested list into DataFrame
@@ -382,6 +392,7 @@ def __init__(self, f, delimiter=None, names=None, header=0,
 
         self.parse_dates = parse_dates
         self.date_parser = date_parser
+        self.date_conversion = date_conversion
         self.dayfirst = dayfirst
 
         if com.is_integer(skiprows):
@@ -745,9 +756,11 @@ def get_chunk(self, rows=None):
                 data[x] = lib.try_parse_dates(data[x], parser=self.date_parser,
                                               dayfirst=self.dayfirst)
 
+        data, columns = self._process_date_conversion(data, self.columns)
+
         data = _convert_to_ndarrays(data, self.na_values, self.verbose)
 
-        return DataFrame(data=data, columns=self.columns, index=index)
+        return DataFrame(data=data, columns=columns, index=index)
 
     def _find_line_number(self, exp_len, chunk_len, chunk_i):
         if exp_len is None:
@@ -778,6 +791,52 @@ def _should_parse_dates(self, i):
             name = self.index_name[i]
         return i in to_parse or name in to_parse
 
+    def _process_date_conversion(self, data_dict, columns):
+        if self.date_conversion is None:
+            return data_dict, columns
+
+        new_cols = []
+        new_data = {}
+
+        def date_converter(*date_cols):
+            if self.date_parser is None:
+                return lib.try_parse_dates(_concat_date_cols(date_cols),
+                                           dayfirst=self.dayfirst)
+            else:
+                try:
+                    return self.date_parser(date_cols)
+                except:
+                    return lib.try_parse_dates(_concat_date_cols(date_cols),
+                                               parser=self.date_parser,
+                                               dayfirst=self.dayfirst)
+
+        if isinstance(self.date_conversion, list):
+            # list of column lists
+            for colspec in self.date_conversion:
+                new_name, col = _try_convert_dates(date_converter, colspec,
+                                                   data_dict, columns)
+                if new_name in data_dict:
+                    raise ValueError('Result date column already in dict %s' %
+                                     new_name)
+                new_data[new_name] = col
+                new_cols.append(new_name)
+
+        elif isinstance(self.date_conversion, dict):
+            # dict of new name to column list
+            for new_name, colspec in self.date_conversion.iteritems():
+                if new_name in data_dict:
+                    raise ValueError('Date column %s already in dict' %
+                                     new_name)
+
+                _, col = _try_convert_dates(date_converter, colspec, data_dict,
+                                            columns)
+                new_data[new_name] = col
+                new_cols.append(new_name)
+
+        data_dict.update(new_data)
+        new_cols.extend(columns)
+        return data_dict, new_cols
+
     def _get_lines(self, rows=None):
         source = self.data
         lines = self.buf
@@ -860,6 +919,31 @@ def _convert_types(values, na_values):
 
     return result, na_count
 
+def _get_col_names(colspec, columns):
+    colset = set(columns)
+    colnames = []
+    for c in colspec:
+        if c in colset:
+            colnames.append(str(c))
+        elif isinstance(c, int):
+            colnames.append(str(columns[c]))
+    return colnames
+
+def _try_convert_dates(parser, colspec, data_dict, columns):
+    colspec = _get_col_names(colspec, columns)
+    new_name = '_'.join(colspec)
+
+    to_parse = [data_dict[c] for c in colspec if c in data_dict]
+    try:
+        new_col = parser(*to_parse)
+    except DateConversionError:
+        new_col = _concat_date_cols(to_parse)
+    return new_name, new_col
+
+def _concat_date_cols(date_cols):
+    concat = lambda x: ' '.join(x)
+    return np.array(np.apply_along_axis(concat, 0, np.vstack(date_cols)),
+                    dtype=object)
 
 class FixedWidthReader(object):
     """
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index 02fc25329e7bc..ed7733aba83c2 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -12,6 +12,7 @@
 import numpy as np
 
 from pandas import DataFrame, Index, isnull
+import pandas.io.parsers as parsers
 from pandas.io.parsers import (read_csv, read_table, read_fwf, ExcelFile,
                                TextParser)
 from pandas.util.testing import assert_almost_equal, assert_frame_equal, network
@@ -90,6 +91,45 @@ def test_comment_fwf(self):
                          comment='#')
         assert_almost_equal(df.values, expected)
 
+    def test_multiple_date_col(self):
+        # Can use multiple date parsers
+        data = """\
+KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
+KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
+KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
+KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
+KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
+KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
+"""
+        def func(*date_cols):
+            return lib.try_parse_dates(parsers._concat_date_cols(date_cols))
+
+        df = read_table(StringIO(data), sep=',', header=None,
+                        date_parser=func,
+                        date_conversion={'nominal' : [1, 2],
+                                         'actual' : [1,3]})
+        self.assert_('nominal' in df)
+        self.assert_('actual' in df)
+        from datetime import datetime
+        d = datetime(1999, 1, 27, 19, 0)
+        self.assert_(df.ix[0, 'nominal'] == d)
+
+        data = """\
+KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
+KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
+KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
+KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
+KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
+KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
+"""
+        df = read_table(StringIO(data), sep=',', header=None,
+                        date_conversion=[[1, 2], [1,3]])
+        self.assert_('X.2_X.3' in df)
+        self.assert_('X.2_X.4' in df)
+        from datetime import datetime
+        d = datetime(1999, 1, 27, 19, 0)
+        self.assert_(df.ix[0, 'X.2_X.3'] == d)
+
     def test_malformed(self):
         # all
         data = """ignore
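Note on the mechanics of patch 1: _concat_date_cols joins the string values of the
chosen columns row by row, so fields like '19990127' and '19:00:00' become one
parseable string, and _try_convert_dates names the combined column by joining the
source column names with underscores. A minimal, self-contained sketch of that
behavior (NumPy only; the sample arrays are illustrative, not taken from the patch):

    import numpy as np

    def concat_date_cols(date_cols):
        # Join the columns element-wise with a single space, mirroring
        # _concat_date_cols in the patch above.
        concat = lambda x: ' '.join(x)
        return np.array(np.apply_along_axis(concat, 0, np.vstack(date_cols)),
                        dtype=object)

    dates = np.array(['19990127', '19990127'], dtype=object)
    times = np.array(['19:00:00', '20:00:00'], dtype=object)

    combined = concat_date_cols([dates, times])
    # -> array(['19990127 19:00:00', '19990127 20:00:00'], dtype=object)

    new_name = '_'.join(['date', 'time'])
    # -> 'date_time', how _try_convert_dates names the output column
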
From 2659d3f6891144f67a1bfc06908dab28b8f7e433 Mon Sep 17 00:00:00 2001
From: Chang She
Date: Fri, 11 May 2012 13:39:39 -0400
Subject: [PATCH 2/4] Merged extra keyword with parse_dates

---
 pandas/io/parsers.py            | 65 +++++++++++++++-------------------
 pandas/io/tests/test_parsers.py |  6 +--
 2 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 2c92653bf19b2..2da50384cec6c 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -54,9 +54,6 @@ class DateConversionError(Exception):
 date_parser : function
     Function to use for converting strings to dates. Defaults to
     dateutil.parser
-date_conversion : list or dict, default None
-    Combines multiple text columns into a single date/datetime column.
-    Newly created columns are prepended to the output.
 dayfirst : boolean, default False
     DD/MM format dates, international and European format
 thousands : str, default None
@@ -161,7 +158,8 @@ def _read(cls, filepath_or_buffer, kwds):
         f = com._get_handle(filepath_or_buffer, 'r', encoding=encoding)
 
     if kwds.get('date_parser', None) is not None:
-        kwds['parse_dates'] = True
+        if isinstance(kwds['parse_dates'], bool):
+            kwds['parse_dates'] = True
 
     # Extract some of the arguments (pass chunksize on).
     kwds.pop('filepath_or_buffer')
@@ -192,7 +190,6 @@ def read_csv(filepath_or_buffer,
              parse_dates=False,
              dayfirst=False,
              date_parser=None,
-             date_conversion=None,
              nrows=None,
              iterator=False,
              chunksize=None,
@@ -223,7 +220,6 @@ def read_table(filepath_or_buffer,
                parse_dates=False,
                dayfirst=False,
                date_parser=None,
-               date_conversion=None,
                nrows=None,
                iterator=False,
                chunksize=None,
@@ -258,7 +254,6 @@ def read_fwf(filepath_or_buffer,
              parse_dates=False,
              dayfirst=False,
              date_parser=None,
-             date_conversion=None,
              nrows=None,
              iterator=False,
              chunksize=None,
@@ -360,7 +355,6 @@ class TextParser(object):
         Comment out remainder of line
     parse_dates : boolean, default False
     date_parser : function, default None
-    date_conversion : list or dict, default None
     skiprows : list of integers
         Row numbers to skip
     skip_footer : int
@@ -372,7 +366,7 @@ class TextParser(object):
     def __init__(self, f, delimiter=None, names=None, header=0,
                  index_col=None, na_values=None, thousands=None,
                  comment=None, parse_dates=False,
-                 date_parser=None, date_conversion=None, dayfirst=False,
+                 date_parser=None, dayfirst=False,
                  chunksize=None, skiprows=None, skip_footer=0, converters=None,
                  verbose=False, encoding=None):
         """
@@ -392,7 +386,6 @@ def __init__(self, f, delimiter=None, names=None, header=0,
 
         self.parse_dates = parse_dates
         self.date_parser = date_parser
-        self.date_conversion = date_conversion
         self.dayfirst = dayfirst
 
         if com.is_integer(skiprows):
@@ -747,16 +740,10 @@ def get_chunk(self, rows=None):
                 col = self.columns[col]
             data[col] = lib.map_infer(data[col], f)
 
-        if not isinstance(self.parse_dates, bool):
-            for x in self.parse_dates:
-                if isinstance(x, int) and x not in data:
-                    x = self.orig_columns[x]
-                if x in self.index_col or x in self.index_name:
-                    continue
-                data[x] = lib.try_parse_dates(data[x], parser=self.date_parser,
-                                              dayfirst=self.dayfirst)
-
-        data, columns = self._process_date_conversion(data, self.columns)
+        columns = self.columns
+        if (self.parse_dates is not None and
+            not isinstance(self.parse_dates, bool)):
+            data, columns = self._process_date_conversion(data, columns)
 
         data = _convert_to_ndarrays(data, self.na_values, self.verbose)
 
@@ -792,9 +779,6 @@ def _should_parse_dates(self, i):
         return i in to_parse or name in to_parse
 
     def _process_date_conversion(self, data_dict, columns):
-        if self.date_conversion is None:
-            return data_dict, columns
-
         new_cols = []
         new_data = {}
 
@@ -804,26 +788,33 @@ def date_converter(*date_cols):
                                            dayfirst=self.dayfirst)
             else:
                 try:
-                    return self.date_parser(date_cols)
+                    return self.date_parser(*date_cols)
                 except:
                     return lib.try_parse_dates(_concat_date_cols(date_cols),
                                                parser=self.date_parser,
                                                dayfirst=self.dayfirst)
 
-        if isinstance(self.date_conversion, list):
+        if isinstance(self.parse_dates, list):
             # list of column lists
-            for colspec in self.date_conversion:
-                new_name, col = _try_convert_dates(date_converter, colspec,
-                                                   data_dict, columns)
-                if new_name in data_dict:
-                    raise ValueError('Result date column already in dict %s' %
-                                     new_name)
-                new_data[new_name] = col
-                new_cols.append(new_name)
-
-        elif isinstance(self.date_conversion, dict):
+            for colspec in self.parse_dates:
+                if np.isscalar(colspec):
+                    if isinstance(colspec, int) and colspec not in data_dict:
+                        colspec = self.orig_columns[colspec]
+                    if colspec in self.index_col or colspec in self.index_name:
+                        continue
+                    data_dict[colspec] = date_converter(data_dict[colspec])
+                else:
+                    new_name, col = _try_convert_dates(date_converter, colspec,
+                                                       data_dict, columns)
+                    if new_name in data_dict:
+                        raise ValueError('New date column already in dict %s' %
+                                         new_name)
+                    new_data[new_name] = col
+                    new_cols.append(new_name)
+
+        elif isinstance(self.parse_dates, dict):
             # dict of new name to column list
-            for new_name, colspec in self.date_conversion.iteritems():
+            for new_name, colspec in self.parse_dates.iteritems():
                 if new_name in data_dict:
                     raise ValueError('Date column %s already in dict' %
                                      new_name)
@@ -941,6 +932,8 @@ def _try_convert_dates(parser, colspec, data_dict, columns):
     return new_name, new_col
 
 def _concat_date_cols(date_cols):
+    if len(date_cols) == 1:
+        return date_cols[0]
     concat = lambda x: ' '.join(x)
     return np.array(np.apply_along_axis(concat, 0, np.vstack(date_cols)),
                     dtype=object)
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index ed7733aba83c2..b5184f59bf114 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -106,8 +106,8 @@ def func(*date_cols):
 
         df = read_table(StringIO(data), sep=',', header=None,
                         date_parser=func,
-                        date_conversion={'nominal' : [1, 2],
-                                         'actual' : [1,3]})
+                        parse_dates={'nominal' : [1, 2],
+                                     'actual' : [1,3]})
         self.assert_('nominal' in df)
         self.assert_('actual' in df)
         from datetime import datetime
@@ -123,7 +123,7 @@ def func(*date_cols):
 KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
 """
         df = read_table(StringIO(data), sep=',', header=None,
-                        date_conversion=[[1, 2], [1,3]])
+                        parse_dates=[[1, 2], [1,3]])
         self.assert_('X.2_X.3' in df)
         self.assert_('X.2_X.4' in df)
         from datetime import datetime

From 53f7df628b8cefe22f2551684106601f997363ce Mon Sep 17 00:00:00 2001
From: Chang She
Date: Fri, 11 May 2012 13:48:17 -0400
Subject: [PATCH 3/4] TST: VB for multiple date columns

---
 vb_suite/parser.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/vb_suite/parser.py b/vb_suite/parser.py
index 7c2754ca7da07..8c6abafa5b89a 100644
--- a/vb_suite/parser.py
+++ b/vb_suite/parser.py
@@ -50,3 +50,42 @@
                            setup, cleanup="os.remove('test.csv')",
                            start_date=datetime(2012, 5, 7))
+
+setup = common_setup + """
+from pandas import read_table
+from cStringIO import StringIO
+import os
+N = 10000
+K = 8
+data = '''\
+KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
+KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
+KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
+KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
+KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
+'''
+data = data * 2000
+"""
+cmd = ("read_table(StringIO(data), sep=',', header=None, "
+       "parse_dates=[[1,2], [1,3]])")
+sdate = datetime(2012, 5, 7)
+read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate)
+
+setup = common_setup + """
+from pandas import read_table
+from cStringIO import StringIO
+import os
+N = 10000
+K = 8
+data = '''\
+KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
+KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
+KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
+KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
+KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
+'''
+data = data * 2000
+"""
+cmd = "read_table(StringIO(data), sep=',', header=None)"
+sdate = datetime(2012, 5, 7)
+read_table_multiple_date_baseline = Benchmark(cmd, setup, start_date=sdate)
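With patch 2, the multi-column specification rides on the existing parse_dates
keyword instead of a separate date_conversion argument, and patch 3 benchmarks
exactly that path. A usage sketch against a pandas build with these patches applied
(Python 2, matching the benchmark setup; the dict form names the combined column
explicitly, while the list-of-lists form generates names such as 'X.2_X.3' from the
source columns; the two-row sample data is illustrative):

    from cStringIO import StringIO
    from pandas import read_table

    data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
            "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n")

    # dict form: {new column name: columns to combine}
    df = read_table(StringIO(data), sep=',', header=None,
                    parse_dates={'nominal': [1, 2], 'actual': [1, 3]})

    # list-of-lists form: output names are derived from the source columns
    df2 = read_table(StringIO(data), sep=',', header=None,
                     parse_dates=[[1, 2], [1, 3]])
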
From 8d987598c7973a7da1a16467e229d0e7e3a47d37 Mon Sep 17 00:00:00 2001
From: Chang She
Date: Fri, 11 May 2012 15:53:14 -0400
Subject: [PATCH 4/4] A few related bug fixes

---
 pandas/io/parsers.py            | 64 ++++++++++++++++++---------------
 pandas/io/tests/test_parsers.py | 13 +++++++
 vb_suite/parser.py              | 12 +++----
 3 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 2da50384cec6c..a46ad2a58e1e4 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -676,7 +676,6 @@ def get_chunk(self, rows=None):
 
         zipped_content = list(lib.to_object_array(content).T)
 
-        # no index column specified, so infer that's what is wanted
         if self.index_col is not None:
             if np.isscalar(self.index_col):
                 index = zipped_content.pop(self.index_col)
@@ -690,9 +689,8 @@ def get_chunk(self, rows=None):
                     zipped_content.pop(i)
 
             if np.isscalar(self.index_col):
-                if self._should_parse_dates(0):
-                    index = lib.try_parse_dates(index, parser=self.date_parser,
-                                                dayfirst=self.dayfirst)
+                if self._should_parse_dates(self.index_col):
+                    index = self._conv_date(index)
                 index, na_count = _convert_types(index, self.na_values)
                 index = Index(index, name=self.index_name)
                 if self.verbose and na_count:
@@ -700,9 +698,8 @@ def get_chunk(self, rows=None):
             else:
                 arrays = []
                 for i, arr in enumerate(index):
-                    if self._should_parse_dates(i):
-                        arr = lib.try_parse_dates(arr, parser=self.date_parser,
-                                                  dayfirst=self.dayfirst)
+                    if self._should_parse_dates(self.index_col[i]):
+                        arr = self._conv_date(arr)
                     arr, _ = _convert_types(arr, self.na_values)
                     arrays.append(arr)
                 index = MultiIndex.from_arrays(arrays, names=self.index_name)
@@ -741,9 +738,8 @@ def get_chunk(self, rows=None):
             data[col] = lib.map_infer(data[col], f)
 
         columns = self.columns
-        if (self.parse_dates is not None and
-            not isinstance(self.parse_dates, bool)):
-            data, columns = self._process_date_conversion(data, columns)
+        if self.parse_dates is not None:
+            data, columns = self._process_date_conversion(data)
 
         data = _convert_to_ndarrays(data, self.na_values, self.verbose)
 
@@ -778,21 +774,25 @@ def _should_parse_dates(self, i):
             name = self.index_name[i]
         return i in to_parse or name in to_parse
 
-    def _process_date_conversion(self, data_dict, columns):
+    def _conv_date(self, *date_cols):
+        if self.date_parser is None:
+            return lib.try_parse_dates(_concat_date_cols(date_cols),
+                                       dayfirst=self.dayfirst)
+        else:
+            try:
+                return self.date_parser(*date_cols)
+            except:
+                return lib.try_parse_dates(_concat_date_cols(date_cols),
+                                           parser=self.date_parser,
+                                           dayfirst=self.dayfirst)
+
+    def _process_date_conversion(self, data_dict):
         new_cols = []
         new_data = {}
+        columns = self.columns
 
-        def date_converter(*date_cols):
-            if self.date_parser is None:
-                return lib.try_parse_dates(_concat_date_cols(date_cols),
-                                           dayfirst=self.dayfirst)
-            else:
-                try:
-                    return self.date_parser(*date_cols)
-                except:
-                    return lib.try_parse_dates(_concat_date_cols(date_cols),
-                                               parser=self.date_parser,
-                                               dayfirst=self.dayfirst)
+        if self.parse_dates is None or isinstance(self.parse_dates, bool):
+            return data_dict, columns
 
         if isinstance(self.parse_dates, list):
             # list of column lists
@@ -800,12 +800,12 @@ def date_converter(*date_cols):
                 if np.isscalar(colspec):
                     if isinstance(colspec, int) and colspec not in data_dict:
                         colspec = self.orig_columns[colspec]
-                    if colspec in self.index_col or colspec in self.index_name:
+                    if self._isindex(colspec):
                         continue
-                    data_dict[colspec] = date_converter(data_dict[colspec])
+                    data_dict[colspec] = self._conv_date(data_dict[colspec])
                 else:
-                    new_name, col = _try_convert_dates(date_converter, colspec,
-                                                       data_dict, columns)
+                    new_name, col = _try_convert_dates(self._conv_date, colspec,
+                                                       data_dict, self.orig_columns)
                     if new_name in data_dict:
                         raise ValueError('New date column already in dict %s' %
                                          new_name)
@@ -819,8 +819,8 @@ def date_converter(*date_cols):
                     raise ValueError('Date column %s already in dict' %
                                      new_name)
 
-                _, col = _try_convert_dates(date_converter, colspec, data_dict,
-                                            columns)
+                _, col = _try_convert_dates(self._conv_date, colspec, data_dict,
+                                            self.orig_columns)
                 new_data[new_name] = col
                 new_cols.append(new_name)
 
@@ -828,6 +828,14 @@ def date_converter(*date_cols):
         new_cols.extend(columns)
         return data_dict, new_cols
 
+    def _isindex(self, colspec):
+        return (colspec == self.index_col or
+                (isinstance(self.index_col, list) and
+                 colspec in self.index_col) or
+                (colspec == self.index_name or
+                 (isinstance(self.index_name, list) and
+                  colspec in self.index_name)))
+
     def _get_lines(self, rows=None):
         source = self.data
         lines = self.buf
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index b5184f59bf114..1a8ad3f13ad4e 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -130,6 +130,19 @@ def func(*date_cols):
         d = datetime(1999, 1, 27, 19, 0)
         self.assert_(df.ix[0, 'X.2_X.3'] == d)
 
+        data = '''\
+KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
+KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
+KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
+KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
+KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
+'''
+        df = read_table(StringIO(data), sep=',', header=None,
+                        parse_dates=[1], index_col=1)
+        from datetime import datetime
+        d = datetime(1999, 1, 27, 19, 0)
+        self.assert_(df.index[0] == d)
+
     def test_malformed(self):
         # all
         data = """ignore
diff --git a/vb_suite/parser.py b/vb_suite/parser.py
index 8c6abafa5b89a..946e1327578c0 100644
--- a/vb_suite/parser.py
+++ b/vb_suite/parser.py
@@ -78,14 +78,14 @@
 N = 10000
 K = 8
 data = '''\
-KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
-KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
-KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
-KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
-KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
+KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
+KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
+KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
+KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
+KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
 '''
 data = data * 2000
 """
-cmd = "read_table(StringIO(data), sep=',', header=None)"
+cmd = "read_table(StringIO(data), sep=',', header=None, parse_dates=[1])"
 sdate = datetime(2012, 5, 7)
 read_table_multiple_date_baseline = Benchmark(cmd, setup, start_date=sdate)
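The _isindex helper added in patch 4 is what keeps a column that feeds the index
from being date-converted twice: it answers whether a column specification refers
to the index under any of the supported shapes (scalar or list index_col, scalar
or list index_name). A standalone sketch of the same predicate, with hypothetical
arguments for illustration:

    def isindex(colspec, index_col, index_name):
        # Mirrors TextParser._isindex: colspec matches the index if it
        # equals, or is a member of, either index_col or index_name.
        return (colspec == index_col or
                (isinstance(index_col, list) and colspec in index_col) or
                colspec == index_name or
                (isinstance(index_name, list) and colspec in index_name))

    print(isindex(1, 1, None))                      # True: scalar index_col
    print(isindex('date', [0, 1], ['date', 'id']))  # True: named index member
    print(isindex(2, [0, 1], None))                 # False: not in the index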