read_csv(date_parser=x) fails because datetime.datetime has no dtype #3071

eloraburns · 2013-03-17T02:11:47Z

from cStringIO import StringIO
# In-memory CSV standing in for a log file: one header line followed by a
# single data row whose first field (posix_timestamp) is a POSIX timestamp.
log_file = StringIO(
    'posix_timestamp,elapsed,sys,user,queries,query_time,rows,'
        'accountid,userid,contactid,level,silo,method\n'
    '1343103150,0.062353,0,4,6,0.01690,3,'
        '12345,1,-1,3,invoice_InvoiceResource,search\n'
)

import pandas as pd
from datetime import datetime
import gc

def posix_string_to_datetime(posix_string):
    """Convert a POSIX timestamp (seconds since the epoch) to a datetime.

    The argument is coerced with ``int()`` and then interpreted as UTC,
    yielding a naive ``datetime`` object.
    """
    seconds_since_epoch = int(posix_string)
    return datetime.utcfromtimestamp(seconds_since_epoch)

# This works on pandas 0.9.0, but not on 0.10.1 or github master
# Read the in-memory CSV, using the first column as the index and supplying
# posix_string_to_datetime as the date parser for that column.
df = pd.io.parsers.read_csv(
    log_file,
    # index_col is the first column, our posix_timestamp
    index_col=0,
    # Interpret the index column as a date
    parse_dates=0,
    date_parser=posix_string_to_datetime)

The crash looks like this (on 0.11.0.dev-6e7b37b, OSX 10.6 if it matters):

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-2-c3fa4840399b> in <module>()
     17     # Interpret the index column as a date
     18     parse_dates=0,
---> 19     date_parser=posix_string_to_datetime)
     20 gc.enable()

/Users/taavi/src/pandas/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, nrows, iterator, chunksize, verbose, encoding, squeeze)
    398                     buffer_lines=buffer_lines)
    399 
--> 400         return _read(filepath_or_buffer, kwds)
    401 
    402     parser_f.__name__ = name

/Users/taavi/src/pandas/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    213         return parser
    214 
--> 215     return parser.read()
    216 
    217 _parser_defaults = {

/Users/taavi/src/pandas/pandas/io/parsers.pyc in read(self, nrows)
    630             #     self._engine.set_error_bad_lines(False)
    631 
--> 632         ret = self._engine.read(nrows)
    633 
    634         if self.options.get('as_recarray'):

/Users/taavi/src/pandas/pandas/io/parsers.pyc in read(self, nrows)
   1006 
   1007             names, data = self._do_date_conversions(names, data)
-> 1008             index = self._make_index(data, alldata, names)
   1009 
   1010         return index, names, data

/Users/taavi/src/pandas/pandas/io/parsers.pyc in _make_index(self, data, alldata, columns)
    706         elif not self._has_complex_date_col:
    707             index = self._get_simple_index(alldata, columns)
--> 708             index = self._agg_index(index)
    709 
    710         elif self._has_complex_date_col:

/Users/taavi/src/pandas/pandas/io/parsers.pyc in _agg_index(self, index, try_parse_dates)
    789                                                    self.na_values)
    790 
--> 791             arr, _ = self._convert_types(arr, col_na_values)
    792             arrays.append(arr)
    793 

/Users/taavi/src/pandas/pandas/io/parsers.pyc in _convert_types(self, values, na_values, try_num_bool)
    815     def _convert_types(self, values, na_values, try_num_bool=True):
    816         na_count = 0
--> 817         if issubclass(values.dtype.type, (np.number, np.bool_)):
    818             mask = lib.ismember(values, na_values)
    819             na_count = mask.sum()

AttributeError: 'datetime.datetime' object has no attribute 'dtype'

Thanks!

The text was updated successfully, but these errors were encountered:

changhiskhan · 2013-03-18T05:44:33Z

Hmmm... this didn't get test coverage because it's a single row. Calling int() on an array of length 1 returns an int, whereas an exception is raised for arrays of length > 1.
Maybe it should just return the parsed datetime as an array in this case, and raise an exception if the result isn't an ndarray? See 67cc87c

wesm · 2013-03-18T16:32:44Z

Oh, boo, now it all makes sense. That's brokenness in NumPy IMHO

wesm closed this as completed in 9e99a5e Mar 17, 2013

changhiskhan mentioned this issue Mar 18, 2013

Int bug #3080

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

read_csv(date_parser=x) fails because datetime.datetime has no dtype #3071

read_csv(date_parser=x) fails because datetime.datetime has no dtype #3071

eloraburns commented Mar 17, 2013

changhiskhan commented Mar 18, 2013

wesm commented Mar 18, 2013

read_csv(date_parser=x) fails because datetime.datetime has no dtype #3071

read_csv(date_parser=x) fails because datetime.datetime has no dtype #3071

Comments

eloraburns commented Mar 17, 2013

changhiskhan commented Mar 18, 2013

wesm commented Mar 18, 2013