Closed
Description
I am trying to convert a string column to seconds in a dataframe. It looks like there is some sort of off-by-one error.
Here's my CSV:
"id","Time to Calculate"
1,"19:31:15"
2,"19:18:17"
3,"19:31:15"
4,"19:27:42"
5,"19:27:42"
6,"19:28:25"
7,"19:28:25"
8,"19:28:25"
9,"19:28:04"
10,"19:28:04"
11,"19:28:04"
12,"19:25:56"
13,"19:25:56"
14,"19:25:57"
15,"19:25:57"
16,"19:26:41"
17,"19:26:41"
18,"19:26:08"
19,"19:26:08"
20,"1 day, 1:04:33"
21,"1 day, 1:04:33"
22,"1 day, 1:04:33"
23,"2 days, 2:14:33"
Here's my code:
from pandas import read_csv
def sec_to_calc(time_to_calc):
    """Convert a duration string to a total number of seconds.

    Accepted formats are ``"H:MM:SS"``, ``"1 day, H:MM:SS"`` and
    ``"N days, H:MM:SS"``.

    Parameters:
        time_to_calc: the duration string to parse.

    Returns:
        The duration as an integer number of seconds.

    Raises:
        ValueError: if a numeric field is not an integer or the clock part
            does not have exactly three ``:``-separated fields.
    """
    # Debug trace. Formatted into a single string so the output is the same
    # whether ``print`` is a statement (Python 2) or a function (Python 3).
    print("TIMETD %s %s" % (time_to_calc, type(time_to_calc)))

    days = 0
    if 'days' in time_to_calc:
        day_count, time_to_calc = time_to_calc.split(" days, ")
        days = int(day_count)
    elif 'day' in time_to_calc:
        # Singular form: the prefix is discarded and treated as one day.
        days = 1
        _, time_to_calc = time_to_calc.split(" day, ")

    hours, minutes, seconds = [int(x) for x in time_to_calc.split(':')]

    # Days must be *multiplied* by the seconds in a day; adding the constant
    # instead would offset every result by 86400.
    return days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60 + seconds
# Load the sample CSV, running every "Time to Calculate" cell through
# sec_to_calc as it is parsed.
filename = '/tmp/test.csv'
df = read_csv(
    filename,
    converters={'Time to Calculate': sec_to_calc},
)
Here's my error:
File "/tmp/test.py", line 19, in <module>
df = read_csv(filename, converters={'Time to Calculate': sec_to_calc})
File "/home/mharrison/work/pandas/env/lib/python2.6/site-packages/pandas/io/parsers.py", line 64, in read_csv
return parser.get_chunk()
File "/home/mharrison/work/pandas/env/lib/python2.6/site-packages/pandas/io/parsers.py", line 418, in get_chunk
data = _convert_to_ndarrays(data, self.na_values)
File "/home/mharrison/work/pandas/env/lib/python2.6/site-packages/pandas/io/parsers.py", line 462, in _convert_to_ndarrays
result[c] = _convert_types(values, na_values)
File "/home/mharrison/work/pandas/env/lib/python2.6/site-packages/pandas/io/parsers.py", line 470, in _convert_types
lib.sanitize_objects(values, na_values)
File "parsing.pyx", line 220, in pandas._tseries.sanitize_objects (pandas/src/tseries.c:54380)
ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'