Skip to content

Commit 0983759

Browse files
Chang She, wesm
Chang She
authored and committed
ENH: custom date converters #1174
1 parent d50bbcc commit 0983759

File tree

2 files changed

+148
-0
lines changed

2 files changed

+148
-0
lines changed

pandas/io/date_converters.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""This module is designed for community supported date conversion functions"""
2+
import numpy as np
3+
import pandas._tseries as lib
4+
5+
def parse_date_time(date_col, time_col):
    """Parse a date column together with a time column.

    Both columns are run through ``_maybe_cast`` (which converts them to
    object dtype when they are not already) and then handed to
    ``lib.try_parse_date_and_time``; that function's result is returned
    unchanged.
    """
    return lib.try_parse_date_and_time(_maybe_cast(date_col),
                                       _maybe_cast(time_col))
9+
10+
def parse_date_fields(year_col, month_col, day_col):
    """Parse separate year, month and day columns.

    Each column is run through ``_maybe_cast`` (object dtype conversion when
    needed) and the three results are passed, in year/month/day order, to
    ``lib.try_parse_year_month_day``; its result is returned unchanged.
    """
    return lib.try_parse_year_month_day(_maybe_cast(year_col),
                                        _maybe_cast(month_col),
                                        _maybe_cast(day_col))
15+
16+
def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col,
                     second_col):
    """Parse six separate date/time component columns.

    Every column is run through ``_maybe_cast`` (object dtype conversion when
    needed); the casted columns are then passed, in
    year/month/day/hour/minute/second order, to
    ``lib.try_parse_datetime_components`` and its result is returned
    unchanged.
    """
    # Cast in the same left-to-right order as the parameter list.
    components = [_maybe_cast(col)
                  for col in (year_col, month_col, day_col,
                              hour_col, minute_col, second_col)]
    return lib.try_parse_datetime_components(*components)
26+
27+
def generic_parser(parse_func, *cols):
    """Apply ``parse_func`` row by row across one or more columns.

    Parameters
    ----------
    parse_func : callable
        Called once per row with one positional argument per column, namely
        the i-th element of each column.
    *cols : indexable sequences
        Columns to walk in lockstep. ``_check_columns`` validates that at
        least one column is given and that all share the same length.

    Returns
    -------
    numpy.ndarray
        Object-dtype array of length N holding ``parse_func``'s result for
        each row.
    """
    N = _check_columns(cols)
    results = np.empty(N, dtype=object)

    # ``range`` instead of ``xrange``: the latter does not exist on Python 3,
    # while ``range`` works on both major versions.
    for i in range(N):
        results[i] = parse_func(*[c[i] for c in cols])

    return results
36+
37+
def _maybe_cast(arr):
38+
if not arr.dtype.type == np.object_:
39+
arr = np.array(arr, dtype=object)
40+
return arr
41+
42+
def _check_columns(cols):
43+
assert(len(cols) > 0)
44+
45+
N = len(cols[0])
46+
for c in cols[1:]:
47+
assert(len(c) == N)
48+
49+
return N
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
from pandas.util.py3compat import StringIO, BytesIO
2+
from datetime import date, datetime
3+
import csv
4+
import os
5+
import sys
6+
import re
7+
import unittest
8+
9+
import nose
10+
11+
from numpy import nan
12+
import numpy as np
13+
from numpy.testing.decorators import slow
14+
15+
from pandas import DataFrame, Series, Index, isnull
16+
import pandas.io.parsers as parsers
17+
from pandas.io.parsers import (read_csv, read_table, read_fwf,
18+
ExcelFile, TextParser)
19+
from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
20+
assert_series_equal, network)
21+
import pandas._tseries as lib
22+
from pandas.util import py3compat
23+
from pandas._tseries import Timestamp
24+
import pandas.io.date_converters as conv
25+
26+
class TestConverters(unittest.TestCase):
    """Tests for the converters in ``pandas.io.date_converters`` (``conv``).

    Each converter test first calls the converter directly on small arrays,
    then passes it as ``date_parser`` to ``read_table`` and checks the
    combined date column that comes back.
    """

    def setUp(self):
        # Component arrays describing the two timestamps in ``self.expected``.
        self.years = np.array([2007, 2008])
        self.months = np.array([1, 2])
        self.days = np.array([3, 4])
        self.hours = np.array([5, 6])
        self.minutes = np.array([7, 8])
        self.seconds = np.array([9, 0])
        # The same two timestamps as date / time strings.
        self.dates = np.array(['2007/1/3', '2008/2/4'], dtype=object)
        self.times = np.array(['05:07:09', '06:08:00'], dtype=object)
        self.expected = np.array([datetime(2007, 1, 3, 5, 7, 9),
                                  datetime(2008, 2, 4, 6, 8, 0)])

    def test_parse_date_time(self):
        # Direct call on the fixture arrays.
        result = conv.parse_date_time(self.dates, self.times)
        self.assertTrue((result == self.expected).all())

        # Through read_table: columns 0 and 1 are combined as 'date_time'.
        data = """\
date, time, a, b
2001-01-05, 10:00:00, 0.0, 10.
2001-01-05, 00:00:00, 1., 11.
"""
        datecols = {'date_time': [0, 1]}
        df = read_table(StringIO(data), sep=',', header=0,
                        parse_dates=datecols, date_parser=conv.parse_date_time)
        self.assertTrue('date_time' in df)
        self.assertTrue(df.date_time.ix[0] == datetime(2001, 1, 5, 10, 0, 0))

    def test_parse_date_fields(self):
        # Direct call on the fixture arrays.
        result = conv.parse_date_fields(self.years, self.months, self.days)
        expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)])
        self.assertTrue((result == expected).all())

        # Through read_table: columns 0-2 are combined as 'ymd'.
        data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11."
        datecols = {'ymd': [0, 1, 2]}
        df = read_table(StringIO(data), sep=',', header=0,
                        parse_dates=datecols,
                        date_parser=conv.parse_date_fields)
        self.assertTrue('ymd' in df)
        self.assertTrue(df.ymd.ix[0] == datetime(2001, 1, 10))

    def test_datetime_six_col(self):
        # Direct call on the fixture arrays.
        result = conv.parse_all_fields(self.years, self.months, self.days,
                                       self.hours, self.minutes, self.seconds)
        self.assertTrue((result == self.expected).all())

        # Through read_table: columns 0-5 are combined as 'ymdHMS'.
        data = """\
year, month, day, hour, minute, second, a, b
2001, 01, 05, 10, 00, 0, 0.0, 10.
2001, 01, 5, 10, 0, 00, 1., 11.
"""
        datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]}
        df = read_table(StringIO(data), sep=',', header=0,
                        parse_dates=datecols,
                        date_parser=conv.parse_all_fields)
        self.assertTrue('ymdHMS' in df)
        self.assertTrue(df.ymdHMS.ix[0] == datetime(2001, 1, 5, 10, 0, 0))

    def test_generic(self):
        # A user-supplied converter that takes scalar year/month values.
        # NOTE(review): presumably exercises the row-wise fallback inside
        # read_table (conv.generic_parser is not called directly) - confirm.
        data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11."
        datecols = {'ym': [0, 1]}
        dateconverter = lambda y, m: date(year=int(y), month=int(m), day=1)
        df = read_table(StringIO(data), sep=',', header=0,
                        parse_dates=datecols,
                        date_parser=dateconverter)
        self.assertTrue('ym' in df)
        self.assertTrue(df.ym.ix[0] == date(2001, 1, 1))
94+
95+
96+
if __name__ == '__main__':
    # Script entry point: run this module's tests through nose with the
    # flags listed below, without exiting the interpreter afterwards.
    import nose
    nose.runmodule(
        argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
        exit=False)

0 commit comments

Comments
 (0)