Skip to content

Commit 5b4be0b

Browse files
committed
ENH: initial version of convert_to_annual for pandas, #736
1 parent 570a03a commit 5b4be0b

File tree

6 files changed

+154
-5
lines changed

6 files changed

+154
-5
lines changed

pandas/core/reshape.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,6 @@ def get_result(self):
130130
return DataFrame(values, index=index, columns=columns)
131131

132132
def get_new_values(self):
133-
return self._reshape_values(self.values)
134-
135-
def _reshape_values(self, values):
136133
values = self.values
137134
# place the values
138135
length, width = self.full_shape
@@ -148,7 +145,7 @@ def _reshape_values(self, values):
148145
new_values.fill(np.nan)
149146

150147
# is there a simpler / faster way of doing this?
151-
for i in xrange(self.values.shape[1]):
148+
for i in xrange(values.shape[1]):
152149
chunk = new_values[:, i * width : (i + 1) * width]
153150
mask_chunk = new_mask[:, i * width : (i + 1) * width]
154151

@@ -200,6 +197,8 @@ def get_new_index(self):
200197

201198
return new_index
202199

200+
201+
203202
def pivot(self, index=None, columns=None, values=None):
204203
"""
205204
See DataFrame.pivot

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -839,7 +839,8 @@ def copy(self, order='C'):
839839
-------
840840
cp : Series
841841
"""
842-
return Series(self.values.copy(order), index=self.index, name=self.name)
842+
return Series(self.values.copy(order), index=self.index,
843+
name=self.name)
843844

844845
def to_dict(self):
845846
"""

pandas/tseries/__init__.py

Whitespace-only changes.

pandas/tseries/tests/__init__.py

Whitespace-only changes.

pandas/tseries/tests/test_tools.py

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import nose
2+
import unittest
3+
4+
import numpy as np
5+
6+
from pandas import Series, date_range
7+
import pandas.util.testing as tm
8+
9+
from pandas.tseries.tools import convert_to_annual, isleapyear
10+
11+
class TestConvertAnnual(unittest.TestCase):
    """
    Tests for convert_to_annual, the pandas port of the
    scikits.timeseries function of the same name.
    """
    def test_daily(self):
        rng = date_range('1/1/2000', '12/31/2004', freq='D')
        ts = Series(np.random.randn(len(rng)), index=rng)

        annual = convert_to_annual(ts, 'D')

        # Build the expected 1-based column for every observation.  Work on
        # a private copy so the in-place shift below never touches (or
        # fails on) data owned by the index.
        doy = np.array(ts.index.dayofyear)
        # In non-leap years every day from Mar. 1st on (day-of-year >= 60)
        # moves one column to the right, leaving the Feb. 29th slot empty.
        # ``~`` is the boolean negation; unary minus is not valid on
        # boolean arrays.
        doy[~isleapyear(ts.index.year) & (doy >= 60)] += 1

        for i in range(1, 367):
            subset = ts[doy == i]
            subset.index = [x.year for x in subset.index]

            tm.assert_series_equal(annual[i].dropna(), subset)

        # check leap days
        leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)]
        day = leaps.index.dayofyear[0]
        leaps.index = leaps.index.year
        tm.assert_series_equal(annual[day].dropna(), leaps)

    def test_weekly(self):
        # TODO: weekly frequency is not implemented in convert_to_annual yet
        pass

    def test_monthly(self):
        rng = date_range('1/1/2000', '12/31/2004', freq='M')
        ts = Series(np.random.randn(len(rng)), index=rng)

        annual = convert_to_annual(ts, 'M')

        month = ts.index.month

        # Column i must hold exactly the observations of calendar month i,
        # indexed by year.
        for i in range(1, 13):
            subset = ts[month == i]
            subset.index = [x.year for x in subset.index]
            tm.assert_series_equal(annual[i].dropna(), subset)

    def test_interval_monthly(self):
        # TODO: not written yet
        pass

    def test_interval_daily(self):
        # TODO: not written yet
        pass

    def test_interval_weekly(self):
        # TODO: not written yet
        pass
60+
61+
if __name__ == '__main__':
62+
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
63+
exit=False)
64+

pandas/tseries/tools.py

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
from pandas.core.frame import DataFrame
2+
import pandas.core.nanops as nanops
3+
4+
import numpy as np
5+
6+
def convert_to_annual(series, freq=None):
    """
    Group a series by years, taking leap years into account.

    The output has as many rows as distinct years in the original series,
    and as many columns as the length of a leap year in the units
    corresponding to the original frequency (366 for daily frequency, 12 for
    monthly frequency).
    The first column of the output corresponds to the start of the year
    (Jan. 1st for daily data, January for monthly data), while the last
    column corresponds to its end (Dec. 31st / December).
    Slots with no observation are filled with NaN.

    For example, if the initial series has a daily frequency, the 59th column
    of the output always corresponds to Feb. 28th, the 61st column to Mar. 1st,
    and the 60th column (Feb. 29th) is NaN for non-leap years.

    Only daily and monthly frequencies are implemented so far; sub-daily
    frequencies (e.g. hourly, which would need 366*24 columns) raise
    NotImplementedError.

    Parameters
    ----------
    series : TimeSeries
        Series whose index exposes ``year``, ``month`` and ``dayofyear``.
        Integer data is upcast to float64 so that NaN can mark empty slots.
    freq : string or None, default None
        Frequency of the data, case-insensitive: 'D' for daily,
        'M' or 'BM' for monthly.

    Returns
    -------
    annual : DataFrame
        One row per distinct year (sorted ascending), columns labelled
        1..366 for daily data or 1..12 for monthly data.

    Raises
    ------
    NotImplementedError
        If ``freq`` is None or is not one of the supported frequencies.
    """
    index = series.index
    year = np.asarray(index.year)

    # ``years`` is the sorted set of distinct years and ``row`` maps every
    # observation to the position of its year in that set.  Using the
    # position (rather than ``year - years.min()``) keeps the flat index
    # inside the output buffer when some years are missing.
    years, row = np.unique(year, return_inverse=True)

    if freq is not None:
        freq = freq.upper()

    if freq == 'D':
        width = 366
        # zero-based day of year; the subtraction yields a fresh array that
        # is safe to modify in place
        offset = np.asarray(index.dayofyear) - 1

        # adjust for leap year: in non-leap years, shift Mar. 1st and later
        # one slot to the right so the Feb. 29th slot stays empty
        offset[~isleapyear(year) & (offset >= 59)] += 1

        columns = range(1, 367)
        # todo: strings like 1/1, 1/25, etc.?
    elif freq in ('M', 'BM'):
        width = 12
        offset = np.asarray(index.month) - 1
        columns = range(1, 13)
    else:
        raise NotImplementedError(freq)

    # position of each observation in the row-major (year, slot) buffer
    flat_index = row * width + offset

    dtype = series.dtype
    if np.issubdtype(dtype, np.integer):
        # integers cannot hold NaN; upcast like the rest of pandas does
        dtype = np.float64

    values = np.empty((len(years), width), dtype=dtype)
    values.fill(np.nan)
    values.put(flat_index, series.values)

    return DataFrame(values, index=years, columns=columns)


def isleapyear(year):
    """
    Return True where ``year`` is a leap year (Gregorian rule).

    Parameters
    ----------
    year : integer / sequence
        A given (list of) year(s).

    Returns
    -------
    leap : boolean or ndarray of booleans
        Element-wise result with the same shape as ``year``.
    """
    year = np.asarray(year)
    # divisible by 400, or divisible by 4 but not by 100
    return np.logical_or(year % 400 == 0,
                         np.logical_and(year % 4 == 0, year % 100 > 0))
85+

0 commit comments

Comments
 (0)