1
+ import numpy as np
2
+
3
+ from pandas .core .frame import DataFrame
4
+ import pandas .core .nanops as nanops
5
+ from pandas .tseries .util import isleapyear
6
+ from pandas .tseries .index import date_range
7
+
8
def pivot_annual_h(series, freq=None, dt_index=False):
    """
    Pivot an hourly series into an hour-of-year by year table.

    Every year is laid out on the 8784 hourly slots of a leap year, so that
    a given row always refers to the same calendar day and hour.  For
    non-leap years the 24 slots of Feb. 29th (rows 1417 to 1440) are left as
    NaN.  Slots for which the series holds no observation (incomplete years,
    gaps) are NaN as well.

    Only hourly data is handled here; for daily or monthly data use
    ``pandas.tseries.util.pivot_annual``.

    Parameters
    ----------
    series : TimeSeries
        Series with a datetime index.
    freq : string or None, default None
        Frequency alias, case-insensitive.  When None, the frequency string
        of ``series.index`` is used.
    dt_index : bool, default False
        If True, the integer row labels are replaced by the hourly datetime
        range returned by ``default_rng()``.  CAUTION: the year of that
        range is only a placeholder.

    Returns
    -------
    annual : DataFrame
        One row per hour of a leap year (labelled 1..8784 unless
        ``dt_index`` is set) and one column per distinct year of the series.

    Raises
    ------
    NotImplementedError
        If the frequency is anything other than hourly.
    """
    #TODO: test like original pandas and the position of first and last value in arrays
    index = series.index
    index_years = np.asarray(index.year)
    years = nanops.unique1d(index_years)

    # Work with the frequency *string*: an explicit argument wins, otherwise
    # fall back to whatever the index advertises (may be None).
    if freq is None:
        freq = index.freqstr
    if freq is not None:
        freq = freq.upper()

    #TODO: use pivot_annual for D & M and minute in the same fashion
    if freq != 'H':
        if freq in ('D', 'M'):
            raise NotImplementedError(
                "%s: use pandas.tseries.util.pivot_annual" % freq)
        raise NotImplementedError(freq)

    hours_per_day = 24
    # number of hourly slots in a leap year
    total_hoy_leap = year_length(freq)
    # zero-based slot of Feb. 29th 00:00 (31 days of January + 28 of February)
    feb29_slot = (31 + 28) * hours_per_day

    # NaN marks empty slots, so integer/bool input has to be promoted to float.
    if series.dtype.kind in 'fcO':
        dtype = series.dtype
    else:
        dtype = np.float64

    values = np.empty((total_hoy_leap, len(years)), dtype=dtype)
    values.fill(np.nan)

    # Zero-based hour-of-year of every observation, derived from its own
    # timestamp so that partial or gappy years still land on the right rows.
    observed = series.values
    slots = ((np.asarray(index.dayofyear) - 1) * hours_per_day
             + np.asarray(index.hour))

    for col, yr in enumerate(years):
        in_year = index_years == yr
        year_slots = slots[in_year]
        if not isleapyear(yr):
            # skip over the Feb. 29th slots, which stay NaN
            year_slots = np.where(year_slots >= feb29_slot,
                                  year_slots + hours_per_day,
                                  year_slots)
        values[year_slots, col] = observed[in_year]

    if dt_index:
        # assign a datetime index, CAUTION: the year is definitely wrong!
        row_labels = default_rng()
    else:
        row_labels = range(1, total_hoy_leap + 1)

    return DataFrame(values, index=row_labels, columns=years)
124
+
125
+
126
+ ### timeseries pivoting helper
127
+
128
def last_col2front(df, col_no=1):
    """Return ``df`` with its trailing columns rotated to the front.

    ``col_no`` is the number of columns taken from the end; their relative
    order is kept.
    """
    names = df.columns.tolist()
    trailing = names[-col_no:]
    leading = names[:-col_no]
    return df[trailing + leading]
139
+
140
+
141
def extended_info(df, time_cols=True, aggreg=True, aggreg_func=None,
                  datetime_index=False):
    """Add row-wise aggregates and calendar columns to a timeseries pivot.

    Parameters
    ----------
    df : DataFrame
        Pivot table with one data column per year.  The calendar columns are
        taken from ``default_rng()``, so ``df`` is expected to have one row
        per entry of that range.
    time_cols : bool, default True
        Accepted but currently unused.
    aggreg : bool, default True
        If True, append the columns 'mean', 'sum', 'min', 'max' and 'std',
        each computed across the original data columns of every row.
    aggreg_func : default None
        Accepted but currently unused.
    datetime_index : bool, default False
        Accepted but currently unused.

    Returns
    -------
    DataFrame
        A copy of ``df`` with the calendar columns 'doy', 'month', 'day' and
        'hour' (counted 1..24) in front, followed by the data columns and,
        if requested, the aggregates.
    """
    df_extended = df.copy()
    # Snapshot the data columns first so that the aggregates are computed on
    # the data only and never on previously added aggregate columns.
    cols = df_extended.columns

    #TODO: add standard aggregation
    #TODO: make function be set by argument
    #TODO: Maybe use http://pandas.pydata.org/pandas-docs/dev/basics.html#summarizing-data-describe
    if aggreg:
        data = df_extended[cols]
        df_extended['mean'] = data.mean(1)
        df_extended['sum'] = data.sum(1)
        df_extended['min'] = data.min(1)
        df_extended['max'] = data.max(1)
        # stored under its own key; it used to overwrite 'max'
        df_extended['std'] = data.std(1)

    # add some metadata
    #TODO: add function to make index a datetime with the argument above using the rng below
    #TODO: convert the range to lower frequencies and reuse the function.
    rng = default_rng()
    df_extended['doy'] = rng.dayofyear
    df_extended['month'] = rng.month
    df_extended['day'] = rng.day
    df_extended['hour'] = rng.hour + 1
    # move the four calendar columns just appended to the front
    df_extended = last_col2front(df_extended, col_no=4)

    return df_extended
175
+
176
+ ###Timeseries convenience / helper functions
177
+
178
+
179
def year_length(freq, leap=True):
    """Return the number of periods in one year at the given frequency.

    Parameters
    ----------
    freq : string
        Frequency alias, case-insensitive: 'D' (daily), 'H' (hourly),
        'T' or 'MIN' (minutely), 'S' (secondly).
    leap : bool, default True
        Count a leap year (366 days) if True, a regular year (365 days)
        otherwise.

    Returns
    -------
    length : int

    Raises
    ------
    NotImplementedError
        If ``freq`` is not one of the supported aliases.
    """
    periods_per_day = {
        'D': 1,
        'H': 24,
        'T': 24 * 60,
        'MIN': 24 * 60,
        'S': 24 * 60 * 60,
    }

    days_of_year = 366 if leap else 365

    alias = freq.upper() if hasattr(freq, 'upper') else freq
    per_day = periods_per_day.get(alias)
    if per_day is None:
        # previously fell through to an unbound local variable
        raise NotImplementedError(
            "year length is not defined for frequency %r" % (freq,))

    return per_day * days_of_year
194
+
195
def default_rng(freq='H', leap=True):
    """Create a default date range spanning exactly one calendar year.

    Parameters
    ----------
    freq : string, default 'H'
        Frequency alias of the range; it must be one that ``year_length``
        can size.
    leap : bool, default True
        If True the range covers the leap year 2012, otherwise the regular
        year 2011.  The year only serves as a placeholder.

    Returns
    -------
    rng : DatetimeIndex
        Range starting on Jan. 1st, 00:00 with one entry per period of the
        chosen year.
    """
    # The number of periods must follow the leap flag, otherwise the
    # non-leap range would run into the following year.
    periods = year_length(freq, leap=leap)
    start = '1/1/2012' if leap else '1/1/2011'

    return date_range(start, periods=periods, freq=freq)
0 commit comments