Skip to content

Commit 92afbb5

Browse files
committed
add some computation functions for groupby procedure
1 parent daf52fc commit 92afbb5

File tree

2 files changed

+53
-17
lines changed

2 files changed

+53
-17
lines changed

ddf_utils/ops.py

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""commonly used calculation methods"""
4+
5+
import pandas as pd
6+
import numpy as np
7+
8+
9+
# Groupby Aggregate
10+
11+
12+
# Groupby Transform
13+
def zcore(x):
    """Standardize `x`: subtract its mean, then divide by its standard deviation.

    Parameters
    ----------
    x : object exposing ``mean()`` and ``std()``
        presumably a pandas Series/DataFrame handed over by
        ``groupby().transform`` -- TODO confirm against callers

    Returns
    -------
    return : same kind of object as `x`
        ``(x - x.mean()) / x.std()``; the ``std()`` defaults of `x`'s own
        type apply
    """
    centered = x - x.mean()
    spread = x.std()
    return centered / spread
15+
16+
17+
# Groupby Filter
18+
def gt(x, val, how='all', include_eq=False):
    """Reduce an elementwise "greater than" comparison of `x` against `val`.

    Parameters
    ----------
    x : array-like
        values to test
    val : scalar or array-like
        threshold the values are compared with
    how : `str`
        name of the numpy function used to reduce the comparison result;
        it is looked up with ``getattr(np, how)`` (e.g. 'all' or 'any')
    include_eq : `bool`
        when true the comparison is ``>=`` instead of ``>``

    Returns
    -------
    return : result of the numpy reducer applied to the comparison mask
    """
    # Resolve the reducer first so an unknown `how` fails before comparing.
    reducer = getattr(np, how)
    mask = (x >= val) if include_eq else (x > val)
    return reducer(mask)
24+
25+
26+
def lt(x, val, how='all', include_eq=False):
    """Reduce an elementwise "less than" comparison of `x` against `val`.

    Parameters
    ----------
    x : array-like
        values to test
    val : scalar or array-like
        threshold the values are compared with
    how : `str`
        name of the numpy function used to reduce the comparison result;
        it is looked up with ``getattr(np, how)`` (e.g. 'all' or 'any')
    include_eq : `bool`
        when true the comparison is ``<=``; otherwise it is the strict ``<``

    Returns
    -------
    return : result of the numpy reducer applied to the comparison mask
    """
    f = getattr(np, how)
    if include_eq:
        return f(x <= val)
    # Strict comparison: values equal to `val` must not pass when
    # include_eq is False (previously both branches used `<=`).
    return f(x < val)
32+
33+
34+
def between(x, upper, lower, how='all', include_upper=False, include_lower=False):
    """Reduce an elementwise "lower < x < upper" test over `x`.

    Note the argument order: `upper` comes before `lower`.

    Parameters
    ----------
    x : array-like
        values to test
    upper : scalar or array-like
        upper bound
    lower : scalar or array-like
        lower bound
    how : `str`
        name of the numpy function used to reduce the elementwise result;
        it is looked up with ``getattr(np, how)`` (e.g. 'all' or 'any')
    include_upper : `bool`
        when true, values equal to `upper` pass
    include_lower : `bool`
        when true, values equal to `lower` pass

    Returns
    -------
    return : result of the numpy reducer applied to the combined mask
    """
    reducer = getattr(np, how)
    above = (x >= lower) if include_lower else (x > lower)
    below = (x <= upper) if include_upper else (x < upper)
    # Combine per element BEFORE reducing. Reducing each side separately and
    # and-ing the results is only equivalent for how='all'; with how='any' it
    # would accept data where one element is above `lower` and a different
    # element is below `upper` although none lies between the bounds.
    return reducer(above & below)
36+
37+
38+
# Rolling
39+
def aagr(df: pd.DataFrame, window: int=10):  # TODO: don't include the window
    """average annual growth rate

    Computes the row-to-row percentage change of `df` and averages it over a
    rolling window; rows that still contain NaN afterwards are dropped.

    Parameters
    ----------
    df : `DataFrame`
        input data; presumably one row per year -- TODO confirm
    window : `int`
        the rolling window size

    Returns
    -------
    return : `DataFrame`
        The rolling apply result
    """
    growth = df.pct_change()
    windows = growth.rolling(window)
    averaged = windows.apply(np.mean)
    return averaged.dropna()

ddf_utils/transformer.py

-17
Original file line numberDiff line numberDiff line change
@@ -166,23 +166,6 @@ def translate_header(df, dictionary, dictionary_type='inline'):
166166
raise ValueError('dictionary not supported: '+dictionary_type)
167167

168168

169-
def aagr(df: pd.DataFrame, window: int=10):  # TODO: create a op.py file for this kind of functions?
    """average annual growth rate

    Takes the percentage change between consecutive rows of `df`, applies a
    rolling mean of size `window` to it, and drops rows containing NaN.

    Parameters
    ----------
    df : `DataFrame`
        input data
    window : `int`
        the rolling window size

    Returns
    -------
    return : `DataFrame`
        The rolling apply result
    """
    rolling_mean = df.pct_change().rolling(window).apply(np.mean)
    return rolling_mean.dropna()
184-
185-
186169
def trend_bridge(old_data, new_data, bridge_length):
187170
"""smoothing data between series.
188171

0 commit comments

Comments
 (0)