Skip to content

Commit 900fc0e

Browse files
committed
stashing changes
1 parent 6e2a80e commit 900fc0e

File tree

4 files changed

+198
-16
lines changed

4 files changed

+198
-16
lines changed
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""
2+
3+
https://en.wikipedia.org/wiki/Binomial_options_pricing_model
4+
"""
5+
6+
from math import exp, sqrt
7+
import numpy as np
8+
9+
class BinomialOptionsPrice:
    """
    Price a European call or put option with the binomial (CRR) tree model.

    https://en.wikipedia.org/wiki/Binomial_options_pricing_model

    Input:
        expiration_time: int This is the time until the option expires
        stock_price: float This is the stock price today
        strike_price: float This is the strike price at the time the option expires
        interest_rate: float The interest rate for the option
        sigma: float The volatility of the stock
        tree_height: int The number of levels of the tree
        option_type: str Either 'call' or 'put'. Any other value raises an error
    Output:
        generate_other_rows() returns the full tree of option values; the
        discounted option price is at index [-1, 0].
    """
    def __init__(self,
                 expiration_time: int,
                 stock_price: float,
                 strike_price: float,
                 interest_rate: float,
                 sigma: float,
                 tree_height: int,
                 option_type: str) -> None:
        self.tree_height = tree_height
        self.sigma = sigma
        self.interest_rate = interest_rate
        self.strike_price = strike_price
        self.stock_price = stock_price
        self.expiration_time = expiration_time
        self.option_type = option_type

        # Stability guard: a time step that is too large pushes the
        # risk-neutral probability outside [0, 1].
        if self.expiration_time / self.tree_height >= self.sigma **2 / self.interest_rate**2:
            raise ValueError("Time step too big. This will cause the probability to be outside [0, 1]")

    def calculate_up(self) -> float:
        """Up factor u = exp(sigma * sqrt(dt)); always >= 1."""
        return exp(self.sigma * sqrt(self.expiration_time / self.tree_height))

    def calculate_down(self) -> float:
        """Down factor d = 1 / u (CRR recombining tree)."""
        return 1 / self.calculate_up()

    def calculate_rate_delta_t(self) -> float:
        """Per-step discount factor exp(-r * dt)."""
        return exp(-self.interest_rate * self.expiration_time / self.tree_height)

    def calculate_probability(self) -> float:
        """Risk-neutral probability p = (exp(r * dt) - d) / (u - d)."""
        up = self.calculate_up()
        down = self.calculate_down()
        growth = exp(self.interest_rate * self.expiration_time / self.tree_height)
        return (growth - down) / (up - down)

    def calculate_s_n(self, placement: int) -> float:
        """Stock price at leaf `placement`, where `placement` counts down-moves.

        With d = 1/u the leaf price is S0 * u**(N - 2 * placement); placement 0
        is the highest leaf, placement N the lowest.
        """
        return self.stock_price * self.calculate_up() ** (self.tree_height - 2 * placement)

    def calculate_leaf_values(self) -> np.ndarray:
        """Option payoff at every leaf of the tree.

        Raises ValueError when option_type is neither 'call' nor 'put'.
        """
        if self.option_type not in ('call', 'put'):
            raise ValueError("Option type must be either 'call' or 'put'")
        _leaf_list = []
        for i in range(self.tree_height + 1):
            if self.option_type == 'call':
                _leaf_list.append(max(self.calculate_s_n(i) - self.strike_price, 0))
            else:
                _leaf_list.append(max(self.strike_price - self.calculate_s_n(i), 0))
        return np.array(_leaf_list)

    def calculate_node_values(self, previous_nodes: np.ndarray,
                              current_tree_level: int) -> np.ndarray:
        """One backward-induction step.

        Computes the discounted expected option values at `current_tree_level`
        (which has current_tree_level + 1 nodes) from the values one level
        below in `previous_nodes`.
        """
        p = self.calculate_probability()
        discount = self.calculate_rate_delta_t()
        return np.array([
            discount * (p * previous_nodes[j] + (1 - p) * previous_nodes[j + 1])
            for j in range(current_tree_level + 1)
        ])

    def generate_other_rows(self) -> np.ndarray:
        """Build the whole tree of option values by backward induction.

        Row 0 holds the leaf payoffs; each subsequent row is one level closer
        to the root (shorter levels are zero-padded on the right). The option
        price is the single root value at [-1, 0].
        """
        rows = np.zeros((self.tree_height + 1, self.tree_height + 1))
        values = self.calculate_leaf_values()
        rows[0, :] = values
        for level in range(self.tree_height - 1, -1, -1):
            values = self.calculate_node_values(values, level)
            rows[self.tree_height - level, : level + 1] = values
        return rows
86+
87+
88+
89+
if __name__ == "__main__":
    import doctest

    doctest.testmod()
    # Demo: price an option on a 5-level tree and print the value tree.
    example = BinomialOptionsPrice(1, 4.5, 5.6, 0.23, 1, 5, 'call')
    print(example.generate_other_rows())
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
""" Often called "simple linear regression", univariate linear regression is a
2+
method for determining the straight line that best fits the data
3+
4+
Since simple linear regression only uses a single predictor variable,
5+
it is of the form:
6+
7+
y_pred = beta * x + alpha
8+
9+
The values for alpha and beta are calculated as follows.
10+
11+
beta = correlation_x_y * (sample_standard_deviation_y / sample_standard_deviation_x)
12+
alpha = mean(y) - beta * mean(x).
13+
14+
15+
"""
16+
def get_data():
    """Return a small example (x, y) dataset for demonstrating the regression."""
    x = [1.0, 2.0, 3.0, 4.0, 5.0]
    y = [2.1, 3.9, 6.2, 8.0, 9.8]
    return x, y


def sample_standard_deviation(x_or_y_values):
    """Sample standard deviation (n - 1 in the denominator).

    Raises ValueError for fewer than two values, where the statistic is
    undefined.
    """
    n = len(x_or_y_values)
    if n < 2:
        raise ValueError("At least two values are required")
    mean = sum(x_or_y_values) / n
    return (sum((value - mean) ** 2 for value in x_or_y_values) / (n - 1)) ** 0.5


def sample_correlation_coefficient(x_val, y_val):
    """Pearson sample correlation coefficient of the paired samples.

    Raises ValueError when the samples differ in length or hold fewer than
    two points.
    """
    n = len(x_val)
    if n != len(y_val) or n < 2:
        raise ValueError("x and y must be the same length, with at least two points")
    mean_x = sum(x_val) / n
    mean_y = sum(y_val) / n
    # Sample covariance, matching the (n - 1) convention used above.
    covariance = sum((a - mean_x) * (b - mean_y) for a, b in zip(x_val, y_val)) / (n - 1)
    return covariance / (sample_standard_deviation(x_val) * sample_standard_deviation(y_val))


def simple_linear_regression(x, y):
    """Fit y_pred = beta * x + alpha and return (alpha, beta).

    beta = correlation_x_y * (std_y / std_x); alpha = mean(y) - beta * mean(x).
    """
    beta = sample_correlation_coefficient(x, y) * (
        sample_standard_deviation(y) / sample_standard_deviation(x)
    )
    alpha = sum(y) / len(y) - beta * (sum(x) / len(x))
    return alpha, beta

maths/cooks_distance.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
Cook's Distance is used to estimate the influence of a data point in
33
least squares regression.
44
5-
Cook's Distance removes each data point and measures the effect of removing the
6-
data point.
5+
Cook's Distance removes each data point one at a time, and measures the effect. Large
6+
Cook's Distance values for an individual data point indicates that data point should
7+
be further investigated. A cutoff for what is large needs to be decided upon, and 1 is
8+
often used.
79
810
The algorithm works as follows:
911
For each data point in the regression, remove the point from the set
@@ -14,10 +16,13 @@
1416
1517
https://en.wikipedia.org/wiki/Cook's_distance
1618
"""
17-
from machine_learning.loss_functions.mean_squared_error import mean_squared_error
19+
1820
import numpy as np
21+
from machine_learning.loss_functions import mean_squared_error
22+
from sklearn import datasets
23+
from sklearn.linear_model import LinearRegression
1924

20-
def calculate_cooks_distance(y_observed: array, y_fitted: array, rank: int) -> array:
25+
def calculate_cooks_distance(y_observed: np.ndarray, y_fitted: np.ndarray, rank: int) -> np.ndarray:
    """Calculate Cook's Distance for every observation.

    Input:
        y_observed: numpy array of observed y values
        y_fitted: numpy array of fitted y values from linear regression model
        rank: int representing the number of coefficients
    Output:
        cooks_distance: numpy array of Cook's distance for each y value.
    Raises:
        TypeError: when rank is not an int.
        ValueError: when the arrays differ in length or are empty.
    """
    # Validate inputs before doing any arithmetic on them.
    if not isinstance(rank, int):
        msg = f"Rank is an integer representing the number of predictors. Input: {rank}"
        raise TypeError(msg)

    if len(y_observed) != len(y_fitted) or len(y_observed) == 0:
        msg = (
            "The arrays of observed and fitted values must be equal length and "
            f"non-empty. Currently observed = {len(y_observed)} and fitted = {len(y_fitted)}"
        )
        raise ValueError(msg)

    # Mean squared error computed inline (standard definition), so the
    # function has no project-local dependency.
    mse = np.mean((y_observed - y_fitted) ** 2)
    y_difference_squared = (y_observed - y_fitted) ** 2

    # This is leave one out, so summing over all and then individually subtracting.
    summed_difference = np.sum(y_difference_squared)
    return (summed_difference - y_difference_squared) / (rank * mse)
4951

52+
if __name__ == "__main__":
    import doctest

    doctest.testmod(verbose=True)
    # Self-contained demonstration: the previous draft referenced an
    # undefined `df` and `main()`. A large distance flags an influential
    # observation worth investigating.
    y_observed = np.array([1.0, 2.0, 3.0, 4.0])
    y_fitted = np.array([1.1, 1.9, 3.2, 3.8])
    print(calculate_cooks_distance(y_observed, y_fitted, 1))
60+

maths/cooks_distance_BAK.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""
2+
Cook's Distance is used to estimate the influence of a data point in
3+
least squares regression.
4+
5+
Cook's Distance removes each data point and measures the effect of removing the
6+
data point.
7+
8+
The algorithm works as follows:
9+
For each data point in the regression, remove the point from the set
10+
and calculate the effect of removing that point.
11+
12+
D_i = (sum over all other points(y_actual - y_observed)^2) / (rank * MSE^2)
13+
14+
15+
https://en.wikipedia.org/wiki/Cook's_distance
16+
"""
17+
from machine_learning.loss_functions.mean_squared_error import mean_squared_error
18+
import numpy as np
19+
20+
def calculate_cooks_distance(y_observed: np.ndarray, y_fitted: np.ndarray, rank: int) -> np.ndarray:
    """Calculate Cook's Distance for every observation.

    Input:
        y_observed: numpy array of observed y values
        y_fitted: numpy array of fitted y values from linear regression model
        rank: int representing the number of coefficients
    Output:
        cooks_distance: numpy array of Cook's distance for each y value.
    Raises:
        TypeError: when rank is not an int.
        ValueError: when the arrays differ in length or are empty.
    """
    # Validate inputs before doing any arithmetic on them.
    if not isinstance(rank, int):
        msg = f"Rank is an integer representing the number of predictors. Input: {rank}"
        raise TypeError(msg)

    if len(y_observed) != len(y_fitted) or len(y_observed) == 0:
        msg = (
            "The arrays of observed and fitted values must be equal length and "
            f"non-empty. Currently observed = {len(y_observed)} and fitted = {len(y_fitted)}"
        )
        raise ValueError(msg)

    # Mean squared error computed inline (standard definition), so the
    # function has no project-local dependency.
    _mse = np.mean((y_observed - y_fitted) ** 2)
    _y_difference_squared = (y_observed - y_fitted) ** 2

    # Leave-one-out: sum over all points, then subtract each point's own term.
    _summed_difference = np.sum(_y_difference_squared)
    return (_summed_difference - _y_difference_squared) / (rank * _mse)
49+

0 commit comments

Comments
 (0)