From 722584e4fefb8b3207a7f9d580cb225c4d554171 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 11:31:17 +0700 Subject: [PATCH 01/38] add forecasting code --- machine_learning/forecasting/__init__.py | 0 machine_learning/forecasting/ex_data.csv | 114 +++++++++++++++++++++++ machine_learning/forecasting/methods.py | 46 +++++++++ machine_learning/forecasting/run.py | 71 ++++++++++++++ 4 files changed, 231 insertions(+) create mode 100644 machine_learning/forecasting/__init__.py create mode 100644 machine_learning/forecasting/ex_data.csv create mode 100644 machine_learning/forecasting/methods.py create mode 100644 machine_learning/forecasting/run.py diff --git a/machine_learning/forecasting/__init__.py b/machine_learning/forecasting/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/machine_learning/forecasting/ex_data.csv b/machine_learning/forecasting/ex_data.csv new file mode 100644 index 000000000000..1c429e649755 --- /dev/null +++ b/machine_learning/forecasting/ex_data.csv @@ -0,0 +1,114 @@ +total_user,total_events,days +18231,0.0,1 +22621,1.0,2 +15675,0.0,3 +23583,1.0,4 +68351,5.0,5 +34338,3.0,6 +19238,0.0,0 +24192,0.0,1 +70349,0.0,2 +103510,0.0,3 +128355,1.0,4 +148484,6.0,5 +153489,3.0,6 +162667,1.0,0 +311430,3.0,1 +435663,7.0,2 +273526,0.0,3 +628588,2.0,4 +454989,13.0,5 +539040,3.0,6 +52974,1.0,0 +103451,2.0,1 +810020,5.0,2 +580982,3.0,3 +216515,0.0,4 +134694,10.0,5 +93563,1.0,6 +55432,1.0,0 +169634,1.0,1 +254908,4.0,2 +315285,3.0,3 +191764,0.0,4 +514284,7.0,5 +181214,4.0,6 +78459,2.0,0 +161620,3.0,1 +245610,4.0,2 +326722,5.0,3 +214578,0.0,4 +312365,5.0,5 +232454,4.0,6 +178368,1.0,0 +97152,1.0,1 +222813,4.0,2 +285852,4.0,3 +192149,1.0,4 +142241,1.0,5 +173011,2.0,6 +56488,3.0,0 +89572,2.0,1 +356082,2.0,2 +172799,0.0,3 +142300,1.0,4 +78432,2.0,5 +539023,9.0,6 +62389,1.0,0 +70247,1.0,1 +89229,0.0,2 +94583,1.0,3 +102455,0.0,4 +129270,0.0,5 +311409,1.0,6 +1837026,0.0,0 +361824,0.0,1 +111379,2.0,2 +76337,2.0,3 +96747,0.0,4 +92058,0.0,5 +81929,2.0,6 +143423,0.0,0 +82939,0.0,1 +74403,1.0,2 +68234,0.0,3 +94556,1.0,4 +80311,0.0,5 +75283,3.0,6 +77724,0.0,0 +49229,2.0,1 +65708,2.0,2 +273864,1.0,3 +1711281,0.0,4 +1900253,5.0,5 +343071,1.0,6 +1551326,0.0,0 +56636,1.0,1 +272782,2.0,2 +1785678,0.0,3 +241866,0.0,4 +461904,0.0,5 +2191901,2.0,6 +102925,0.0,0 +242778,1.0,1 +298608,0.0,2 +322458,10.0,3 +216027,9.0,4 +916052,12.0,5 +193278,12.0,6 +263207,8.0,0 +672948,10.0,1 +281909,1.0,2 +384562,1.0,3 +1027375,2.0,4 +828905,9.0,5 +624188,22.0,6 +392218,8.0,0 +292581,10.0,1 +299869,12.0,2 +769455,20.0,3 +316443,8.0,4 +1212864,24.0,5 +1397338,28.0,6 +223249,8.0,0 +191264,14.0,1 diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py new file mode 100644 index 000000000000..c5aaafa0a39c --- /dev/null +++ b/machine_learning/forecasting/methods.py @@ -0,0 +1,46 @@ +import numpy as np +from statsmodels.tsa.statespace.sarimax import SARIMAX +from sklearn.svm import SVR + + +def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): + # linear regression, return float + x = [] + for i in range(len(train_dt)): + x.append([1, train_dt[i], train_mtch[i]]) + x = np.array(x) + y = np.array(train_usr) + beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y) + prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2]) + return prediction + + +def sarimax_predictor(train_user, train_match, test_match): + # sarimax, return list of float + order = (1, 2, 1) + s_order = (1, 1, 0, 7) + model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order) + model_fit = model.fit(disp=False, maxiter=600, method='nm') + result = model_fit.predict(1, len(test_match), exog=[test_match]) + return result[0] + + +def support_machine_regressor(x_train, x_test, train_user): + # svr, return list of float + regressor = SVR(kernel='rbf', C=1, gamma=0.1, epsilon=.1) + regressor.fit(x_train, train_user) + y_pred = regressor.predict(x_test) + + return y_pred[0] + + +def interquartile_range_checker(train_user): + # optional + # return low limit and upper limit for outlier + train_user.sort() + q1 = np.percentile(train_user, 25) + q3 = np.percentile(train_user, 75) + iqr = q3 - q1 + low_lim = q1 - (iqr * 0.1) + + return low_lim diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py new file mode 100644 index 000000000000..4ac39c25c5dc --- /dev/null +++ b/machine_learning/forecasting/run.py @@ -0,0 +1,71 @@ +''' +this is code for forecasting +but i modified it and used it for safety checker of data +for ex: you have a online shop and for some reason some data are +missing (the amount of data that u expected are not supposed to be) + then we can use it +*ps : 1. ofc we can use normal statistic method but in this case + the data is quite absurd and only a little^^ + 2. ofc u can use this and modified it for forecasting purpose + for the next 3 months sales or something, + u can just adjust it for ur own purpose +''' + +from sklearn.preprocessing import Normalizer +import pandas as pd +from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor + + +def data_safety_checker(list_vote, actual_result): + safe = 0 + not_safe = 0 + for i in list_vote: + if i > actual_result: + safe = not_safe + 1 + else : + if (abs(abs(i) - abs(actual_result[0])) <= 0.1): + safe = safe + 1 + else: + not_safe = not_safe + 1 + if safe > not_safe : + print("today's data is safe") + else : + print("today's data is not safe") + + +data_input_df = pd.read_csv("ex_data.csv") +''' +data column = total user in a day, how much online event held in one day, +what day is that(sunday-saturday) +''' + +# start normalization +normalize_df = Normalizer().fit_transform(data_input_df.values) +# split data +total_date = normalize_df[: , 2].tolist() +total_user = normalize_df[: , 0].tolist() +total_match = normalize_df[: , 1].tolist() + +# for svr (input variable = total date and total match) +x = normalize_df[:, [1 , 2]].tolist() +x_train = x[:len(x) - 1] +x_test = x[len(x) - 1:] + +# for linear reression & sarimax +trn_date = total_date[:len(total_date) - 1] +trn_user = total_user[:len(total_user) - 1] +trn_match = total_match[:len(total_match) - 1] + +tst_date = total_date[len(total_date) - 1:] +tst_user = total_user[len(total_user) - 1:] +tst_match = total_match[len(total_match) - 1:] + + +# voting system with forecasting +res_vote = [] +res_vote.append(lin_reg_pred(trn_date, trn_user, trn_match, tst_date, tst_match)) +res_vote.append(sarimax_predictor(trn_user, trn_match, tst_match)) +res_vote.append(support_machine_regressor(x_train, x_test, trn_user)) + +# check the safety of todays'data^^ +data_safety_checker(res_vote, tst_user) From 0b823da4a9f6ec973aa7e8e8bc86d89b68a9890d Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 11:50:34 +0700 Subject: [PATCH 02/38] add statsmodel --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 31dc586c29db..4b452cbdede5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ sklearn sympy tensorflow xgboost +statsmodels From aab3a718936c1da93ba65dd0882139cf47afceec Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 11:55:36 +0700 Subject: [PATCH 03/38] sort import --- machine_learning/forecasting/methods.py | 2 +- machine_learning/forecasting/run.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py index c5aaafa0a39c..cef244e1b4ff 100644 --- a/machine_learning/forecasting/methods.py +++ b/machine_learning/forecasting/methods.py @@ -1,6 +1,6 @@ import numpy as np -from statsmodels.tsa.statespace.sarimax import SARIMAX from sklearn.svm import SVR +from statsmodels.tsa.statespace.sarimax import SARIMAX def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 4ac39c25c5dc..81874e5fe003 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -11,9 +11,9 @@ u can just adjust it for ur own purpose ''' -from sklearn.preprocessing import Normalizer -import pandas as pd from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor +import pandas as pd +from sklearn.preprocessing import Normalizer def data_safety_checker(list_vote, actual_result): From 8f6941847cc788e834b33e9b5b0dae1d4557fe97 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 11:59:41 +0700 Subject: [PATCH 04/38] sort import fix --- machine_learning/forecasting/methods.py | 1 + machine_learning/forecasting/run.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py index cef244e1b4ff..c62fc06146b6 100644 --- a/machine_learning/forecasting/methods.py +++ b/machine_learning/forecasting/methods.py @@ -1,5 +1,6 @@ import numpy as np from sklearn.svm import SVR + from statsmodels.tsa.statespace.sarimax import SARIMAX diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 81874e5fe003..9e15b29462ea 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -11,10 +11,11 @@ u can just adjust it for ur own purpose ''' -from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor import pandas as pd from sklearn.preprocessing import Normalizer +from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor + def data_safety_checker(list_vote, actual_result): safe = 0 From 31b89268bdec2af0e7e6575cb3acf50b336b3a61 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 12:05:30 +0700 Subject: [PATCH 05/38] fixing black --- machine_learning/forecasting/methods.py | 4 +-- machine_learning/forecasting/run.py | 40 ++++++++++++------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py index c62fc06146b6..28a74bfcc98b 100644 --- a/machine_learning/forecasting/methods.py +++ b/machine_learning/forecasting/methods.py @@ -21,14 +21,14 @@ def sarimax_predictor(train_user, train_match, test_match): order = (1, 2, 1) s_order = (1, 1, 0, 7) model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order) - model_fit = model.fit(disp=False, maxiter=600, method='nm') + model_fit = model.fit(disp=False, maxiter=600, method="nm") result = model_fit.predict(1, len(test_match), exog=[test_match]) return result[0] def support_machine_regressor(x_train, x_test, train_user): # svr, return list of float - regressor = SVR(kernel='rbf', C=1, gamma=0.1, epsilon=.1) + regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) regressor.fit(x_train, train_user) y_pred = regressor.predict(x_test) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 9e15b29462ea..5da2b7e68fc2 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -1,4 +1,4 @@ -''' +""" this is code for forecasting but i modified it and used it for safety checker of data for ex: you have a online shop and for some reason some data are @@ -9,7 +9,7 @@ 2. ofc u can use this and modified it for forecasting purpose for the next 3 months sales or something, u can just adjust it for ur own purpose -''' +""" import pandas as pd from sklearn.preprocessing import Normalizer @@ -23,43 +23,43 @@ def data_safety_checker(list_vote, actual_result): for i in list_vote: if i > actual_result: safe = not_safe + 1 - else : - if (abs(abs(i) - abs(actual_result[0])) <= 0.1): + else: + if abs(abs(i) - abs(actual_result[0])) <= 0.1: safe = safe + 1 else: not_safe = not_safe + 1 - if safe > not_safe : + if safe > not_safe: print("today's data is safe") - else : + else: print("today's data is not safe") data_input_df = pd.read_csv("ex_data.csv") -''' +""" data column = total user in a day, how much online event held in one day, what day is that(sunday-saturday) -''' +""" # start normalization normalize_df = Normalizer().fit_transform(data_input_df.values) # split data -total_date = normalize_df[: , 2].tolist() -total_user = normalize_df[: , 0].tolist() -total_match = normalize_df[: , 1].tolist() +total_date = normalize_df[:, 2].tolist() +total_user = normalize_df[:, 0].tolist() +total_match = normalize_df[:, 1].tolist() # for svr (input variable = total date and total match) -x = normalize_df[:, [1 , 2]].tolist() -x_train = x[:len(x) - 1] -x_test = x[len(x) - 1:] +x = normalize_df[:, [1, 2]].tolist() +x_train = x[: len(x) - 1] +x_test = x[len(x) - 1 :] # for linear reression & sarimax -trn_date = total_date[:len(total_date) - 1] -trn_user = total_user[:len(total_user) - 1] -trn_match = total_match[:len(total_match) - 1] +trn_date = total_date[: len(total_date) - 1] +trn_user = total_user[: len(total_user) - 1] +trn_match = total_match[: len(total_match) - 1] -tst_date = total_date[len(total_date) - 1:] -tst_user = total_user[len(total_user) - 1:] -tst_match = total_match[len(total_match) - 1:] +tst_date = total_date[len(total_date) - 1 :] +tst_user = total_user[len(total_user) - 1 :] +tst_match = total_match[len(total_match) - 1 :] # voting system with forecasting From e3ba4fa72003ca6fe199195b403d0f8cd5aa45f1 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 12:10:49 +0700 Subject: [PATCH 06/38] sort requirement --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4b452cbdede5..2ee0e13282d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ pillow requests scikit-fuzzy sklearn +statsmodels sympy tensorflow xgboost -statsmodels From 058889bc359ee6a7a4615c0be06e47282076bfb5 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 12:12:53 +0700 Subject: [PATCH 07/38] optimize code --- machine_learning/forecasting/methods.py | 47 ------------------------- machine_learning/forecasting/run.py | 46 +++++++++++++++++++++++- 2 files changed, 45 insertions(+), 48 deletions(-) delete mode 100644 machine_learning/forecasting/methods.py diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py deleted file mode 100644 index 28a74bfcc98b..000000000000 --- a/machine_learning/forecasting/methods.py +++ /dev/null @@ -1,47 +0,0 @@ -import numpy as np -from sklearn.svm import SVR - -from statsmodels.tsa.statespace.sarimax import SARIMAX - - -def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): - # linear regression, return float - x = [] - for i in range(len(train_dt)): - x.append([1, train_dt[i], train_mtch[i]]) - x = np.array(x) - y = np.array(train_usr) - beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y) - prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2]) - return prediction - - -def sarimax_predictor(train_user, train_match, test_match): - # sarimax, return list of float - order = (1, 2, 1) - s_order = (1, 1, 0, 7) - model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order) - model_fit = model.fit(disp=False, maxiter=600, method="nm") - result = model_fit.predict(1, len(test_match), exog=[test_match]) - return result[0] - - -def support_machine_regressor(x_train, x_test, train_user): - # svr, return list of float - regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) - regressor.fit(x_train, train_user) - y_pred = regressor.predict(x_test) - - return y_pred[0] - - -def interquartile_range_checker(train_user): - # optional - # return low limit and upper limit for outlier - train_user.sort() - q1 = np.percentile(train_user, 25) - q3 = np.percentile(train_user, 75) - iqr = q3 - q1 - low_lim = q1 - (iqr * 0.1) - - return low_lim diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 5da2b7e68fc2..6d6877b0d58c 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -11,10 +11,54 @@ u can just adjust it for ur own purpose """ +import numpy as np import pandas as pd from sklearn.preprocessing import Normalizer +from sklearn.svm import SVR +from statsmodels.tsa.statespace.sarimax import SARIMAX -from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor + +def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): + # linear regression, return float + x = [] + for i in range(len(train_dt)): + x.append([1, train_dt[i], train_mtch[i]]) + x = np.array(x) + y = np.array(train_usr) + beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y) + prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2]) + return prediction + + +def sarimax_predictor(train_user, train_match, test_match): + # sarimax, return list of float + order = (1, 2, 1) + s_order = (1, 1, 0, 7) + model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order) + model_fit = model.fit(disp=False, maxiter=600, method="nm") + result = model_fit.predict(1, len(test_match), exog=[test_match]) + return result[0] + + +def support_machine_regressor(x_train, x_test, train_user): + # svr, return list of float + regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) + regressor.fit(x_train, train_user) + y_pred = regressor.predict(x_test) + + return y_pred[0] + + +def interquartile_range_checker(train_user): + # optional + # return low limit and upper limit for outlier + train_user.sort() + q1 = np.percentile(train_user, 25) + q3 = np.percentile(train_user, 75) + iqr = q3 - q1 + low_lim = q1 - (iqr * 0.1) + + return low_lim def data_safety_checker(list_vote, actual_result): From 7a58104d4e62f124b52d7a22093f32f76e256f80 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 12:54:14 +0700 Subject: [PATCH 08/38] try with limited data --- machine_learning/forecasting/run.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 6d6877b0d58c..1e8dcf97f146 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -11,12 +11,19 @@ u can just adjust it for ur own purpose """ +import warnings import numpy as np import pandas as pd from sklearn.preprocessing import Normalizer from sklearn.svm import SVR +from statsmodels.tools.sm_exceptions import ConvergenceWarning from statsmodels.tsa.statespace.sarimax import SARIMAX +warnings.simplefilter("ignore", ConvergenceWarning) +warnings.simplefilter("ignore", UserWarning) +warnings.simplefilter("ignore", RuntimeWarning) +np.seterr(divide="ignore", invalid="ignore") + def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): # linear regression, return float @@ -78,7 +85,10 @@ def data_safety_checker(list_vote, actual_result): print("today's data is not safe") -data_input_df = pd.read_csv("ex_data.csv") +# data_input_df = pd.read_csv("ex_data.csv", header=None) +list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]] +data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"]) + """ data column = total user in a day, how much online event held in one day, what day is that(sunday-saturday) From 6f0b775084f35cb9435a9da8195c651459e5e22b Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 12:56:33 +0700 Subject: [PATCH 09/38] sort again --- machine_learning/forecasting/run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 1e8dcf97f146..b70b85e81553 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -12,10 +12,12 @@ """ import warnings + import numpy as np import pandas as pd from sklearn.preprocessing import Normalizer from sklearn.svm import SVR + from statsmodels.tools.sm_exceptions import ConvergenceWarning from statsmodels.tsa.statespace.sarimax import SARIMAX From 5fd4b0536e4968da4e09ab8a5ef2027fbc16daaa Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 12:58:56 +0700 Subject: [PATCH 10/38] sort fix --- machine_learning/forecasting/run.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index b70b85e81553..0f5ccb7413d0 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -11,20 +11,18 @@ u can just adjust it for ur own purpose """ -import warnings - import numpy as np import pandas as pd from sklearn.preprocessing import Normalizer from sklearn.svm import SVR - from statsmodels.tools.sm_exceptions import ConvergenceWarning from statsmodels.tsa.statespace.sarimax import SARIMAX +import warnings -warnings.simplefilter("ignore", ConvergenceWarning) +warnings.simplefilter('ignore', ConvergenceWarning) warnings.simplefilter("ignore", UserWarning) warnings.simplefilter("ignore", RuntimeWarning) -np.seterr(divide="ignore", invalid="ignore") +np.seterr(divide='ignore', invalid='ignore') def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): @@ -89,7 +87,7 @@ def data_safety_checker(list_vote, actual_result): # data_input_df = pd.read_csv("ex_data.csv", header=None) list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]] -data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"]) +data_input_df = pd.DataFrame(list_data, columns=['total_user', 'total_even', 'days']) """ data column = total user in a day, how much online event held in one day, From 4864b411209782650fe08f6a652ef68decc1bc88 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 12:58:56 +0700 Subject: [PATCH 11/38] sort fix --- machine_learning/forecasting/run.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index b70b85e81553..33416606bdca 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -11,21 +11,13 @@ u can just adjust it for ur own purpose """ -import warnings - import numpy as np import pandas as pd from sklearn.preprocessing import Normalizer from sklearn.svm import SVR - from statsmodels.tools.sm_exceptions import ConvergenceWarning from statsmodels.tsa.statespace.sarimax import SARIMAX -warnings.simplefilter("ignore", ConvergenceWarning) -warnings.simplefilter("ignore", UserWarning) -warnings.simplefilter("ignore", RuntimeWarning) -np.seterr(divide="ignore", invalid="ignore") - def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): # linear regression, return float @@ -89,7 +81,7 @@ def data_safety_checker(list_vote, actual_result): # data_input_df = pd.read_csv("ex_data.csv", header=None) list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]] -data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"]) +data_input_df = pd.DataFrame(list_data, columns=['total_user', 'total_even', 'days']) """ data column = total user in a day, how much online event held in one day, From 122fdc68692a020ed263f167e5eb81c4e737f84e Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 13:03:20 +0700 Subject: [PATCH 12/38] delete warning and black --- machine_learning/forecasting/run.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 33416606bdca..b773ede5f91c 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -15,7 +15,6 @@ import pandas as pd from sklearn.preprocessing import Normalizer from sklearn.svm import SVR -from statsmodels.tools.sm_exceptions import ConvergenceWarning from statsmodels.tsa.statespace.sarimax import SARIMAX @@ -81,7 +80,7 @@ def data_safety_checker(list_vote, actual_result): # data_input_df = pd.read_csv("ex_data.csv", header=None) list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]] -data_input_df = pd.DataFrame(list_data, columns=['total_user', 'total_even', 'days']) +data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"]) """ data column = total user in a day, how much online event held in one day, From b3df9254205d1c4bd9314ad284896b90e47e738c Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 13:08:32 +0700 Subject: [PATCH 13/38] add code for forecasting --- machine_learning/forecasting/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 16bd28a0529f..c1ec2da34a28 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -16,7 +16,6 @@ from sklearn.preprocessing import Normalizer from sklearn.svm import SVR from statsmodels.tsa.statespace.sarimax import SARIMAX -import warnings def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): From cfa6a2f2d03a136eeec396f2536aae75c6648cca Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Mon, 12 Oct 2020 13:09:09 +0700 Subject: [PATCH 14/38] use black --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index c1ec2da34a28..b773ede5f91c 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -80,7 +80,7 @@ def data_safety_checker(list_vote, actual_result): # data_input_df = pd.read_csv("ex_data.csv", header=None) list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]] -data_input_df = pd.DataFrame(list_data, columns=['total_user', 'total_even', 'days']) +data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"]) """ data column = total user in a day, how much online event held in one day, From cefc5f4505a4b2211f2c54e87076c3b65286280f Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 09:22:17 +0700 Subject: [PATCH 15/38] add more hints to describe --- machine_learning/forecasting/run.py | 51 +++++++++++++++++++---------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index b773ede5f91c..d7cb01569ae1 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -18,30 +18,44 @@ from statsmodels.tsa.statespace.sarimax import SARIMAX -def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): - # linear regression, return float +def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt): + """ + First method: linear regression + input : training data (date, total_user, total_event) in list of float + output : list of total user prediction in float + """ x = [] for i in range(len(train_dt)): - x.append([1, train_dt[i], train_mtch[i]]) + x.append([1, train_dt[i], train_evnt[i]]) x = np.array(x) y = np.array(train_usr) beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y) - prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2]) + prediction = abs(beta[0] + test_dt[0] * beta[1] + test_evnt[0] + beta[2]) return prediction -def sarimax_predictor(train_user, train_match, test_match): - # sarimax, return list of float +def sarimax_predictor(train_user, train_evnt, test_evnt): + """ + second method: sarimax + input : training data (total_user, + with exog data = total_event) in list of float + output : list of total user prediction in float + """ order = (1, 2, 1) s_order = (1, 1, 0, 7) - model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order) + model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order) model_fit = model.fit(disp=False, maxiter=600, method="nm") - result = model_fit.predict(1, len(test_match), exog=[test_match]) + result = model_fit.predict(1, len(test_evnt), exog=[test_evnt]) return result[0] def support_machine_regressor(x_train, x_test, train_user): - # svr, return list of float + """ + Third method: SVR + input : training data (date, total_user, total_event) in list of float + where x = list of set (date and total event) + output : list of total user prediction in float + """ regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) regressor.fit(x_train, train_user) y_pred = regressor.predict(x_test) @@ -50,8 +64,12 @@ def support_machine_regressor(x_train, x_test, train_user): def interquartile_range_checker(train_user): - # optional - # return low limit and upper limit for outlier + """ + Optional method: interquatile range + input : list of total user in float + output : low limit of input in float + this method can be used to check whether some data is outlier or not + """ train_user.sort() q1 = np.percentile(train_user, 25) q3 = np.percentile(train_user, 75) @@ -81,7 +99,6 @@ def data_safety_checker(list_vote, actual_result): # data_input_df = pd.read_csv("ex_data.csv", header=None) list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]] data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"]) - """ data column = total user in a day, how much online event held in one day, what day is that(sunday-saturday) @@ -92,7 +109,7 @@ def data_safety_checker(list_vote, actual_result): # split data total_date = normalize_df[:, 2].tolist() total_user = normalize_df[:, 0].tolist() -total_match = normalize_df[:, 1].tolist() +total_event = normalize_df[:, 1].tolist() # for svr (input variable = total date and total match) x = normalize_df[:, [1, 2]].tolist() @@ -102,17 +119,17 @@ def data_safety_checker(list_vote, actual_result): # for linear reression & sarimax trn_date = total_date[: len(total_date) - 1] trn_user = total_user[: len(total_user) - 1] -trn_match = total_match[: len(total_match) - 1] +trn_event = total_event[: len(total_event) - 1] tst_date = total_date[len(total_date) - 1 :] tst_user = total_user[len(total_user) - 1 :] -tst_match = total_match[len(total_match) - 1 :] +tst_event = total_event[len(total_event) - 1 :] # voting system with forecasting res_vote = [] -res_vote.append(lin_reg_pred(trn_date, trn_user, trn_match, tst_date, tst_match)) -res_vote.append(sarimax_predictor(trn_user, trn_match, tst_match)) +res_vote.append(lin_reg_pred(trn_date, trn_user, trn_event, tst_date, tst_event)) +res_vote.append(sarimax_predictor(trn_user, trn_event, tst_event)) res_vote.append(support_machine_regressor(x_train, x_test, trn_user)) # check the safety of todays'data^^ From 85b37cc4de22e1b5ca2951552085a96a434bdb04 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 09:36:05 +0700 Subject: [PATCH 16/38] add doctest --- machine_learning/forecasting/run.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index d7cb01569ae1..60a330ce7bcf 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -23,6 +23,8 @@ def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt): First method: linear regression input : training data (date, total_user, total_event) in list of float output : list of total user prediction in float + >>> lin_reg_red([2,3,4,5], [5,3,4,6], [3,1,2,4] [2,1], [2,2]) + [3.95, 4.25] """ x = [] for i in range(len(train_dt)): @@ -40,6 +42,8 @@ def sarimax_predictor(train_user, train_evnt, test_evnt): input : training data (total_user, with exog data = total_event) in list of float output : list of total user prediction in float + >>> sarimax_predictor([5,7,8,9], [3,1,2,4], [2,1]) + [10.67, 13.15] """ order = (1, 2, 1) s_order = (1, 1, 0, 7) @@ -55,6 +59,8 @@ def support_machine_regressor(x_train, x_test, train_user): input : training data (date, total_user, total_event) in list of float where x = list of set (date and total event) output : list of total user prediction in float + >>> support_machine_regressor([5,7,8,9], [3,1,2,4], [2,1]) + [11.23, 12.23] """ regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) regressor.fit(x_train, train_user) @@ -69,6 +75,8 @@ def interquartile_range_checker(train_user): input : list of total user in float output : low limit of input in float this method can be used to check whether some data is outlier or not + >>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10]) + 2.20 """ train_user.sort() q1 = np.percentile(train_user, 25) From 13d23b66f50421e95288bfc3c7e7a16b7dc19bde Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 09:39:06 +0700 Subject: [PATCH 17/38] finding whitespace --- machine_learning/forecasting/run.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 60a330ce7bcf..97a198818857 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -37,14 +37,6 @@ def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt): def sarimax_predictor(train_user, train_evnt, test_evnt): - """ - second method: sarimax - input : training data (total_user, - with exog data = total_event) in list of float - output : list of total user prediction in float - >>> sarimax_predictor([5,7,8,9], [3,1,2,4], [2,1]) - [10.67, 13.15] - """ order = (1, 2, 1) s_order = (1, 1, 0, 7) model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order) From aa0daa511bf6a4b7ea5dd548d14222fe2a2df61a Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 09:59:29 +0700 Subject: [PATCH 18/38] fixing doctest --- machine_learning/forecasting/run.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 97a198818857..839ebae9446d 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -19,13 +19,6 @@ def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt): - """ - First method: linear regression - input : training data (date, total_user, total_event) in list of float - output : list of total user prediction in float - >>> lin_reg_red([2,3,4,5], [5,3,4,6], [3,1,2,4] [2,1], [2,2]) - [3.95, 4.25] - """ x = [] for i in range(len(train_dt)): x.append([1, train_dt[i], train_evnt[i]]) @@ -37,6 +30,14 @@ def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt): def sarimax_predictor(train_user, train_evnt, test_evnt): + """ + second method: sarimax + input : training data (total_user, + with exog data = total_event) in list of float + output : list of total user prediction in float + >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2]) + 6.6666671111109626 + """ order = (1, 2, 1) s_order = (1, 1, 0, 7) model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order) @@ -51,13 +52,12 @@ def support_machine_regressor(x_train, x_test, train_user): input : training data (date, total_user, total_event) in list of float where x = list of set (date and total event) output : list of total user prediction in float - >>> support_machine_regressor([5,7,8,9], [3,1,2,4], [2,1]) - [11.23, 12.23] + >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) + 1.634932078116079 """ regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) regressor.fit(x_train, train_user) y_pred = regressor.predict(x_test) - return y_pred[0] @@ -68,7 +68,7 @@ def interquartile_range_checker(train_user): output : low limit of input in float this method can be used to check whether some data is outlier or not >>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10]) - 2.20 + 2.8 """ train_user.sort() q1 = np.percentile(train_user, 25) From 1e6f92362a3f3272011459f8ff920ad3a427a06c Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 10:05:39 +0700 Subject: [PATCH 19/38] delete --- machine_learning/forecasting/run.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 839ebae9446d..cb008162cba2 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -30,14 +30,6 @@ def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt): def sarimax_predictor(train_user, train_evnt, test_evnt): - """ - second method: sarimax - input : training data (total_user, - with exog data = total_event) in list of float - output : list of total user prediction in float - >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2]) - 6.6666671111109626 - """ order = (1, 2, 1) s_order = (1, 1, 0, 7) model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order) @@ -47,14 +39,6 @@ def sarimax_predictor(train_user, train_evnt, test_evnt): def support_machine_regressor(x_train, x_test, train_user): - """ - Third method: SVR - input : training data (date, total_user, total_event) in list of float - where x = list of set (date and total event) - output : list of total user prediction in float - >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) - 1.634932078116079 - """ regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) regressor.fit(x_train, train_user) y_pred = regressor.predict(x_test) From ab98e753dce2fb68817ef507dc8023dc9d9bab44 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 10:10:38 +0700 Subject: [PATCH 20/38] revert back --- machine_learning/forecasting/run.py | 32 ++++++++++++----------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index cb008162cba2..0f3144eb166e 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -18,23 +18,23 @@ from statsmodels.tsa.statespace.sarimax import SARIMAX -def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt): +def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): x = [] for i in range(len(train_dt)): - x.append([1, train_dt[i], train_evnt[i]]) + x.append([1, train_dt[i], train_mtch[i]]) x = np.array(x) y = np.array(train_usr) beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y) - prediction = abs(beta[0] + test_dt[0] * beta[1] + test_evnt[0] + beta[2]) + prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2]) return prediction -def sarimax_predictor(train_user, train_evnt, test_evnt): +def sarimax_predictor(train_user, train_match, test_match): order = (1, 2, 1) s_order = (1, 1, 0, 7) - model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order) + model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order) model_fit = model.fit(disp=False, maxiter=600, method="nm") - result = model_fit.predict(1, len(test_evnt), exog=[test_evnt]) + result = model_fit.predict(1, len(test_match), exog=[test_match]) return result[0] @@ -42,18 +42,11 @@ def support_machine_regressor(x_train, x_test, train_user): regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) regressor.fit(x_train, train_user) y_pred = regressor.predict(x_test) + return y_pred[0] def interquartile_range_checker(train_user): - """ - Optional method: interquatile range - input : list of total user in float - output : low limit of input in float - this method can be used to check whether some data is outlier or not - >>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10]) - 2.8 - """ train_user.sort() q1 = np.percentile(train_user, 25) q3 = np.percentile(train_user, 75) @@ -83,6 +76,7 @@ def data_safety_checker(list_vote, actual_result): # data_input_df = pd.read_csv("ex_data.csv", header=None) list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]] data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"]) + """ data column = total user in a day, how much online event held in one day, what day is that(sunday-saturday) @@ -93,7 +87,7 @@ def data_safety_checker(list_vote, actual_result): # split data total_date = normalize_df[:, 2].tolist() total_user = normalize_df[:, 0].tolist() -total_event = normalize_df[:, 1].tolist() +total_match = normalize_df[:, 1].tolist() # for svr (input variable = total date and total match) x = normalize_df[:, [1, 2]].tolist() @@ -103,17 +97,17 @@ def data_safety_checker(list_vote, actual_result): # for linear reression & sarimax trn_date = total_date[: len(total_date) - 1] trn_user = total_user[: len(total_user) - 1] -trn_event = total_event[: len(total_event) - 1] +trn_match = total_match[: len(total_match) - 1] tst_date = total_date[len(total_date) - 1 :] tst_user = total_user[len(total_user) - 1 :] -tst_event = total_event[len(total_event) - 1 :] +tst_match = total_match[len(total_match) - 1 :] # voting system with forecasting res_vote = [] -res_vote.append(lin_reg_pred(trn_date, trn_user, trn_event, tst_date, tst_event)) -res_vote.append(sarimax_predictor(trn_user, trn_event, tst_event)) +res_vote.append(lin_reg_pred(trn_date, trn_user, trn_match, tst_date, tst_match)) +res_vote.append(sarimax_predictor(trn_user, trn_match, tst_match)) res_vote.append(support_machine_regressor(x_train, x_test, trn_user)) # check the safety of todays'data^^ From 38513880c9a383a4dc04aa8c09a4efcc1cf28781 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 10:13:54 +0700 Subject: [PATCH 21/38] revert back --- machine_learning/forecasting/run.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 0f3144eb166e..b773ede5f91c 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -19,6 +19,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): + # linear regression, return float x = [] for i in range(len(train_dt)): x.append([1, train_dt[i], train_mtch[i]]) @@ -30,6 +31,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): def sarimax_predictor(train_user, train_match, test_match): + # sarimax, return list of float order = (1, 2, 1) s_order = (1, 1, 0, 7) model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order) @@ -39,6 +41,7 @@ def sarimax_predictor(train_user, train_match, test_match): def support_machine_regressor(x_train, x_test, train_user): + # svr, return list of float regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) regressor.fit(x_train, train_user) y_pred = regressor.predict(x_test) @@ -47,6 +50,8 @@ def support_machine_regressor(x_train, x_test, train_user): def interquartile_range_checker(train_user): + # optional + # return low limit and upper limit for outlier train_user.sort() q1 = np.percentile(train_user, 25) q3 = np.percentile(train_user, 75) From 6fb8a22d4e45608f6b0b61f4eb144190edf12875 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 10:21:58 +0700 Subject: [PATCH 22/38] revert back again --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index b773ede5f91c..fc52d90f3cef 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -19,7 +19,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): - # linear regression, return float + # linear regression, return floatss x = [] for i in range(len(train_dt)): x.append([1, train_dt[i], train_mtch[i]]) From 574d25d449970b094d4393bb5fe34c86f99715a7 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 10:27:02 +0700 Subject: [PATCH 23/38] revert back again --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index fc52d90f3cef..03a609f6b471 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -19,7 +19,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): - # linear regression, return floatss + # linear regression, return floats x = [] for i in range(len(train_dt)): x.append([1, train_dt[i], train_mtch[i]]) From c151dd5b57c0f1cef7ec6a776b68b9729ecd9a3a Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 10:29:01 +0700 Subject: [PATCH 24/38] revert back again --- machine_learning/forecasting/run.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 03a609f6b471..22fe9a532fd7 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -45,7 +45,6 @@ def support_machine_regressor(x_train, x_test, train_user): regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) regressor.fit(x_train, train_user) y_pred = regressor.predict(x_test) - return y_pred[0] @@ -57,7 +56,6 @@ def interquartile_range_checker(train_user): q3 = np.percentile(train_user, 75) iqr = q3 - q1 low_lim = q1 - (iqr * 0.1) - return low_lim From bf45d77ea56a36f1451e7676c2e59a23fce2f19d Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 16:09:40 +0700 Subject: [PATCH 25/38] try trimming whitespace --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 22fe9a532fd7..5367d2026517 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -19,7 +19,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): - # linear regression, return floats + # linear regression, return floatss x = [] for i in range(len(train_dt)): x.append([1, train_dt[i], train_mtch[i]]) From 4cdec16f69ca03dea507631679e1afd1b00be751 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 16:24:20 +0700 Subject: [PATCH 26/38] try adding doctypeand etc --- machine_learning/forecasting/run.py | 36 +++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 5367d2026517..b9dc34bbcefe 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -19,7 +19,13 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): - # linear regression, return floatss + """ + First method: linear regression + input : training data (date, total_user, total_event) in list of float + output : list of total user prediction in float + >>> lin_reg_red([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2]) + 3.0000424034255513 + """ x = [] for i in range(len(train_dt)): x.append([1, train_dt[i], train_mtch[i]]) @@ -31,7 +37,14 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): def sarimax_predictor(train_user, train_match, test_match): - # sarimax, return list of float + """ + second method: sarimax + input : training data (total_user, + with exog data = total_event) in list of float + output : list of total user prediction in float + >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2]) + 6.6666671111109626 + """ order = (1, 2, 1) s_order = (1, 1, 0, 7) model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order) @@ -41,7 +54,14 @@ def sarimax_predictor(train_user, train_match, test_match): def support_machine_regressor(x_train, x_test, train_user): - # svr, return list of float + """ + Third method: SVR + input : training data (date, total_user, total_event) in list of float + where x = list of set (date and total event) + output : list of total user prediction in float + >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) + 1.634932078116079 + """ regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) regressor.fit(x_train, train_user) y_pred = regressor.predict(x_test) @@ -49,8 +69,14 @@ def support_machine_regressor(x_train, x_test, train_user): def interquartile_range_checker(train_user): - # optional - # return low limit and upper limit for outlier + """ + Optional method: interquatile range + input : list of total user in float + output : low limit of input in float + this method can be used to check whether some data is outlier or not + >>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10]) + 2.8 + """ train_user.sort() q1 = np.percentile(train_user, 25) q3 = np.percentile(train_user, 75) From 43c9d4ccf791508c37b2a7b814f01576903e583b Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 16:58:54 +0700 Subject: [PATCH 27/38] fixing reviews --- machine_learning/forecasting/run.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index b9dc34bbcefe..d95858f051fe 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -23,12 +23,11 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): First method: linear regression input : training data (date, total_user, total_event) in list of float output : list of total user prediction in float - >>> lin_reg_red([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2]) + >>> lin_reg_pred([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2]) 3.0000424034255513 """ x = [] - for i in range(len(train_dt)): - x.append([1, train_dt[i], train_mtch[i]]) + x = [[1, item, train_mtch[i]] for i, item in enumerate(train_dt)] x = np.array(x) y = np.array(train_usr) beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y) @@ -39,15 +38,18 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): def sarimax_predictor(train_user, train_match, test_match): """ second method: sarimax - input : training data (total_user, - with exog data = total_event) in list of float + sarimax is a statistic method which using previous input + and learn its pattern to predict future data + input : training data (total_user, with exog data = total_event) in list of float output : list of total user prediction in float >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2]) 6.6666671111109626 """ order = (1, 2, 1) - s_order = (1, 1, 0, 7) - model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order) + seasonal_order = (1, 1, 0, 7) + model = SARIMAX( + train_user, exog=train_match, order=order, seasonal_order=seasonal_order + ) model_fit = model.fit(disp=False, maxiter=600, method="nm") result = model_fit.predict(1, len(test_match), exog=[test_match]) return result[0] @@ -55,7 +57,11 @@ def sarimax_predictor(train_user, train_match, test_match): def support_machine_regressor(x_train, x_test, train_user): """ - Third method: SVR + Third method: Support vector regressor + svr is quite the same with svm(support vector machine) + it uses the same principles as the SVM for classification, + with only a few minor differences and the only different is that + it suits better for regression purpose input : training data (date, total_user, total_event) in list of float where x = list of set (date and total event) output : list of total user prediction in float From 14ccdc88ac0c1e98c87aa37b69f5605c310656c9 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 17:05:42 +0700 Subject: [PATCH 28/38] deleting all the space --- machine_learning/forecasting/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index d95858f051fe..e81b7fa4804a 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -43,7 +43,7 @@ def sarimax_predictor(train_user, train_match, test_match): input : training data (total_user, with exog data = total_event) in list of float output : list of total user prediction in float >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2]) - 6.6666671111109626 + 3.0000424034255513 """ order = (1, 2, 1) seasonal_order = (1, 1, 0, 7) @@ -63,7 +63,7 @@ def support_machine_regressor(x_train, x_test, train_user): with only a few minor differences and the only different is that it suits better for regression purpose input : training data (date, total_user, total_event) in list of float - where x = list of set (date and total event) + where x = list of set (date and total event) output : list of total user prediction in float >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) 1.634932078116079 From 8194902ad19fb49b9c91160faeba05b16ecf34af Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 17:11:25 +0700 Subject: [PATCH 29/38] fixing the build --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index e81b7fa4804a..2f7e25167bcc 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -24,7 +24,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): input : training data (date, total_user, total_event) in list of float output : list of total user prediction in float >>> lin_reg_pred([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2]) - 3.0000424034255513 + 5.000000000000003 """ x = [] x = [[1, item, train_mtch[i]] for i, item in enumerate(train_dt)] From eab1d3b1b74d87f9e1af1df9b7e484ce1b4c2601 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 17:15:33 +0700 Subject: [PATCH 30/38] delete x --- machine_learning/forecasting/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 2f7e25167bcc..f5d33d438478 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -26,7 +26,6 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): >>> lin_reg_pred([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2]) 5.000000000000003 """ - x = [] x = [[1, item, train_mtch[i]] for i, item in enumerate(train_dt)] x = np.array(x) y = np.array(train_usr) From d34853f089f031762f1c3e86238dce0b12c344fa Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 19:34:19 +0700 Subject: [PATCH 31/38] add description for safety checker --- machine_learning/forecasting/run.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index f5d33d438478..d6c2cc564557 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -42,7 +42,7 @@ def sarimax_predictor(train_user, train_match, test_match): input : training data (total_user, with exog data = total_event) in list of float output : list of total user prediction in float >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2]) - 3.0000424034255513 + 6.6666671111109626 """ order = (1, 2, 1) seasonal_order = (1, 1, 0, 7) @@ -91,6 +91,14 @@ def interquartile_range_checker(train_user): def data_safety_checker(list_vote, actual_result): + """ + Used to review all the votes (list result prediction) + and compare it to the actual result. + input : list of predictions + output : print whether it's safe or not + >>> data_safety_checker([2,3,4],5) + today's data is safe + """ safe = 0 not_safe = 0 for i in list_vote: @@ -108,8 +116,8 @@ def data_safety_checker(list_vote, actual_result): # data_input_df = pd.read_csv("ex_data.csv", header=None) -list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]] -data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"]) +data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]] +data_input_df = pd.DataFrame(data_input, columns=["total_user", "total_even", "days"]) """ data column = total user in a day, how much online event held in one day, From 81effc54ff18aab234c166dd60e394ae8ecf45f9 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 19:42:28 +0700 Subject: [PATCH 32/38] deleting subscription integer --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index d6c2cc564557..95d28aadfd22 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -105,7 +105,7 @@ def data_safety_checker(list_vote, actual_result): if i > actual_result: safe = not_safe + 1 else: - if abs(abs(i) - abs(actual_result[0])) <= 0.1: + if abs(abs(i) - abs(actual_result)) <= 0.1: safe = safe + 1 else: not_safe = not_safe + 1 From 6c8f1af0d6f30e7816281ec7325f7ff8d43f80ce Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Tue, 13 Oct 2020 19:50:00 +0700 Subject: [PATCH 33/38] fix docthint --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 95d28aadfd22..b48998118e39 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -97,7 +97,7 @@ def data_safety_checker(list_vote, actual_result): input : list of predictions output : print whether it's safe or not >>> data_safety_checker([2,3,4],5) - today's data is safe + today's data is not safe """ safe = 0 not_safe = 0 From 070cba4e2b6192f04215a0711d5754608f34c92b Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Sat, 24 Oct 2020 16:45:34 +0700 Subject: [PATCH 34/38] make def to use function parameters and return values --- machine_learning/forecasting/run.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index b48998118e39..7e3f7269e755 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -18,7 +18,9 @@ from statsmodels.tsa.statespace.sarimax import SARIMAX -def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): +def lin_reg_pred( + train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list +) -> float: """ First method: linear regression input : training data (date, total_user, total_event) in list of float @@ -34,7 +36,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch): return prediction -def sarimax_predictor(train_user, train_match, test_match): +def sarimax_predictor(train_user: list, train_match: list, test_match: list) -> float: """ second method: sarimax sarimax is a statistic method which using previous input @@ -54,7 +56,7 @@ def sarimax_predictor(train_user, train_match, test_match): return result[0] -def support_machine_regressor(x_train, x_test, train_user): +def support_machine_regressor(x_train: list, x_test: list, train_user: list) -> float: """ Third method: Support vector regressor svr is quite the same with svm(support vector machine) @@ -73,7 +75,7 @@ def support_machine_regressor(x_train, x_test, train_user): return y_pred[0] -def interquartile_range_checker(train_user): +def interquartile_range_checker(train_user: list) -> float: """ Optional method: interquatile range input : list of total user in float From 28ac6494ca670ae1a378a4fd2e87890ab1346563 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Sat, 24 Oct 2020 20:06:05 +0700 Subject: [PATCH 35/38] make def to use function parameters and return values --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 7e3f7269e755..bf3fe3f2dfec 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -38,7 +38,7 @@ def lin_reg_pred( def sarimax_predictor(train_user: list, train_match: list, test_match: list) -> float: """ - second method: sarimax + second method: Sarimax sarimax is a statistic method which using previous input and learn its pattern to predict future data input : training data (total_user, with exog data = total_event) in list of float From da69f838c03e3893b01a5861a51662d062508bd8 Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Sat, 24 Oct 2020 20:42:51 +0700 Subject: [PATCH 36/38] type hints on data safety checker --- machine_learning/forecasting/run.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index bf3fe3f2dfec..346d94fa0b45 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -18,22 +18,21 @@ from statsmodels.tsa.statespace.sarimax import SARIMAX -def lin_reg_pred( +def linear_regression_prediction( train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list ) -> float: """ First method: linear regression input : training data (date, total_user, total_event) in list of float output : list of total user prediction in float - >>> lin_reg_pred([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2]) + >>> linear_regression_prediction([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2]) 5.000000000000003 """ x = [[1, item, train_mtch[i]] for i, item in enumerate(train_dt)] x = np.array(x) y = np.array(train_usr) beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y) - prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2]) - return prediction + return abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2]) def sarimax_predictor(train_user: list, train_match: list, test_match: list) -> float: @@ -56,7 +55,7 @@ def sarimax_predictor(train_user: list, train_match: list, test_match: list) -> return result[0] -def support_machine_regressor(x_train: list, x_test: list, train_user: list) -> float: +def support_vector_regressor(x_train: list, x_test: list, train_user: list) -> float: """ Third method: Support vector regressor svr is quite the same with svm(support vector machine) @@ -66,7 +65,7 @@ def support_machine_regressor(x_train: list, x_test: list, train_user: list) -> input : training data (date, total_user, total_event) in list of float where x = list of set (date and total event) output : list of total user prediction in float - >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) + >>> support_vector_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) 1.634932078116079 """ regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1) @@ -99,7 +98,7 @@ def data_safety_checker(list_vote, actual_result): input : list of predictions output : print whether it's safe or not >>> data_safety_checker([2,3,4],5) - today's data is not safe + today's data = not safe """ safe = 0 not_safe = 0 @@ -111,10 +110,7 @@ def data_safety_checker(list_vote, actual_result): safe = safe + 1 else: not_safe = not_safe + 1 - if safe > not_safe: - print("today's data is safe") - else: - print("today's data is not safe") + print("today's data =", "not" if safe <= not_safe else "", "safe") # data_input_df = pd.read_csv("ex_data.csv", header=None) @@ -150,9 +146,11 @@ def data_safety_checker(list_vote, actual_result): # voting system with forecasting res_vote = [] -res_vote.append(lin_reg_pred(trn_date, trn_user, trn_match, tst_date, tst_match)) +res_vote.append( + linear_regression_prediction(trn_date, trn_user, trn_match, tst_date, tst_match) +) res_vote.append(sarimax_predictor(trn_user, trn_match, tst_match)) -res_vote.append(support_machine_regressor(x_train, x_test, trn_user)) +res_vote.append(support_vector_regressor(x_train, x_test, trn_user)) # check the safety of todays'data^^ data_safety_checker(res_vote, tst_user) From 3b18a483fb1de578f38df701e15f750676af8f9b Mon Sep 17 00:00:00 2001 From: FVFYK3GEHV22 Date: Sat, 24 Oct 2020 20:45:38 +0700 Subject: [PATCH 37/38] optimize code --- machine_learning/forecasting/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 346d94fa0b45..c33b1a926a08 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -91,13 +91,13 @@ def interquartile_range_checker(train_user: list) -> float: return low_lim -def data_safety_checker(list_vote, actual_result): +def data_safety_checker(list_vote: list, actual_result: float): """ Used to review all the votes (list result prediction) and compare it to the actual result. input : list of predictions output : print whether it's safe or not - >>> data_safety_checker([2,3,4],5) + >>> data_safety_checker([2,3,4],5.0) today's data = not safe """ safe = 0 From 85723b60754653a91e84e0dcd227ef0d4578f7ad Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sat, 24 Oct 2020 16:02:12 +0200 Subject: [PATCH 38/38] Update run.py --- machine_learning/forecasting/run.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index c33b1a926a08..467371e8d2ff 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -91,14 +91,14 @@ def interquartile_range_checker(train_user: list) -> float: return low_lim -def data_safety_checker(list_vote: list, actual_result: float): +def data_safety_checker(list_vote: list, actual_result: float) -> None: """ Used to review all the votes (list result prediction) and compare it to the actual result. input : list of predictions output : print whether it's safe or not >>> data_safety_checker([2,3,4],5.0) - today's data = not safe + Today's data is not safe. """ safe = 0 not_safe = 0 @@ -110,7 +110,7 @@ def data_safety_checker(list_vote: list, actual_result: float): safe = safe + 1 else: not_safe = not_safe + 1 - print("today's data =", "not" if safe <= not_safe else "", "safe") + print(f"Today's data is {'not ' if safe <= not_safe else ''}safe.") # data_input_df = pd.read_csv("ex_data.csv", header=None)