From 722584e4fefb8b3207a7f9d580cb225c4d554171 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 11:31:17 +0700
Subject: [PATCH 01/38] add forecasting code

---
 machine_learning/forecasting/__init__.py |   0
 machine_learning/forecasting/ex_data.csv | 114 +++++++++++++++++++++++
 machine_learning/forecasting/methods.py  |  46 +++++++++
 machine_learning/forecasting/run.py      |  71 ++++++++++++++
 4 files changed, 231 insertions(+)
 create mode 100644 machine_learning/forecasting/__init__.py
 create mode 100644 machine_learning/forecasting/ex_data.csv
 create mode 100644 machine_learning/forecasting/methods.py
 create mode 100644 machine_learning/forecasting/run.py

diff --git a/machine_learning/forecasting/__init__.py b/machine_learning/forecasting/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/machine_learning/forecasting/ex_data.csv b/machine_learning/forecasting/ex_data.csv
new file mode 100644
index 000000000000..1c429e649755
--- /dev/null
+++ b/machine_learning/forecasting/ex_data.csv
@@ -0,0 +1,114 @@
+total_user,total_events,days
+18231,0.0,1
+22621,1.0,2
+15675,0.0,3
+23583,1.0,4
+68351,5.0,5
+34338,3.0,6
+19238,0.0,0
+24192,0.0,1
+70349,0.0,2
+103510,0.0,3
+128355,1.0,4
+148484,6.0,5
+153489,3.0,6
+162667,1.0,0
+311430,3.0,1
+435663,7.0,2
+273526,0.0,3
+628588,2.0,4
+454989,13.0,5
+539040,3.0,6
+52974,1.0,0
+103451,2.0,1
+810020,5.0,2
+580982,3.0,3
+216515,0.0,4
+134694,10.0,5
+93563,1.0,6
+55432,1.0,0
+169634,1.0,1
+254908,4.0,2
+315285,3.0,3
+191764,0.0,4
+514284,7.0,5
+181214,4.0,6
+78459,2.0,0
+161620,3.0,1
+245610,4.0,2
+326722,5.0,3
+214578,0.0,4
+312365,5.0,5
+232454,4.0,6
+178368,1.0,0
+97152,1.0,1
+222813,4.0,2
+285852,4.0,3
+192149,1.0,4
+142241,1.0,5
+173011,2.0,6
+56488,3.0,0
+89572,2.0,1
+356082,2.0,2
+172799,0.0,3
+142300,1.0,4
+78432,2.0,5
+539023,9.0,6
+62389,1.0,0
+70247,1.0,1
+89229,0.0,2
+94583,1.0,3
+102455,0.0,4
+129270,0.0,5
+311409,1.0,6
+1837026,0.0,0
+361824,0.0,1
+111379,2.0,2
+76337,2.0,3
+96747,0.0,4
+92058,0.0,5
+81929,2.0,6
+143423,0.0,0
+82939,0.0,1
+74403,1.0,2
+68234,0.0,3
+94556,1.0,4
+80311,0.0,5
+75283,3.0,6
+77724,0.0,0
+49229,2.0,1
+65708,2.0,2
+273864,1.0,3
+1711281,0.0,4
+1900253,5.0,5
+343071,1.0,6
+1551326,0.0,0
+56636,1.0,1
+272782,2.0,2
+1785678,0.0,3
+241866,0.0,4
+461904,0.0,5
+2191901,2.0,6
+102925,0.0,0
+242778,1.0,1
+298608,0.0,2
+322458,10.0,3
+216027,9.0,4
+916052,12.0,5
+193278,12.0,6
+263207,8.0,0
+672948,10.0,1
+281909,1.0,2
+384562,1.0,3
+1027375,2.0,4
+828905,9.0,5
+624188,22.0,6
+392218,8.0,0
+292581,10.0,1
+299869,12.0,2
+769455,20.0,3
+316443,8.0,4
+1212864,24.0,5
+1397338,28.0,6
+223249,8.0,0
+191264,14.0,1
diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py
new file mode 100644
index 000000000000..c5aaafa0a39c
--- /dev/null
+++ b/machine_learning/forecasting/methods.py
@@ -0,0 +1,46 @@
+import numpy as np
+from statsmodels.tsa.statespace.sarimax import SARIMAX
+from sklearn.svm import SVR
+
+
+def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
+    # linear regression, return float
+    x = []
+    for i in range(len(train_dt)):
+        x.append([1, train_dt[i], train_mtch[i]])
+    x = np.array(x)
+    y = np.array(train_usr)
+    beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y)
+    prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2])
+    return prediction
+
+
+def sarimax_predictor(train_user, train_match, test_match):
+    # sarimax, return list of float
+    order = (1, 2, 1)
+    s_order = (1, 1, 0, 7)
+    model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order)
+    model_fit = model.fit(disp=False, maxiter=600, method='nm')
+    result = model_fit.predict(1, len(test_match), exog=[test_match])
+    return result[0]
+
+
+def support_machine_regressor(x_train, x_test, train_user):
+    # svr, return list of float
+    regressor = SVR(kernel='rbf', C=1, gamma=0.1, epsilon=.1)
+    regressor.fit(x_train, train_user)
+    y_pred = regressor.predict(x_test)
+
+    return y_pred[0]
+
+
+def interquartile_range_checker(train_user):
+    # optional
+    # return low limit and upper limit for outlier
+    train_user.sort()
+    q1 = np.percentile(train_user, 25)
+    q3 = np.percentile(train_user, 75)
+    iqr = q3 - q1
+    low_lim = q1 - (iqr * 0.1)
+
+    return low_lim
diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
new file mode 100644
index 000000000000..4ac39c25c5dc
--- /dev/null
+++ b/machine_learning/forecasting/run.py
@@ -0,0 +1,71 @@
+'''
+this is code for forecasting
+but i modified it and used it for safety checker of data
+for ex: you have a online shop and for some reason some data are
+missing (the amount of data that u expected are not supposed to be)
+        then we can use it
+*ps : 1. ofc we can use normal statistic method but in this case
+         the data is quite absurd and only a little^^
+      2. ofc u can use this and modified it for forecasting purpose
+         for the next 3 months sales or something,
+         u can just adjust it for ur own purpose
+'''
+
+from sklearn.preprocessing import Normalizer
+import pandas as pd
+from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor
+
+
+def data_safety_checker(list_vote, actual_result):
+    safe = 0
+    not_safe = 0
+    for i in list_vote:
+        if i > actual_result:
+            safe = not_safe + 1
+        else :
+            if (abs(abs(i) - abs(actual_result[0])) <= 0.1):
+                safe = safe + 1
+            else:
+                not_safe = not_safe + 1
+    if safe > not_safe :
+        print("today's data is safe")
+    else :
+        print("today's data is not safe")
+
+
+data_input_df = pd.read_csv("ex_data.csv")
+'''
+data column = total user in a day, how much online event held in one day,
+what day is that(sunday-saturday)
+'''
+
+# start normalization
+normalize_df = Normalizer().fit_transform(data_input_df.values)
+# split data
+total_date = normalize_df[: , 2].tolist()
+total_user = normalize_df[: , 0].tolist()
+total_match = normalize_df[: , 1].tolist()
+
+# for svr (input variable = total date and total match)
+x = normalize_df[:, [1 , 2]].tolist()
+x_train = x[:len(x) - 1]
+x_test = x[len(x) - 1:]
+
+# for linear reression & sarimax
+trn_date = total_date[:len(total_date) - 1]
+trn_user = total_user[:len(total_user) - 1]
+trn_match = total_match[:len(total_match) - 1]
+
+tst_date = total_date[len(total_date) - 1:]
+tst_user = total_user[len(total_user) - 1:]
+tst_match = total_match[len(total_match) - 1:]
+
+
+# voting system with forecasting
+res_vote = []
+res_vote.append(lin_reg_pred(trn_date, trn_user, trn_match, tst_date, tst_match))
+res_vote.append(sarimax_predictor(trn_user, trn_match, tst_match))
+res_vote.append(support_machine_regressor(x_train, x_test, trn_user))
+
+# check the safety of todays'data^^
+data_safety_checker(res_vote, tst_user)

From 0b823da4a9f6ec973aa7e8e8bc86d89b68a9890d Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 11:50:34 +0700
Subject: [PATCH 02/38] add statsmodels to requirements

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 31dc586c29db..4b452cbdede5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,3 +13,4 @@ sklearn
 sympy
 tensorflow
 xgboost
+statsmodels

From aab3a718936c1da93ba65dd0882139cf47afceec Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 11:55:36 +0700
Subject: [PATCH 03/38] sort import

---
 machine_learning/forecasting/methods.py | 2 +-
 machine_learning/forecasting/run.py     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py
index c5aaafa0a39c..cef244e1b4ff 100644
--- a/machine_learning/forecasting/methods.py
+++ b/machine_learning/forecasting/methods.py
@@ -1,6 +1,6 @@
 import numpy as np
-from statsmodels.tsa.statespace.sarimax import SARIMAX
 from sklearn.svm import SVR
+from statsmodels.tsa.statespace.sarimax import SARIMAX
 
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 4ac39c25c5dc..81874e5fe003 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -11,9 +11,9 @@
          u can just adjust it for ur own purpose
 '''
 
-from sklearn.preprocessing import Normalizer
-import pandas as pd
 from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor
+import pandas as pd
+from sklearn.preprocessing import Normalizer
 
 
 def data_safety_checker(list_vote, actual_result):

From 8f6941847cc788e834b33e9b5b0dae1d4557fe97 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 11:59:41 +0700
Subject: [PATCH 04/38] sort import fix

---
 machine_learning/forecasting/methods.py | 1 +
 machine_learning/forecasting/run.py     | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py
index cef244e1b4ff..c62fc06146b6 100644
--- a/machine_learning/forecasting/methods.py
+++ b/machine_learning/forecasting/methods.py
@@ -1,5 +1,6 @@
 import numpy as np
 from sklearn.svm import SVR
+
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
 
diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 81874e5fe003..9e15b29462ea 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -11,10 +11,11 @@
          u can just adjust it for ur own purpose
 '''
 
-from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor
 import pandas as pd
 from sklearn.preprocessing import Normalizer
 
+from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor
+
 
 def data_safety_checker(list_vote, actual_result):
     safe = 0

From 31b89268bdec2af0e7e6575cb3acf50b336b3a61 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 12:05:30 +0700
Subject: [PATCH 05/38] fixing black

---
 machine_learning/forecasting/methods.py |  4 +--
 machine_learning/forecasting/run.py     | 40 ++++++++++++-------------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py
index c62fc06146b6..28a74bfcc98b 100644
--- a/machine_learning/forecasting/methods.py
+++ b/machine_learning/forecasting/methods.py
@@ -21,14 +21,14 @@ def sarimax_predictor(train_user, train_match, test_match):
     order = (1, 2, 1)
     s_order = (1, 1, 0, 7)
     model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order)
-    model_fit = model.fit(disp=False, maxiter=600, method='nm')
+    model_fit = model.fit(disp=False, maxiter=600, method="nm")
     result = model_fit.predict(1, len(test_match), exog=[test_match])
     return result[0]
 
 
 def support_machine_regressor(x_train, x_test, train_user):
     # svr, return list of float
-    regressor = SVR(kernel='rbf', C=1, gamma=0.1, epsilon=.1)
+    regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
     regressor.fit(x_train, train_user)
     y_pred = regressor.predict(x_test)
 
diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 9e15b29462ea..5da2b7e68fc2 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -1,4 +1,4 @@
-'''
+"""
 this is code for forecasting
 but i modified it and used it for safety checker of data
 for ex: you have a online shop and for some reason some data are
@@ -9,7 +9,7 @@
       2. ofc u can use this and modified it for forecasting purpose
          for the next 3 months sales or something,
          u can just adjust it for ur own purpose
-'''
+"""
 
 import pandas as pd
 from sklearn.preprocessing import Normalizer
@@ -23,43 +23,43 @@ def data_safety_checker(list_vote, actual_result):
     for i in list_vote:
         if i > actual_result:
             safe = not_safe + 1
-        else :
-            if (abs(abs(i) - abs(actual_result[0])) <= 0.1):
+        else:
+            if abs(abs(i) - abs(actual_result[0])) <= 0.1:
                 safe = safe + 1
             else:
                 not_safe = not_safe + 1
-    if safe > not_safe :
+    if safe > not_safe:
         print("today's data is safe")
-    else :
+    else:
         print("today's data is not safe")
 
 
 data_input_df = pd.read_csv("ex_data.csv")
-'''
+"""
 data column = total user in a day, how much online event held in one day,
 what day is that(sunday-saturday)
-'''
+"""
 
 # start normalization
 normalize_df = Normalizer().fit_transform(data_input_df.values)
 # split data
-total_date = normalize_df[: , 2].tolist()
-total_user = normalize_df[: , 0].tolist()
-total_match = normalize_df[: , 1].tolist()
+total_date = normalize_df[:, 2].tolist()
+total_user = normalize_df[:, 0].tolist()
+total_match = normalize_df[:, 1].tolist()
 
 # for svr (input variable = total date and total match)
-x = normalize_df[:, [1 , 2]].tolist()
-x_train = x[:len(x) - 1]
-x_test = x[len(x) - 1:]
+x = normalize_df[:, [1, 2]].tolist()
+x_train = x[: len(x) - 1]
+x_test = x[len(x) - 1 :]
 
 # for linear reression & sarimax
-trn_date = total_date[:len(total_date) - 1]
-trn_user = total_user[:len(total_user) - 1]
-trn_match = total_match[:len(total_match) - 1]
+trn_date = total_date[: len(total_date) - 1]
+trn_user = total_user[: len(total_user) - 1]
+trn_match = total_match[: len(total_match) - 1]
 
-tst_date = total_date[len(total_date) - 1:]
-tst_user = total_user[len(total_user) - 1:]
-tst_match = total_match[len(total_match) - 1:]
+tst_date = total_date[len(total_date) - 1 :]
+tst_user = total_user[len(total_user) - 1 :]
+tst_match = total_match[len(total_match) - 1 :]
 
 
 # voting system with forecasting

From e3ba4fa72003ca6fe199195b403d0f8cd5aa45f1 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 12:10:49 +0700
Subject: [PATCH 06/38] sort requirement

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 4b452cbdede5..2ee0e13282d9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ pillow
 requests
 scikit-fuzzy
 sklearn
+statsmodels
 sympy
 tensorflow
 xgboost
-statsmodels

From 058889bc359ee6a7a4615c0be06e47282076bfb5 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 12:12:53 +0700
Subject: [PATCH 07/38] merge methods.py into run.py

---
 machine_learning/forecasting/methods.py | 47 -------------------------
 machine_learning/forecasting/run.py     | 46 +++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 48 deletions(-)
 delete mode 100644 machine_learning/forecasting/methods.py

diff --git a/machine_learning/forecasting/methods.py b/machine_learning/forecasting/methods.py
deleted file mode 100644
index 28a74bfcc98b..000000000000
--- a/machine_learning/forecasting/methods.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import numpy as np
-from sklearn.svm import SVR
-
-from statsmodels.tsa.statespace.sarimax import SARIMAX
-
-
-def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
-    # linear regression, return float
-    x = []
-    for i in range(len(train_dt)):
-        x.append([1, train_dt[i], train_mtch[i]])
-    x = np.array(x)
-    y = np.array(train_usr)
-    beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y)
-    prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2])
-    return prediction
-
-
-def sarimax_predictor(train_user, train_match, test_match):
-    # sarimax, return list of float
-    order = (1, 2, 1)
-    s_order = (1, 1, 0, 7)
-    model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order)
-    model_fit = model.fit(disp=False, maxiter=600, method="nm")
-    result = model_fit.predict(1, len(test_match), exog=[test_match])
-    return result[0]
-
-
-def support_machine_regressor(x_train, x_test, train_user):
-    # svr, return list of float
-    regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
-    regressor.fit(x_train, train_user)
-    y_pred = regressor.predict(x_test)
-
-    return y_pred[0]
-
-
-def interquartile_range_checker(train_user):
-    # optional
-    # return low limit and upper limit for outlier
-    train_user.sort()
-    q1 = np.percentile(train_user, 25)
-    q3 = np.percentile(train_user, 75)
-    iqr = q3 - q1
-    low_lim = q1 - (iqr * 0.1)
-
-    return low_lim
diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 5da2b7e68fc2..6d6877b0d58c 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -11,10 +11,54 @@
          u can just adjust it for ur own purpose
 """
 
+import numpy as np
 import pandas as pd
 from sklearn.preprocessing import Normalizer
+from sklearn.svm import SVR
+from statsmodels.tsa.statespace.sarimax import SARIMAX
 
-from methods import lin_reg_pred, sarimax_predictor, support_machine_regressor
+
+def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
+    # linear regression, return float
+    x = []
+    for i in range(len(train_dt)):
+        x.append([1, train_dt[i], train_mtch[i]])
+    x = np.array(x)
+    y = np.array(train_usr)
+    beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y)
+    prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2])
+    return prediction
+
+
+def sarimax_predictor(train_user, train_match, test_match):
+    # sarimax, return list of float
+    order = (1, 2, 1)
+    s_order = (1, 1, 0, 7)
+    model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order)
+    model_fit = model.fit(disp=False, maxiter=600, method="nm")
+    result = model_fit.predict(1, len(test_match), exog=[test_match])
+    return result[0]
+
+
+def support_machine_regressor(x_train, x_test, train_user):
+    # svr, return list of float
+    regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
+    regressor.fit(x_train, train_user)
+    y_pred = regressor.predict(x_test)
+
+    return y_pred[0]
+
+
+def interquartile_range_checker(train_user):
+    # optional
+    # return low limit and upper limit for outlier
+    train_user.sort()
+    q1 = np.percentile(train_user, 25)
+    q3 = np.percentile(train_user, 75)
+    iqr = q3 - q1
+    low_lim = q1 - (iqr * 0.1)
+
+    return low_lim
 
 
 def data_safety_checker(list_vote, actual_result):

From 7a58104d4e62f124b52d7a22093f32f76e256f80 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 12:54:14 +0700
Subject: [PATCH 08/38] try with limited data

---
 machine_learning/forecasting/run.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 6d6877b0d58c..1e8dcf97f146 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -11,12 +11,19 @@
          u can just adjust it for ur own purpose
 """
 
+import warnings
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import Normalizer
 from sklearn.svm import SVR
+from statsmodels.tools.sm_exceptions import ConvergenceWarning
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
+warnings.simplefilter("ignore", ConvergenceWarning)
+warnings.simplefilter("ignore", UserWarning)
+warnings.simplefilter("ignore", RuntimeWarning)
+np.seterr(divide="ignore", invalid="ignore")
+
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
     # linear regression, return float
@@ -78,7 +85,10 @@ def data_safety_checker(list_vote, actual_result):
         print("today's data is not safe")
 
 
-data_input_df = pd.read_csv("ex_data.csv")
+# data_input_df = pd.read_csv("ex_data.csv", header=None)
+list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
+data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"])
+
 """
 data column = total user in a day, how much online event held in one day,
 what day is that(sunday-saturday)

From 6f0b775084f35cb9435a9da8195c651459e5e22b Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 12:56:33 +0700
Subject: [PATCH 09/38] sort again

---
 machine_learning/forecasting/run.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 1e8dcf97f146..b70b85e81553 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -12,10 +12,12 @@
 """
 
 import warnings
+
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import Normalizer
 from sklearn.svm import SVR
+
 from statsmodels.tools.sm_exceptions import ConvergenceWarning
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 

From 5fd4b0536e4968da4e09ab8a5ef2027fbc16daaa Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 12:58:56 +0700
Subject: [PATCH 10/38] sort fix

---
 machine_learning/forecasting/run.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index b70b85e81553..0f5ccb7413d0 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -11,20 +11,18 @@
          u can just adjust it for ur own purpose
 """
 
-import warnings
-
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import Normalizer
 from sklearn.svm import SVR
-
 from statsmodels.tools.sm_exceptions import ConvergenceWarning
 from statsmodels.tsa.statespace.sarimax import SARIMAX
+import warnings
 
-warnings.simplefilter("ignore", ConvergenceWarning)
+warnings.simplefilter('ignore', ConvergenceWarning)
 warnings.simplefilter("ignore", UserWarning)
 warnings.simplefilter("ignore", RuntimeWarning)
-np.seterr(divide="ignore", invalid="ignore")
+np.seterr(divide='ignore', invalid='ignore')
 
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
@@ -89,7 +87,7 @@ def data_safety_checker(list_vote, actual_result):
 
 # data_input_df = pd.read_csv("ex_data.csv", header=None)
 list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
-data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"])
+data_input_df = pd.DataFrame(list_data, columns=['total_user', 'total_even', 'days'])
 
 """
 data column = total user in a day, how much online event held in one day,

From 4864b411209782650fe08f6a652ef68decc1bc88 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 12:58:56 +0700
Subject: [PATCH 11/38] sort fix

---
 machine_learning/forecasting/run.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index b70b85e81553..33416606bdca 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -11,21 +11,13 @@
          u can just adjust it for ur own purpose
 """
 
-import warnings
-
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import Normalizer
 from sklearn.svm import SVR
-
 from statsmodels.tools.sm_exceptions import ConvergenceWarning
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
-warnings.simplefilter("ignore", ConvergenceWarning)
-warnings.simplefilter("ignore", UserWarning)
-warnings.simplefilter("ignore", RuntimeWarning)
-np.seterr(divide="ignore", invalid="ignore")
-
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
     # linear regression, return float
@@ -89,7 +81,7 @@ def data_safety_checker(list_vote, actual_result):
 
 # data_input_df = pd.read_csv("ex_data.csv", header=None)
 list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
-data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"])
+data_input_df = pd.DataFrame(list_data, columns=['total_user', 'total_even', 'days'])
 
 """
 data column = total user in a day, how much online event held in one day,

From 122fdc68692a020ed263f167e5eb81c4e737f84e Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 13:03:20 +0700
Subject: [PATCH 12/38] delete warning and black

---
 machine_learning/forecasting/run.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 33416606bdca..b773ede5f91c 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -15,7 +15,6 @@
 import pandas as pd
 from sklearn.preprocessing import Normalizer
 from sklearn.svm import SVR
-from statsmodels.tools.sm_exceptions import ConvergenceWarning
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
 
@@ -81,7 +80,7 @@ def data_safety_checker(list_vote, actual_result):
 
 # data_input_df = pd.read_csv("ex_data.csv", header=None)
 list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
-data_input_df = pd.DataFrame(list_data, columns=['total_user', 'total_even', 'days'])
+data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"])
 
 """
 data column = total user in a day, how much online event held in one day,

From b3df9254205d1c4bd9314ad284896b90e47e738c Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 13:08:32 +0700
Subject: [PATCH 13/38] remove unused warnings import

---
 machine_learning/forecasting/run.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 16bd28a0529f..c1ec2da34a28 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -16,7 +16,6 @@
 from sklearn.preprocessing import Normalizer
 from sklearn.svm import SVR
 from statsmodels.tsa.statespace.sarimax import SARIMAX
-import warnings
 
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):

From cfa6a2f2d03a136eeec396f2536aae75c6648cca Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Mon, 12 Oct 2020 13:09:09 +0700
Subject: [PATCH 14/38] use black

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index c1ec2da34a28..b773ede5f91c 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -80,7 +80,7 @@ def data_safety_checker(list_vote, actual_result):
 
 # data_input_df = pd.read_csv("ex_data.csv", header=None)
 list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
-data_input_df = pd.DataFrame(list_data, columns=['total_user', 'total_even', 'days'])
+data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"])
 
 """
 data column = total user in a day, how much online event held in one day,

From cefc5f4505a4b2211f2c54e87076c3b65286280f Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 09:22:17 +0700
Subject: [PATCH 15/38] add more hints to describe

---
 machine_learning/forecasting/run.py | 51 +++++++++++++++++++----------
 1 file changed, 34 insertions(+), 17 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index b773ede5f91c..d7cb01569ae1 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -18,30 +18,44 @@
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
 
-def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
-    # linear regression, return float
+def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt):
+    """
+    First method: linear regression
+    input : training data (date, total_user, total_event) in list of float
+    output : list of total user prediction in float
+    """
     x = []
     for i in range(len(train_dt)):
-        x.append([1, train_dt[i], train_mtch[i]])
+        x.append([1, train_dt[i], train_evnt[i]])
     x = np.array(x)
     y = np.array(train_usr)
     beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y)
-    prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2])
+    prediction = abs(beta[0] + test_dt[0] * beta[1] + test_evnt[0] + beta[2])
     return prediction
 
 
-def sarimax_predictor(train_user, train_match, test_match):
-    # sarimax, return list of float
+def sarimax_predictor(train_user, train_evnt, test_evnt):
+    """
+    second method: sarimax
+    input : training data (total_user,
+            with exog data = total_event) in list of float
+    output : list of total user prediction in float
+    """
     order = (1, 2, 1)
     s_order = (1, 1, 0, 7)
-    model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order)
+    model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order)
     model_fit = model.fit(disp=False, maxiter=600, method="nm")
-    result = model_fit.predict(1, len(test_match), exog=[test_match])
+    result = model_fit.predict(1, len(test_evnt), exog=[test_evnt])
     return result[0]
 
 
 def support_machine_regressor(x_train, x_test, train_user):
-    # svr, return list of float
+    """
+    Third method: SVR
+    input : training data (date, total_user, total_event) in list of float
+            where x = list of set (date and total event)
+    output : list of total user prediction in float
+    """
     regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
     regressor.fit(x_train, train_user)
     y_pred = regressor.predict(x_test)
@@ -50,8 +64,12 @@ def support_machine_regressor(x_train, x_test, train_user):
 
 
 def interquartile_range_checker(train_user):
-    # optional
-    # return low limit and upper limit for outlier
+    """
+    Optional method: interquatile range
+    input : list of total user in float
+    output : low limit of input in float
+    this method can be used to check whether some data is outlier or not
+    """
     train_user.sort()
     q1 = np.percentile(train_user, 25)
     q3 = np.percentile(train_user, 75)
@@ -81,7 +99,6 @@ def data_safety_checker(list_vote, actual_result):
 # data_input_df = pd.read_csv("ex_data.csv", header=None)
 list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
 data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"])
-
 """
 data column = total user in a day, how much online event held in one day,
 what day is that(sunday-saturday)
@@ -92,7 +109,7 @@ def data_safety_checker(list_vote, actual_result):
 # split data
 total_date = normalize_df[:, 2].tolist()
 total_user = normalize_df[:, 0].tolist()
-total_match = normalize_df[:, 1].tolist()
+total_event = normalize_df[:, 1].tolist()
 
 # for svr (input variable = total date and total match)
 x = normalize_df[:, [1, 2]].tolist()
@@ -102,17 +119,17 @@ def data_safety_checker(list_vote, actual_result):
 # for linear reression & sarimax
 trn_date = total_date[: len(total_date) - 1]
 trn_user = total_user[: len(total_user) - 1]
-trn_match = total_match[: len(total_match) - 1]
+trn_event = total_event[: len(total_event) - 1]
 
 tst_date = total_date[len(total_date) - 1 :]
 tst_user = total_user[len(total_user) - 1 :]
-tst_match = total_match[len(total_match) - 1 :]
+tst_event = total_event[len(total_event) - 1 :]
 
 
 # voting system with forecasting
 res_vote = []
-res_vote.append(lin_reg_pred(trn_date, trn_user, trn_match, tst_date, tst_match))
-res_vote.append(sarimax_predictor(trn_user, trn_match, tst_match))
+res_vote.append(lin_reg_pred(trn_date, trn_user, trn_event, tst_date, tst_event))
+res_vote.append(sarimax_predictor(trn_user, trn_event, tst_event))
 res_vote.append(support_machine_regressor(x_train, x_test, trn_user))
 
 # check the safety of todays'data^^

From 85b37cc4de22e1b5ca2951552085a96a434bdb04 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 09:36:05 +0700
Subject: [PATCH 16/38] add doctest

---
 machine_learning/forecasting/run.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index d7cb01569ae1..60a330ce7bcf 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -23,6 +23,8 @@ def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt):
     First method: linear regression
     input : training data (date, total_user, total_event) in list of float
     output : list of total user prediction in float
+    >>> lin_reg_red([2,3,4,5], [5,3,4,6], [3,1,2,4] [2,1], [2,2])
+    [3.95, 4.25]
     """
     x = []
     for i in range(len(train_dt)):
@@ -40,6 +42,8 @@ def sarimax_predictor(train_user, train_evnt, test_evnt):
     input : training data (total_user,
             with exog data = total_event) in list of float
     output : list of total user prediction in float
+    >>> sarimax_predictor([5,7,8,9], [3,1,2,4], [2,1])
+    [10.67, 13.15]
     """
     order = (1, 2, 1)
     s_order = (1, 1, 0, 7)
@@ -55,6 +59,8 @@ def support_machine_regressor(x_train, x_test, train_user):
     input : training data (date, total_user, total_event) in list of float
             where x = list of set (date and total event)
     output : list of total user prediction in float
+    >>> support_machine_regressor([5,7,8,9], [3,1,2,4], [2,1])
+    [11.23, 12.23]
     """
     regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
     regressor.fit(x_train, train_user)
@@ -69,6 +75,8 @@ def interquartile_range_checker(train_user):
     input : list of total user in float
     output : low limit of input in float
     this method can be used to check whether some data is outlier or not
+    >>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10])
+    2.20
     """
     train_user.sort()
     q1 = np.percentile(train_user, 25)

From 13d23b66f50421e95288bfc3c7e7a16b7dc19bde Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 09:39:06 +0700
Subject: [PATCH 17/38] finding whitespace

---
 machine_learning/forecasting/run.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 60a330ce7bcf..97a198818857 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -37,14 +37,6 @@ def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt):
 
 
 def sarimax_predictor(train_user, train_evnt, test_evnt):
-    """
-    second method: sarimax
-    input : training data (total_user,
-            with exog data = total_event) in list of float
-    output : list of total user prediction in float
-    >>> sarimax_predictor([5,7,8,9], [3,1,2,4], [2,1])
-    [10.67, 13.15]
-    """
     order = (1, 2, 1)
     s_order = (1, 1, 0, 7)
     model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order)

From aa0daa511bf6a4b7ea5dd548d14222fe2a2df61a Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 09:59:29 +0700
Subject: [PATCH 18/38] fixing doctest

---
 machine_learning/forecasting/run.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 97a198818857..839ebae9446d 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -19,13 +19,6 @@
 
 
 def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt):
-    """
-    First method: linear regression
-    input : training data (date, total_user, total_event) in list of float
-    output : list of total user prediction in float
-    >>> lin_reg_red([2,3,4,5], [5,3,4,6], [3,1,2,4] [2,1], [2,2])
-    [3.95, 4.25]
-    """
     x = []
     for i in range(len(train_dt)):
         x.append([1, train_dt[i], train_evnt[i]])
@@ -37,6 +30,14 @@ def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt):
 
 
 def sarimax_predictor(train_user, train_evnt, test_evnt):
+    """
+    second method: sarimax
+    input : training data (total_user,
+            with exog data = total_event) in list of float
+    output : list of total user prediction in float
+    >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
+    6.6666671111109626
+    """
     order = (1, 2, 1)
     s_order = (1, 1, 0, 7)
     model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order)
@@ -51,13 +52,12 @@ def support_machine_regressor(x_train, x_test, train_user):
     input : training data (date, total_user, total_event) in list of float
             where x = list of set (date and total event)
     output : list of total user prediction in float
-    >>> support_machine_regressor([5,7,8,9], [3,1,2,4], [2,1])
-    [11.23, 12.23]
+    >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
+    1.634932078116079
     """
     regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
     regressor.fit(x_train, train_user)
     y_pred = regressor.predict(x_test)
-
     return y_pred[0]
 
 
@@ -68,7 +68,7 @@ def interquartile_range_checker(train_user):
     output : low limit of input in float
     this method can be used to check whether some data is outlier or not
     >>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10])
-    2.20
+    2.8
     """
     train_user.sort()
     q1 = np.percentile(train_user, 25)

From 1e6f92362a3f3272011459f8ff920ad3a427a06c Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 10:05:39 +0700
Subject: [PATCH 19/38] delete

---
 machine_learning/forecasting/run.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 839ebae9446d..cb008162cba2 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -30,14 +30,6 @@ def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt):
 
 
 def sarimax_predictor(train_user, train_evnt, test_evnt):
-    """
-    second method: sarimax
-    input : training data (total_user,
-            with exog data = total_event) in list of float
-    output : list of total user prediction in float
-    >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
-    6.6666671111109626
-    """
     order = (1, 2, 1)
     s_order = (1, 1, 0, 7)
     model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order)
@@ -47,14 +39,6 @@ def sarimax_predictor(train_user, train_evnt, test_evnt):
 
 
 def support_machine_regressor(x_train, x_test, train_user):
-    """
-    Third method: SVR
-    input : training data (date, total_user, total_event) in list of float
-            where x = list of set (date and total event)
-    output : list of total user prediction in float
-    >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
-    1.634932078116079
-    """
     regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
     regressor.fit(x_train, train_user)
     y_pred = regressor.predict(x_test)

From ab98e753dce2fb68817ef507dc8023dc9d9bab44 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 10:10:38 +0700
Subject: [PATCH 20/38] revert back

---
 machine_learning/forecasting/run.py | 32 ++++++++++++-----------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index cb008162cba2..0f3144eb166e 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -18,23 +18,23 @@
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
 
-def lin_reg_pred(train_dt, train_usr, train_evnt, test_dt, test_evnt):
+def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
     x = []
     for i in range(len(train_dt)):
-        x.append([1, train_dt[i], train_evnt[i]])
+        x.append([1, train_dt[i], train_mtch[i]])
     x = np.array(x)
     y = np.array(train_usr)
     beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y)
-    prediction = abs(beta[0] + test_dt[0] * beta[1] + test_evnt[0] + beta[2])
+    prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2])
     return prediction
 
 
-def sarimax_predictor(train_user, train_evnt, test_evnt):
+def sarimax_predictor(train_user, train_match, test_match):
     order = (1, 2, 1)
     s_order = (1, 1, 0, 7)
-    model = SARIMAX(train_user, exog=train_evnt, order=order, seasonal_order=s_order)
+    model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order)
     model_fit = model.fit(disp=False, maxiter=600, method="nm")
-    result = model_fit.predict(1, len(test_evnt), exog=[test_evnt])
+    result = model_fit.predict(1, len(test_match), exog=[test_match])
     return result[0]
 
 
@@ -42,18 +42,11 @@ def support_machine_regressor(x_train, x_test, train_user):
     regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
     regressor.fit(x_train, train_user)
     y_pred = regressor.predict(x_test)
+
     return y_pred[0]
 
 
 def interquartile_range_checker(train_user):
-    """
-    Optional method: interquatile range
-    input : list of total user in float
-    output : low limit of input in float
-    this method can be used to check whether some data is outlier or not
-    >>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10])
-    2.8
-    """
     train_user.sort()
     q1 = np.percentile(train_user, 25)
     q3 = np.percentile(train_user, 75)
@@ -83,6 +76,7 @@ def data_safety_checker(list_vote, actual_result):
 # data_input_df = pd.read_csv("ex_data.csv", header=None)
 list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
 data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"])
+
 """
 data column = total user in a day, how much online event held in one day,
 what day is that(sunday-saturday)
@@ -93,7 +87,7 @@ def data_safety_checker(list_vote, actual_result):
 # split data
 total_date = normalize_df[:, 2].tolist()
 total_user = normalize_df[:, 0].tolist()
-total_event = normalize_df[:, 1].tolist()
+total_match = normalize_df[:, 1].tolist()
 
 # for svr (input variable = total date and total match)
 x = normalize_df[:, [1, 2]].tolist()
@@ -103,17 +97,17 @@ def data_safety_checker(list_vote, actual_result):
 # for linear reression & sarimax
 trn_date = total_date[: len(total_date) - 1]
 trn_user = total_user[: len(total_user) - 1]
-trn_event = total_event[: len(total_event) - 1]
+trn_match = total_match[: len(total_match) - 1]
 
 tst_date = total_date[len(total_date) - 1 :]
 tst_user = total_user[len(total_user) - 1 :]
-tst_event = total_event[len(total_event) - 1 :]
+tst_match = total_match[len(total_match) - 1 :]
 
 
 # voting system with forecasting
 res_vote = []
-res_vote.append(lin_reg_pred(trn_date, trn_user, trn_event, tst_date, tst_event))
-res_vote.append(sarimax_predictor(trn_user, trn_event, tst_event))
+res_vote.append(lin_reg_pred(trn_date, trn_user, trn_match, tst_date, tst_match))
+res_vote.append(sarimax_predictor(trn_user, trn_match, tst_match))
 res_vote.append(support_machine_regressor(x_train, x_test, trn_user))
 
 # check the safety of todays'data^^

From 38513880c9a383a4dc04aa8c09a4efcc1cf28781 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 10:13:54 +0700
Subject: [PATCH 21/38] revert back

---
 machine_learning/forecasting/run.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 0f3144eb166e..b773ede5f91c 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -19,6 +19,7 @@
 
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
+    # linear regression, return float
     x = []
     for i in range(len(train_dt)):
         x.append([1, train_dt[i], train_mtch[i]])
@@ -30,6 +31,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
 
 
 def sarimax_predictor(train_user, train_match, test_match):
+    # sarimax, return list of float
     order = (1, 2, 1)
     s_order = (1, 1, 0, 7)
     model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order)
@@ -39,6 +41,7 @@ def sarimax_predictor(train_user, train_match, test_match):
 
 
 def support_machine_regressor(x_train, x_test, train_user):
+    # svr, return list of float
     regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
     regressor.fit(x_train, train_user)
     y_pred = regressor.predict(x_test)
@@ -47,6 +50,8 @@ def support_machine_regressor(x_train, x_test, train_user):
 
 
 def interquartile_range_checker(train_user):
+    # optional
+    # return low limit and upper limit for outlier
     train_user.sort()
     q1 = np.percentile(train_user, 25)
     q3 = np.percentile(train_user, 75)

From 6fb8a22d4e45608f6b0b61f4eb144190edf12875 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 10:21:58 +0700
Subject: [PATCH 22/38] revert back again

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index b773ede5f91c..fc52d90f3cef 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -19,7 +19,7 @@
 
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
-    # linear regression, return float
+    # linear regression, return floatss
     x = []
     for i in range(len(train_dt)):
         x.append([1, train_dt[i], train_mtch[i]])

From 574d25d449970b094d4393bb5fe34c86f99715a7 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 10:27:02 +0700
Subject: [PATCH 23/38] revert back again

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index fc52d90f3cef..03a609f6b471 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -19,7 +19,7 @@
 
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
-    # linear regression, return floatss
+    # linear regression, return floats
     x = []
     for i in range(len(train_dt)):
         x.append([1, train_dt[i], train_mtch[i]])

From c151dd5b57c0f1cef7ec6a776b68b9729ecd9a3a Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 10:29:01 +0700
Subject: [PATCH 24/38] revert back again

---
 machine_learning/forecasting/run.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 03a609f6b471..22fe9a532fd7 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -45,7 +45,6 @@ def support_machine_regressor(x_train, x_test, train_user):
     regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
     regressor.fit(x_train, train_user)
     y_pred = regressor.predict(x_test)
-
     return y_pred[0]
 
 
@@ -57,7 +56,6 @@ def interquartile_range_checker(train_user):
     q3 = np.percentile(train_user, 75)
     iqr = q3 - q1
     low_lim = q1 - (iqr * 0.1)
-
     return low_lim
 
 

From bf45d77ea56a36f1451e7676c2e59a23fce2f19d Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 16:09:40 +0700
Subject: [PATCH 25/38] try trimming whitespace

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 22fe9a532fd7..5367d2026517 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -19,7 +19,7 @@
 
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
-    # linear regression, return floats
+    # linear regression, return floatss
     x = []
     for i in range(len(train_dt)):
         x.append([1, train_dt[i], train_mtch[i]])

From 4cdec16f69ca03dea507631679e1afd1b00be751 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 16:24:20 +0700
Subject: [PATCH 26/38] try adding doctypeand etc

---
 machine_learning/forecasting/run.py | 36 +++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 5367d2026517..b9dc34bbcefe 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -19,7 +19,13 @@
 
 
 def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
-    # linear regression, return floatss
+    """
+    First method: linear regression
+    input : training data (date, total_user, total_event) in list of float
+    output : list of total user prediction in float
+    >>> lin_reg_red([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2])
+    3.0000424034255513
+    """
     x = []
     for i in range(len(train_dt)):
         x.append([1, train_dt[i], train_mtch[i]])
@@ -31,7 +37,14 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
 
 
 def sarimax_predictor(train_user, train_match, test_match):
-    # sarimax, return list of float
+    """
+    second method: sarimax
+    input : training data (total_user,
+            with exog data = total_event) in list of float
+    output : list of total user prediction in float
+    >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
+    6.6666671111109626
+    """
     order = (1, 2, 1)
     s_order = (1, 1, 0, 7)
     model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order)
@@ -41,7 +54,14 @@ def sarimax_predictor(train_user, train_match, test_match):
 
 
 def support_machine_regressor(x_train, x_test, train_user):
-    # svr, return list of float
+    """
+    Third method: SVR
+    input : training data (date, total_user, total_event) in list of float
+            where x = list of set (date and total event)
+    output : list of total user prediction in float
+    >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
+    1.634932078116079
+    """
     regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
     regressor.fit(x_train, train_user)
     y_pred = regressor.predict(x_test)
@@ -49,8 +69,14 @@ def support_machine_regressor(x_train, x_test, train_user):
 
 
 def interquartile_range_checker(train_user):
-    # optional
-    # return low limit and upper limit for outlier
+    """
+    Optional method: interquatile range
+    input : list of total user in float
+    output : low limit of input in float
+    this method can be used to check whether some data is outlier or not
+    >>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10])
+    2.8
+    """
     train_user.sort()
     q1 = np.percentile(train_user, 25)
     q3 = np.percentile(train_user, 75)

From 43c9d4ccf791508c37b2a7b814f01576903e583b Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 16:58:54 +0700
Subject: [PATCH 27/38] fixing reviews

---
 machine_learning/forecasting/run.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index b9dc34bbcefe..d95858f051fe 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -23,12 +23,11 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
     First method: linear regression
     input : training data (date, total_user, total_event) in list of float
     output : list of total user prediction in float
-    >>> lin_reg_red([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2])
+    >>> lin_reg_pred([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2])
     3.0000424034255513
     """
     x = []
-    for i in range(len(train_dt)):
-        x.append([1, train_dt[i], train_mtch[i]])
+    x = [[1, item, train_mtch[i]] for i, item in enumerate(train_dt)]
     x = np.array(x)
     y = np.array(train_usr)
     beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y)
@@ -39,15 +38,18 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
 def sarimax_predictor(train_user, train_match, test_match):
     """
     second method: sarimax
-    input : training data (total_user,
-            with exog data = total_event) in list of float
+    sarimax is a statistic method which using previous input
+    and learn its pattern to predict future data
+    input : training data (total_user, with exog data = total_event) in list of float
     output : list of total user prediction in float
     >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
     6.6666671111109626
     """
     order = (1, 2, 1)
-    s_order = (1, 1, 0, 7)
-    model = SARIMAX(train_user, exog=train_match, order=order, seasonal_order=s_order)
+    seasonal_order = (1, 1, 0, 7)
+    model = SARIMAX(
+        train_user, exog=train_match, order=order, seasonal_order=seasonal_order
+    )
     model_fit = model.fit(disp=False, maxiter=600, method="nm")
     result = model_fit.predict(1, len(test_match), exog=[test_match])
     return result[0]
@@ -55,7 +57,11 @@ def sarimax_predictor(train_user, train_match, test_match):
 
 def support_machine_regressor(x_train, x_test, train_user):
     """
-    Third method: SVR
+    Third method: Support vector regressor
+    svr is quite the same with svm(support vector machine)
+    it uses the same principles as the SVM for classification,
+    with only a few minor differences and the only different is that
+    it suits better for regression purpose
     input : training data (date, total_user, total_event) in list of float
             where x = list of set (date and total event)
     output : list of total user prediction in float

From 14ccdc88ac0c1e98c87aa37b69f5605c310656c9 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 17:05:42 +0700
Subject: [PATCH 28/38] deleting all the space

---
 machine_learning/forecasting/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index d95858f051fe..e81b7fa4804a 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -43,7 +43,7 @@ def sarimax_predictor(train_user, train_match, test_match):
     input : training data (total_user, with exog data = total_event) in list of float
     output : list of total user prediction in float
     >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
-    6.6666671111109626
+    3.0000424034255513
     """
     order = (1, 2, 1)
     seasonal_order = (1, 1, 0, 7)
@@ -63,7 +63,7 @@ def support_machine_regressor(x_train, x_test, train_user):
     with only a few minor differences and the only different is that
     it suits better for regression purpose
     input : training data (date, total_user, total_event) in list of float
-            where x = list of set (date and total event)
+    where x = list of set (date and total event)
     output : list of total user prediction in float
     >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
     1.634932078116079

From 8194902ad19fb49b9c91160faeba05b16ecf34af Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 17:11:25 +0700
Subject: [PATCH 29/38] fixing the build

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index e81b7fa4804a..2f7e25167bcc 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -24,7 +24,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
     input : training data (date, total_user, total_event) in list of float
     output : list of total user prediction in float
     >>> lin_reg_pred([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2])
-    3.0000424034255513
+    5.000000000000003
     """
     x = []
     x = [[1, item, train_mtch[i]] for i, item in enumerate(train_dt)]

From eab1d3b1b74d87f9e1af1df9b7e484ce1b4c2601 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 17:15:33 +0700
Subject: [PATCH 30/38] delete x

---
 machine_learning/forecasting/run.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 2f7e25167bcc..f5d33d438478 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -26,7 +26,6 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
     >>> lin_reg_pred([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2])
     5.000000000000003
     """
-    x = []
     x = [[1, item, train_mtch[i]] for i, item in enumerate(train_dt)]
     x = np.array(x)
     y = np.array(train_usr)

From d34853f089f031762f1c3e86238dce0b12c344fa Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 19:34:19 +0700
Subject: [PATCH 31/38] add description for safety checker

---
 machine_learning/forecasting/run.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index f5d33d438478..d6c2cc564557 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -42,7 +42,7 @@ def sarimax_predictor(train_user, train_match, test_match):
     input : training data (total_user, with exog data = total_event) in list of float
     output : list of total user prediction in float
     >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
-    3.0000424034255513
+    6.6666671111109626
     """
     order = (1, 2, 1)
     seasonal_order = (1, 1, 0, 7)
@@ -91,6 +91,14 @@ def interquartile_range_checker(train_user):
 
 
 def data_safety_checker(list_vote, actual_result):
+    """
+    Used to review all the votes (list result prediction)
+    and compare it to the actual result.
+    input : list of predictions
+    output : print whether it's safe or not
+    >>> data_safety_checker([2,3,4],5)
+    today's data is safe
+    """
     safe = 0
     not_safe = 0
     for i in list_vote:
@@ -108,8 +116,8 @@ def data_safety_checker(list_vote, actual_result):
 
 
 # data_input_df = pd.read_csv("ex_data.csv", header=None)
-list_data = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
-data_input_df = pd.DataFrame(list_data, columns=["total_user", "total_even", "days"])
+data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
+data_input_df = pd.DataFrame(data_input, columns=["total_user", "total_even", "days"])
 
 """
 data column = total user in a day, how much online event held in one day,

From 81effc54ff18aab234c166dd60e394ae8ecf45f9 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 19:42:28 +0700
Subject: [PATCH 32/38] deleting subscription integer

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index d6c2cc564557..95d28aadfd22 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -105,7 +105,7 @@ def data_safety_checker(list_vote, actual_result):
         if i > actual_result:
             safe = not_safe + 1
         else:
-            if abs(abs(i) - abs(actual_result[0])) <= 0.1:
+            if abs(abs(i) - abs(actual_result)) <= 0.1:
                 safe = safe + 1
             else:
                 not_safe = not_safe + 1

From 6c8f1af0d6f30e7816281ec7325f7ff8d43f80ce Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Tue, 13 Oct 2020 19:50:00 +0700
Subject: [PATCH 33/38] fix docthint

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 95d28aadfd22..b48998118e39 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -97,7 +97,7 @@ def data_safety_checker(list_vote, actual_result):
     input : list of predictions
     output : print whether it's safe or not
     >>> data_safety_checker([2,3,4],5)
-    today's data is safe
+    today's data is not safe
     """
     safe = 0
     not_safe = 0

From 070cba4e2b6192f04215a0711d5754608f34c92b Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Sat, 24 Oct 2020 16:45:34 +0700
Subject: [PATCH 34/38] make def to use function parameters and return values

---
 machine_learning/forecasting/run.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index b48998118e39..7e3f7269e755 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -18,7 +18,9 @@
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
 
-def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
+def lin_reg_pred(
+    train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list
+) -> float:
     """
     First method: linear regression
     input : training data (date, total_user, total_event) in list of float
@@ -34,7 +36,7 @@ def lin_reg_pred(train_dt, train_usr, train_mtch, test_dt, test_mtch):
     return prediction
 
 
-def sarimax_predictor(train_user, train_match, test_match):
+def sarimax_predictor(train_user: list, train_match: list, test_match: list) -> float:
     """
     second method: sarimax
     sarimax is a statistic method which using previous input
@@ -54,7 +56,7 @@ def sarimax_predictor(train_user, train_match, test_match):
     return result[0]
 
 
-def support_machine_regressor(x_train, x_test, train_user):
+def support_machine_regressor(x_train: list, x_test: list, train_user: list) -> float:
     """
     Third method: Support vector regressor
     svr is quite the same with svm(support vector machine)
@@ -73,7 +75,7 @@ def support_machine_regressor(x_train, x_test, train_user):
     return y_pred[0]
 
 
-def interquartile_range_checker(train_user):
+def interquartile_range_checker(train_user: list) -> float:
     """
     Optional method: interquatile range
     input : list of total user in float

From 28ac6494ca670ae1a378a4fd2e87890ab1346563 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Sat, 24 Oct 2020 20:06:05 +0700
Subject: [PATCH 35/38] Capitalize "Sarimax" in sarimax_predictor docstring

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 7e3f7269e755..bf3fe3f2dfec 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -38,7 +38,7 @@ def lin_reg_pred(
 
 def sarimax_predictor(train_user: list, train_match: list, test_match: list) -> float:
     """
-    second method: sarimax
+    second method: Sarimax
     sarimax is a statistic method which using previous input
     and learn its pattern to predict future data
     input : training data (total_user, with exog data = total_event) in list of float

From da69f838c03e3893b01a5861a51662d062508bd8 Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Sat, 24 Oct 2020 20:42:51 +0700
Subject: [PATCH 36/38] Rename predictor functions and simplify data_safety_checker output

---
 machine_learning/forecasting/run.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index bf3fe3f2dfec..346d94fa0b45 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -18,22 +18,21 @@
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
 
-def lin_reg_pred(
+def linear_regression_prediction(
     train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list
 ) -> float:
     """
     First method: linear regression
     input : training data (date, total_user, total_event) in list of float
     output : list of total user prediction in float
-    >>> lin_reg_pred([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2])
+    >>> linear_regression_prediction([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2])
     5.000000000000003
     """
     x = [[1, item, train_mtch[i]] for i, item in enumerate(train_dt)]
     x = np.array(x)
     y = np.array(train_usr)
     beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y)
-    prediction = abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2])
-    return prediction
+    return abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2])
 
 
 def sarimax_predictor(train_user: list, train_match: list, test_match: list) -> float:
@@ -56,7 +55,7 @@ def sarimax_predictor(train_user: list, train_match: list, test_match: list) ->
     return result[0]
 
 
-def support_machine_regressor(x_train: list, x_test: list, train_user: list) -> float:
+def support_vector_regressor(x_train: list, x_test: list, train_user: list) -> float:
     """
     Third method: Support vector regressor
     svr is quite the same with svm(support vector machine)
@@ -66,7 +65,7 @@ def support_machine_regressor(x_train: list, x_test: list, train_user: list) ->
     input : training data (date, total_user, total_event) in list of float
     where x = list of set (date and total event)
     output : list of total user prediction in float
-    >>> support_machine_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
+    >>> support_vector_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
     1.634932078116079
     """
     regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
@@ -99,7 +98,7 @@ def data_safety_checker(list_vote, actual_result):
     input : list of predictions
     output : print whether it's safe or not
     >>> data_safety_checker([2,3,4],5)
-    today's data is not safe
+    today's data = not safe
     """
     safe = 0
     not_safe = 0
@@ -111,10 +110,7 @@ def data_safety_checker(list_vote, actual_result):
                 safe = safe + 1
             else:
                 not_safe = not_safe + 1
-    if safe > not_safe:
-        print("today's data is safe")
-    else:
-        print("today's data is not safe")
+    print("today's data =", "not" if safe <= not_safe else "", "safe")
 
 
 # data_input_df = pd.read_csv("ex_data.csv", header=None)
@@ -150,9 +146,11 @@ def data_safety_checker(list_vote, actual_result):
 
 # voting system with forecasting
 res_vote = []
-res_vote.append(lin_reg_pred(trn_date, trn_user, trn_match, tst_date, tst_match))
+res_vote.append(
+    linear_regression_prediction(trn_date, trn_user, trn_match, tst_date, tst_match)
+)
 res_vote.append(sarimax_predictor(trn_user, trn_match, tst_match))
-res_vote.append(support_machine_regressor(x_train, x_test, trn_user))
+res_vote.append(support_vector_regressor(x_train, x_test, trn_user))
 
 # check the safety of todays'data^^
 data_safety_checker(res_vote, tst_user)

From 3b18a483fb1de578f38df701e15f750676af8f9b Mon Sep 17 00:00:00 2001
From: FVFYK3GEHV22 <fvfyk3gehv22@FVFYK3GEHV22s-MacBook-Pro.local>
Date: Sat, 24 Oct 2020 20:45:38 +0700
Subject: [PATCH 37/38] Add parameter type hints to data_safety_checker

---
 machine_learning/forecasting/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 346d94fa0b45..c33b1a926a08 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -91,13 +91,13 @@ def interquartile_range_checker(train_user: list) -> float:
     return low_lim
 
 
-def data_safety_checker(list_vote, actual_result):
+def data_safety_checker(list_vote: list, actual_result: float):
     """
     Used to review all the votes (list result prediction)
     and compare it to the actual result.
     input : list of predictions
     output : print whether it's safe or not
-    >>> data_safety_checker([2,3,4],5)
+    >>> data_safety_checker([2,3,4],5.0)
     today's data = not safe
     """
     safe = 0

From 85723b60754653a91e84e0dcd227ef0d4578f7ad Mon Sep 17 00:00:00 2001
From: Christian Clauss <cclauss@me.com>
Date: Sat, 24 Oct 2020 16:02:12 +0200
Subject: [PATCH 38/38] Update run.py

---
 machine_learning/forecasting/run.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index c33b1a926a08..467371e8d2ff 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -91,14 +91,14 @@ def interquartile_range_checker(train_user: list) -> float:
     return low_lim
 
 
-def data_safety_checker(list_vote: list, actual_result: float):
+def data_safety_checker(list_vote: list, actual_result: float) -> None:
     """
     Used to review all the votes (list result prediction)
     and compare it to the actual result.
     input : list of predictions
     output : print whether it's safe or not
     >>> data_safety_checker([2,3,4],5.0)
-    today's data = not safe
+    Today's data is not safe.
     """
     safe = 0
     not_safe = 0
@@ -110,7 +110,7 @@ def data_safety_checker(list_vote: list, actual_result: float):
                 safe = safe + 1
             else:
                 not_safe = not_safe + 1
-    print("today's data =", "not" if safe <= not_safe else "", "safe")
+    print(f"Today's data is {'not ' if safe <= not_safe else ''}safe.")
 
 
 # data_input_df = pd.read_csv("ex_data.csv", header=None)