Skip to content

Update run.py in machine_learning/forecasting #8957

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits on Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion machine_learning/forecasting/ex_data.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
total_user,total_events,days
total_users,total_events,days
18231,0.0,1
22621,1.0,2
15675,0.0,3
Expand Down
38 changes: 21 additions & 17 deletions machine_learning/forecasting/run.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
this is code for forecasting
but i modified it and used it for safety checker of data
but I modified it and used it for safety checker of data
for ex: you have an online shop and for some reason some data are
missing (the amount of data that u expected are not supposed to be)
then we can use it
Expand Down Expand Up @@ -102,6 +102,10 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
"""
safe = 0
not_safe = 0

if not isinstance(actual_result, float):
raise TypeError("Actual result should be float. Value passed is a list")

for i in list_vote:
if i > actual_result:
safe = not_safe + 1
Expand All @@ -114,11 +118,11 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:


if __name__ == "__main__":
# data_input_df = pd.read_csv("ex_data.csv", header=None)
data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
data_input_df = pd.DataFrame(
data_input, columns=["total_user", "total_even", "days"]
)
data_input_df = pd.read_csv("ex_data.csv")
# data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
# data_input_df = pd.DataFrame(
# data_input, columns=["total_user", "total_even", "days"]
# )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
# data_input_df = pd.DataFrame(
# data_input, columns=["total_user", "total_even", "days"]
# )

Let's just delete the old code rather than commenting it out.


"""
data column = total user in a day, how much online event held in one day,
Expand All @@ -138,23 +142,23 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
x_test = x[len(x) - 1 :]

# for linear regression & sarimax
trn_date = total_date[: len(total_date) - 1]
trn_user = total_user[: len(total_user) - 1]
trn_match = total_match[: len(total_match) - 1]
train_date = total_date[: len(total_date) - 1]
train_user = total_user[: len(total_user) - 1]
train_match = total_match[: len(total_match) - 1]

tst_date = total_date[len(total_date) - 1 :]
tst_user = total_user[len(total_user) - 1 :]
tst_match = total_match[len(total_match) - 1 :]
test_date = total_date[len(total_date) - 1 :]
test_user = total_user[len(total_user) - 1 :]
test_match = total_match[len(total_match) - 1 :]

# voting system with forecasting
res_vote = [
linear_regression_prediction(
trn_date, trn_user, trn_match, tst_date, tst_match
train_date, train_user, train_match, test_date, test_match
),
sarimax_predictor(trn_user, trn_match, tst_match),
support_vector_regressor(x_train, x_test, trn_user),
sarimax_predictor(train_user, train_match, test_match),
support_vector_regressor(x_train, x_test, train_user),
]

# check the safety of today's data
not_str = "" if data_safety_checker(res_vote, tst_user) else "not "
print("Today's data is {not_str}safe.")
not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
print(f"Today's data is {not_str}safe.")