From a1433fdb16f3f2793997a5c9e541b50410a25f38 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Tue, 11 Aug 2020 15:05:31 +0530
Subject: [PATCH 01/17] Stock market prediction using gradient boosting

---
 ...ecast-using-gradient-boosting-regressor.py | 123 ++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py

diff --git a/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py b/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py
new file mode 100644
index 000000000000..08f648dc5af5
--- /dev/null
+++ b/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+#
+# Problem Statement: Stock Market Analysis and Prediction
+#
+# Explanation: Our aim is to create software that analyses previous stock data of certain companies,
+# with help of certain parameters that affect stock value. We are going to implement these values in data mining algorithms.
+# This will also help us to determine the values that a particular stock will have in the near future.
+# We will determine the Month’s High and Low with help of data mining algorithms.
+# In this project we are going to take five years of stock data for our analysis and prediction
+
+
+#Install the dependencies pip install quandl
+import quandl
+import numpy as np
+#plotly.offline.init_notebook_mode(connected=True)
+import plotly.offline as py
+from sklearn.model_selection import train_test_split
+from plotly.offline import iplot, init_notebook_mode
+init_notebook_mode()
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.metrics import r2_score, mean_squared_error
+import matplotlib.pyplot as plt
+
+
+# Get the stock data
+df = quandl.get("WIKI/MSFT")
+# Take a look at the data
+print(df.head())
+
+
+import plotly.express as px
+fig = px.scatter(df, x="High", y="Low")
+fig.show()
+
+
+# Get the Adjusted Close Price
+df = df[['Adj. Close']]
+# Take a look at the new data
+print(df.head())
+
+
+
+# A variable for predicting 'n' days out into the future
+forecast_out = 30 #'n=30' days
+#Create another column (the target ) shifted 'n' units up
+df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)
+#print the new data set
+print(df.tail())
+
+
+# Convert the dataframe to a numpy array
+X = np.array(df.drop(['Prediction'],1))
+
+#Remove the last '30' rows
+X = X[:-forecast_out]
+print(X)
+
+
+
+### Create the dependent data set (y) #####
+# Convert the dataframe to a numpy array
+y = np.array(df['Prediction'])
+# Get all of the y values except the last '30' rows
+y = y[:-forecast_out]
+print(y)
+
+
+x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
+
+
+
+
+params = {
+    'loss':'ls',
+    'learning_rate':0.1,
+    'n_estimators':500,
+    'min_samples_split':2,
+    'min_weight_fraction_leaf':0.0,
+    'max_depth':3,
+
+}
+model = GradientBoostingRegressor(**params)
+model.fit(x_train,y_train)
+model.score(x_train,y_train).round(3)
+model.score(x_test,y_test).round(3)
+y_pred = model.predict(x_test)
+print('The mean squared error is: ', mean_squared_error(y_test,y_pred))
+print('The variance is: ', r2_score(y_test,y_pred))
+
+# So let's run the model against the test data
+from sklearn.model_selection import cross_val_predict
+
+fig, ax = plt.subplots()
+ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
+ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
+ax.set_xlabel('Actual')
+ax.set_ylabel('Predicted')
+ax.set_title("Ground Truth vs Predicted")
+plt.show()
+# deviance is a goodness-of-fit statistic for a statistical model; it is often used for statistical hypothesis testing. It is a generalization of the idea of using the sum of squares
+#of residuals in ordinary least squares to cases where model-fitting is achieved by maximum likelihood.
+test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
+for i, y_pred in enumerate(model.staged_predict(x_test)):
+    test_score[i] = model.loss_(y_test, y_pred)
+
+fig = plt.figure(figsize=(10, 6))
+plt.subplot(1, 1, 1)
+plt.title('Deviance')
+plt.plot(np.arange(params['n_estimators']) + 1, model.train_score_, 'b-',
+         label='Training Set Deviance')
+plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
+         label='Test Set Deviance')
+plt.legend(loc='upper right')
+plt.xlabel('Boosting Iterations')
+plt.ylabel('Deviance')
+fig.tight_layout()
+plt.show()
+
+
+
+

From 576faf809636045738928004a511ffc3120e5eb1 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Tue, 11 Aug 2020 16:19:31 +0530
Subject: [PATCH 02/17] To reverse a string using stack

---
 .../stacks/reverse_string_using_stack.py      | 39 +++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 data_structures/stacks/reverse_string_using_stack.py

diff --git a/data_structures/stacks/reverse_string_using_stack.py b/data_structures/stacks/reverse_string_using_stack.py
new file mode 100644
index 000000000000..589dc1f651fc
--- /dev/null
+++ b/data_structures/stacks/reverse_string_using_stack.py
@@ -0,0 +1,39 @@
+# Write a program that takes a string as input and returns the reverse of that string using a stack
+
+class Stack():
+    def __init__(self):
+        self.items = []
+# this function will take the item and push onto the top of the stack
+    def push(self, item):
+        return self.items.append(item)
+# this function pop will take out the top element from the stack
+    def pop(self):
+        return self.items.pop()
+# to check if the stack if empy without this we can't reverse the given string.
+    def is_empty(self):
+        return self.items == []
+# this function is optional it will just return to top of the stack when called
+    def peek(self):
+        if not self.is_empty():
+            return self.items[-1]
+
+def _reverse_(Stack, input_str_):
+    # loop through the string then push it into the stack then
+    # pop the item fro tne stack append to an empty string variable
+
+    for i in range(len(input_str_)):
+
+        Stack.push(input_str_[i])
+# this empty string is where we are going to store the reversed string
+    result = ''
+    while not Stack.is_empty():
+        result = result + Stack.pop()
+    return result
+
+
+stack = Stack()
+#input_str_ = ' Abdoulaye'
+input_str_= input("Enter the string to reverse: ")
+print(_reverse_(stack, input_str_))
+# Enter the string to reverse: Hello world
+# dlrow olleH
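A note on PATCH 02: the pop-until-empty loop is what produces the reversed order, and it can be sanity-checked without the interactive input() call. A minimal self-contained sketch of the same idea, using a plain Python list as the stack (the names here are illustrative, not part of the patch):

    # Reverse a string with an explicit stack, mirroring _reverse_ above.
    text = "Hello world"
    stack = []                        # a plain list serves as the stack
    for ch in text:
        stack.append(ch)              # push each character
    reversed_text = ""
    while stack:
        reversed_text += stack.pop()  # LIFO pops come back in reverse order
    assert reversed_text == "dlrow olleH"
    print(reversed_text)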
From 607e36dd0d85e401fc49ba11ad9a773daabdeefd Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Tue, 11 Aug 2020 16:35:51 +0530
Subject: [PATCH 03/17] To reverse string using stack

---
 data_structures/stacks/reverse_string_using_stack.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/data_structures/stacks/reverse_string_using_stack.py b/data_structures/stacks/reverse_string_using_stack.py
index 589dc1f651fc..c2614a79dae0 100644
--- a/data_structures/stacks/reverse_string_using_stack.py
+++ b/data_structures/stacks/reverse_string_using_stack.py
@@ -9,7 +9,7 @@ def push(self, item):
 # this function pop will take out the top element from the stack
     def pop(self):
         return self.items.pop()
-# to check if the stack if empy without this we can't reverse the given string.
+# to check if the stack if empty without this we can't reverse the given string.
     def is_empty(self):
         return self.items == []
 # this function is optional it will just return to top of the stack when called
     def peek(self):
         if not self.is_empty():
             return self.items[-1]
@@ -19,7 +19,7 @@ def peek(self):
 
 def _reverse_(Stack, input_str_):
     # loop through the string then push it into the stack then
-    # pop the item fro tne stack append to an empty string variable
+    # pop the item from the stack append to an empty string variable
 
     for i in range(len(input_str_)):
 

From d77a3be3fb1f2a8934d29d69b4d6cdb51023bc1d Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Tue, 11 Aug 2020 17:02:09 +0530
Subject: [PATCH 04/17] Predict Stock Prices Python & Machine Learning

---
 ...ecast-using-gradient-boosting-regressor.py | 42 ++-----------------
 1 file changed, 4 insertions(+), 38 deletions(-)

diff --git a/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py b/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py
index 08f648dc5af5..822e79175553 100644
--- a/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py
+++ b/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py
@@ -1,76 +1,41 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-#
-# Problem Statement: Stock Market Analysis and Prediction
-#
-# Explanation: Our aim is to create software that analyses previous stock data of certain companies,
-# with help of certain parameters that affect stock value. We are going to implement these values in data mining algorithms.
-# This will also help us to determine the values that a particular stock will have in the near future.
-# We will determine the Month’s High and Low with help of data mining algorithms.
-# In this project we are going to take five years of stock data for our analysis and prediction
-
-
+"""We are going to predict the adj. close price of Microsoft stock."""
 #Install the dependencies pip install quandl
 import quandl
 import numpy as np
-#plotly.offline.init_notebook_mode(connected=True)
-import plotly.offline as py
 from sklearn.model_selection import train_test_split
-from plotly.offline import iplot, init_notebook_mode
-init_notebook_mode()
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.metrics import r2_score, mean_squared_error
 import matplotlib.pyplot as plt
-
-
 # Get the stock data
 df = quandl.get("WIKI/MSFT")
 # Take a look at the data
 print(df.head())
-
-
 import plotly.express as px
 fig = px.scatter(df, x="High", y="Low")
 fig.show()
-
-
 # Get the Adjusted Close Price
 df = df[['Adj. Close']]
 # Take a look at the new data
 print(df.head())
-
-
-
 # A variable for predicting 'n' days out into the future
 forecast_out = 30 #'n=30' days
 #Create another column (the target ) shifted 'n' units up
 df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)
 #print the new data set
 print(df.tail())
-
-
 # Convert the dataframe to a numpy array
 X = np.array(df.drop(['Prediction'],1))
-
 #Remove the last '30' rows
 X = X[:-forecast_out]
 print(X)
-
-
-
 ### Create the dependent data set (y) #####
 # Convert the dataframe to a numpy array
 y = np.array(df['Prediction'])
 # Get all of the y values except the last '30' rows
 y = y[:-forecast_out]
 print(y)
-
-
 x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
-
-
-
-
+# these are the parameters that we pass to the gradient boosting regressor
 params = {
     'loss':'ls',
     'learning_rate':0.1,
     'n_estimators':500,
     'min_samples_split':2,
     'min_weight_fraction_leaf':0.0,
     'max_depth':3,
 
 }
 model = GradientBoostingRegressor(**params)
 model.fit(x_train,y_train)
 model.score(x_train,y_train).round(3)
 model.score(x_test,y_test).round(3)
 y_pred = model.predict(x_test)
 print('The mean squared error is: ', mean_squared_error(y_test,y_pred))
 print('The variance is: ', r2_score(y_test,y_pred))
 
 # So let's run the model against the test data
 from sklearn.model_selection import cross_val_predict
 
 fig, ax = plt.subplots()
 ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
 ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
 ax.set_xlabel('Actual')
 ax.set_ylabel('Predicted')
 ax.set_title("Ground Truth vs Predicted")
 plt.show()
-# deviance is a goodness-of-fit statistic for a statistical model; it is often used for statistical hypothesis testing. It is a generalization of the idea of using the sum of squares
+# deviance is a goodness-of-fit statistic for a statistical model; it is often used for statistical hypothesis testing.
+#It is a generalization of the idea of using the sum of squares
 #of residuals in ordinary least squares to cases where model-fitting is achieved by maximum likelihood.
 test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
 for i, y_pred in enumerate(model.staged_predict(x_test)):
     test_score[i] = model.loss_(y_test, y_pred)
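A note on PATCH 04: df['Prediction'] = df[['Adj. Close']].shift(-forecast_out) fills the last forecast_out rows of the target with NaN, which is exactly why both X and y are truncated with [:-forecast_out]. A minimal sketch of that target construction on synthetic prices (the quandl WIKI feed used above stopped updating in 2018, so a stand-in series is used here):

    import numpy as np
    import pandas as pd

    # Synthetic stand-in for the 'Adj. Close' column pulled from quandl above.
    df = pd.DataFrame({"Adj. Close": np.linspace(100.0, 130.0, 40)})
    forecast_out = 5
    # Target = the price forecast_out rows ahead; the final rows become NaN.
    df["Prediction"] = df[["Adj. Close"]].shift(-forecast_out)
    X = df[["Adj. Close"]].to_numpy()[:-forecast_out]  # drop rows with no target
    y = df["Prediction"].to_numpy()[:-forecast_out]
    assert not np.isnan(y).any()
    print(X.shape, y.shape)  # (35, 1) and (35,)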
From 2b153866670c260534f402751b27810136661ffd Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Tue, 11 Aug 2020 23:18:45 +0530
Subject: [PATCH 05/17] Gradient boosting regressor on Boston dataset

---
 .../Gradient-boosting-regressor.py            | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 machine_learning/Gradient-boosting-regressor.py

diff --git a/machine_learning/Gradient-boosting-regressor.py b/machine_learning/Gradient-boosting-regressor.py
new file mode 100644
index 000000000000..3d27b465b50a
--- /dev/null
+++ b/machine_learning/Gradient-boosting-regressor.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[17]:
+
+
+"""Implementation of GradientBoostingRegressor in sklearn using the
+   boston dataset which is very popular for regression problem to
+   predict house price.
+"""
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.datasets import load_boston
+from sklearn.metrics import mean_squared_error,r2_score
+from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
+from sklearn.model_selection import train_test_split
+
+def main():
+    # loading the dataset from the sklearn package
+    df = load_boston()
+    print(df.keys())
+    # now let constract a data frame with data and target variables
+    df_boston = pd.DataFrame(df.data,columns =df.feature_names)
+    # let add the target to the dataframe
+    df_boston['Price']= df.target
+    # let us print the first five rows using the head function
+    print(df_boston.head())
+    print(df_boston.describe().T)  # to see summary statistics of the dataset
+    # Feature selection means for independant and dependent variables
+    X = df_boston.iloc[:,:-1]
+    y = df_boston.iloc[:,-1] # target variable
+    # we are going to split the data with 75% train and 25% test sets.
+    X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 0, test_size = .25)
+    # now let set the parametes of our model
+    params = {'n_estimators': 500, 'max_depth': 5, 'min_samples_split': 4,
+              'learning_rate': 0.01, 'loss': 'ls'}
+    model = GradientBoostingRegressor(**params)
+    # training the model
+    model.fit(X_train,y_train)
+    """ let have a look on the train and test score to see how good the model fit the data"""
+    score = model.score(X_train,y_train).round(3)
+    print("Training score of GradientBoosting is :",score)
+    print("the test score of GradienBoosting is :",model.score(X_test,y_test).round(3))
+    # Let us evaluate the model by finding the errors
+    y_pred = model.predict(X_test)
+
+    # The mean squared error
+    print("Mean squared error: %.2f"% mean_squared_error(y_test, y_pred))
+    # Explained variance score: 1 is perfect prediction
+    print('Test Variance score: %.2f' % r2_score(y_test, y_pred))
+
+    # So let's run the model against the test data
+    fig, ax = plt.subplots()
+    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
+    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
+    ax.set_xlabel('Actual')
+    ax.set_ylabel('Predicted')
+    ax.set_title("Truth vs Predicted")
+    # this show function will display the ploting
+    plt.show()
+
+
+if __name__ =='__main__':
+    main()
+
+
+# In[ ]:
+
+
+
+
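A note on PATCH 05: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so this script only runs on older releases. A sketch of the same pipeline on a dataset that still ships with scikit-learn is below; the choice of fetch_california_housing is an assumption of this note, not part of the patch (it downloads the data on first use):

    from sklearn.datasets import fetch_california_housing
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.model_selection import train_test_split

    # California housing plays the same role the Boston dataset does above.
    data = fetch_california_housing()
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, random_state=0, test_size=0.25
    )
    model = GradientBoostingRegressor(
        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("MSE: %.2f" % mean_squared_error(y_test, y_pred))
    print("R^2: %.2f" % r2_score(y_test, y_pred))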
From 54827b61818121483339564c0482a38b75a180d2 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Tue, 11 Aug 2020 23:37:16 +0530
Subject: [PATCH 06/17] Gradient boosting regressor implementation

---
 machine_learning/Gradient-boosting-regressor.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/machine_learning/Gradient-boosting-regressor.py b/machine_learning/Gradient-boosting-regressor.py
index 3d27b465b50a..7f62caad77a3 100644
--- a/machine_learning/Gradient-boosting-regressor.py
+++ b/machine_learning/Gradient-boosting-regressor.py
@@ -1,9 +1,3 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-# In[17]:
-
-
 """Implementation of GradientBoostingRegressor in sklearn using the
    boston dataset which is very popular for regression problem to
    predict house price.
@@ -19,19 +13,19 @@ def main():
     # loading the dataset from the sklearn package
     df = load_boston()
     print(df.keys())
-    # now let constract a data frame with data and target variables
+    # now let construct a data frame with data and target variables
     df_boston = pd.DataFrame(df.data,columns =df.feature_names)
     # let add the target to the dataframe
     df_boston['Price']= df.target
     # let us print the first five rows using the head function
     print(df_boston.head())
     print(df_boston.describe().T)  # to see summary statistics of the dataset
-    # Feature selection means for independant and dependent variables
+    # Feature selection means for independent and dependent variables
     X = df_boston.iloc[:,:-1]
     y = df_boston.iloc[:,-1] # target variable
     # we are going to split the data with 75% train and 25% test sets.
     X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 0, test_size = .25)
-    # now let set the parametes of our model
+    # now let set the parameters of the model
     params = {'n_estimators': 500, 'max_depth': 5, 'min_samples_split': 4,
               'learning_rate': 0.01, 'loss': 'ls'}
     model = GradientBoostingRegressor(**params)
     # training the model
     model.fit(X_train,y_train)
     """ let have a look on the train and test score to see how good the model fit the data"""
     score = model.score(X_train,y_train).round(3)
     print("Training score of GradientBoosting is :",score)
     print("the test score of GradienBoosting is :",model.score(X_test,y_test).round(3))
     # Let us evaluate the model by finding the errors
     y_pred = model.predict(X_test)
 
     # The mean squared error
     print("Mean squared error: %.2f"% mean_squared_error(y_test, y_pred))
     # Explained variance score: 1 is perfect prediction
     print('Test Variance score: %.2f' % r2_score(y_test, y_pred))
@@ -56,7 +50,7 @@ def main():
     ax.set_xlabel('Actual')
     ax.set_ylabel('Predicted')
     ax.set_title("Truth vs Predicted")
-    # this show function will display the ploting
+    # this show function will display the plotting
     plt.show()

From c5e2e099b4da2bc59d100eda51b2140270795de2 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Tue, 11 Aug 2020 23:53:35 +0530
Subject: [PATCH 07/17] Gradient boosting regressor

---
 .../Gradient-boosting-regressor.py            | 72 ++++++++++---------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/machine_learning/Gradient-boosting-regressor.py b/machine_learning/Gradient-boosting-regressor.py
index 7f62caad77a3..cb9d673c34da 100644
--- a/machine_learning/Gradient-boosting-regressor.py
+++ b/machine_learning/Gradient-boosting-regressor.py
@@ -2,64 +2,70 @@
    boston dataset which is very popular for regression problem to
    predict house price.
 """
+
 import pandas as pd
 import matplotlib.pyplot as plt
 from sklearn.datasets import load_boston
-from sklearn.metrics import mean_squared_error,r2_score
-from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
+from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.model_selection import train_test_split
 
+
 def main():
+
     # loading the dataset from the sklearn package
     df = load_boston()
     print(df.keys())
     # now let construct a data frame with data and target variables
-    df_boston = pd.DataFrame(df.data,columns =df.feature_names)
+    df_boston = pd.DataFrame(df.data, columns=df.feature_names)
     # let add the target to the dataframe
-    df_boston['Price']= df.target
+    df_boston["Price"] = df.target
     # let us print the first five rows using the head function
     print(df_boston.head())
     print(df_boston.describe().T)  # to see summary statistics of the dataset
     # Feature selection means for independent and dependent variables
+
     X = df_boston.iloc[:, :-1]
     y = df_boston.iloc[:, -1]  # target variable
     # we are going to split the data with 75% train and 25% test sets.
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, random_state=0, test_size=0.25
     )
-    # now let set the parameters of the model
-    params = {'n_estimators': 500, 'max_depth': 5, 'min_samples_split': 4,
-              'learning_rate': 0.01, 'loss': 'ls'}
+    # model parameter
+    params = {
+        "n_estimators": 500,
+        "max_depth": 5,
+        "min_samples_split": 4,
+        "learning_rate": 0.01,
+        "loss": "ls",
+    }
     model = GradientBoostingRegressor(**params)
     # training the model
-    model.fit(X_train,y_train)
-    """ let have a look on the train and test score to see how good the model fit the data"""
-    score = model.score(X_train,y_train).round(3)
-    print("Training score of GradientBoosting is :",score)
-    print("the test score of GradienBoosting is :",model.score(X_test,y_test).round(3))
+    model.fit(X_train, y_train)
+    """let have a look on the train and test score to see how good the model fit the data"""
+    score = model.score(X_train, y_train).round(3)
+    print("Training score of GradientBoosting is :", score)
+    print(
+        "the test score of GradienBoosting is :", model.score(X_test, y_test).round(3)
+    )
     # Let us evaluate the model by finding the errors
     y_pred = model.predict(X_test)
 
     # The mean squared error
-    print("Mean squared error: %.2f"% mean_squared_error(y_test, y_pred))
+    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
     # Explained variance score: 1 is perfect prediction
-    print('Test Variance score: %.2f' % r2_score(y_test, y_pred))
-
+    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
+
     # So let's run the model against the test data
     fig, ax = plt.subplots()
     ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
-    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
-    ax.set_xlabel('Actual')
-    ax.set_ylabel('Predicted')
+    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=4)
+    ax.set_xlabel("Actual")
+    ax.set_ylabel("Predicted")
     ax.set_title("Truth vs Predicted")
     # this show function will display the plotting
     plt.show()
-
-
-if __name__ =='__main__':
-    main()
-
-
-# In[ ]:
-
-
-
+
+if __name__ == "__main__":
+    main()

From 74c8cd335200af7bcfceb12433981d28212cb7aa Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Wed, 12 Aug 2020 00:09:17 +0530
Subject: [PATCH 08/17] Gradient boosting regressor

---
 machine_learning/Gradient-boosting-regressor.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/machine_learning/Gradient-boosting-regressor.py b/machine_learning/Gradient-boosting-regressor.py
index cb9d673c34da..baf33943a64a 100644
--- a/machine_learning/Gradient-boosting-regressor.py
+++ b/machine_learning/Gradient-boosting-regressor.py
@@ -29,17 +29,10 @@ def main():
     y = df_boston.iloc[:, -1]  # target variable
     # we are going to split the data with 75% train and 25% test sets.
     X_train, X_test, y_train, y_test = train_test_split(
-        X, y, random_state=0, test_size=0.25
-    )
-    # model parameter
-    params = {
-        "n_estimators": 500,
-        "max_depth": 5,
-        "min_samples_split": 4,
-        "learning_rate": 0.01,
-        "loss": "ls",
-    }
-    model = GradientBoostingRegressor(**params)
+        X, y, random_state=0, test_size=0.25)
+
+    model = GradientBoostingRegressor(n_estimators = 500,
+          max_depth =5,min_samples_split=4,learning_rate=0.01 )
     # training the model
     model.fit(X_train, y_train)
     """let have a look on the train and test score to see how good the model fit the data"""
     score = model.score(X_train, y_train).round(3)

From 25552a17ccd09f1f6751ec324887c322db4c4354 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Wed, 12 Aug 2020 00:38:30 +0530
Subject: [PATCH 09/17] Gradient boosting regressor

---
 .../Gradient-boosting-regressor.py            | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/machine_learning/Gradient-boosting-regressor.py b/machine_learning/Gradient-boosting-regressor.py
index baf33943a64a..c07c047ce974 100644
--- a/machine_learning/Gradient-boosting-regressor.py
+++ b/machine_learning/Gradient-boosting-regressor.py
@@ -13,29 +13,33 @@
 
 def main():
 
-    # loading the dataset from the sklearn package
+    # loading the dataset from the sklearn
     df = load_boston()
     print(df.keys())
-    # now let construct a data frame with data and target variables
+    # now let construct a data frame
     df_boston = pd.DataFrame(df.data, columns=df.feature_names)
     # let add the target to the dataframe
     df_boston["Price"] = df.target
-    # let us print the first five rows using the head function
+    # print the first five rows using the head function
     print(df_boston.head())
-    print(df_boston.describe().T)  # to see summary statistics of the dataset
-    # Feature selection means for independent and dependent variables
+    # Summary statistics
+    print(df_boston.describe().T)
+    # Feature selection
 
     X = df_boston.iloc[:, :-1]
     y = df_boston.iloc[:, -1]  # target variable
-    # we are going to split the data with 75% train and 25% test sets.
+    # split the data with 75% train and 25% test sets.
     X_train, X_test, y_train, y_test = train_test_split(
-        X, y, random_state=0, test_size=0.25)
-
-    model = GradientBoostingRegressor(n_estimators = 500,
-          max_depth =5,min_samples_split=4,learning_rate=0.01 )
+        X, y, random_state=0, test_size=0.25
+    )
+
+    model = GradientBoostingRegressor(
+        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
+    )
     # training the model
     model.fit(X_train, y_train)
-    """let have a look on the train and test score to see how good the model fit the data"""
+    """let have a look on the train and test score
+    to see how good the model fit the data"""
     score = model.score(X_train, y_train).round(3)
From bf0be83ac3e65b446aadcdc0531a63fbb1214bce Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Thu, 13 Aug 2020 07:32:47 +0530
Subject: [PATCH 10/17] Removing files

---
 .../stacks/reverse_string_using_stack.py      | 39 --------
 ...ecast-using-gradient-boosting-regressor.py | 89 -------------------
 2 files changed, 128 deletions(-)
 delete mode 100644 data_structures/stacks/reverse_string_using_stack.py
 delete mode 100644 machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py

diff --git a/data_structures/stacks/reverse_string_using_stack.py b/data_structures/stacks/reverse_string_using_stack.py
deleted file mode 100644
index c2614a79dae0..000000000000
--- a/data_structures/stacks/reverse_string_using_stack.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Write a program that takes a string as input and returns the reverse of that string using a stack
-
-class Stack():
-    def __init__(self):
-        self.items = []
-# this function will take the item and push onto the top of the stack
-    def push(self, item):
-        return self.items.append(item)
-# this function pop will take out the top element from the stack
-    def pop(self):
-        return self.items.pop()
-# to check if the stack if empty without this we can't reverse the given string.
-    def is_empty(self):
-        return self.items == []
-# this function is optional it will just return to top of the stack when called
-    def peek(self):
-        if not self.is_empty():
-            return self.items[-1]
-
-def _reverse_(Stack, input_str_):
-    # loop through the string then push it into the stack then
-    # pop the item from the stack append to an empty string variable
-
-    for i in range(len(input_str_)):
-
-        Stack.push(input_str_[i])
-# this empty string is where we are going to store the reversed string
-    result = ''
-    while not Stack.is_empty():
-        result = result + Stack.pop()
-    return result
-
-
-stack = Stack()
-#input_str_ = ' Abdoulaye'
-input_str_= input("Enter the string to reverse: ")
-print(_reverse_(stack, input_str_))
-# Enter the string to reverse: Hello world
-# dlrow olleH

diff --git a/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py b/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py
deleted file mode 100644
index 822e79175553..000000000000
--- a/machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py
+++ /dev/null
@@ -1,89 +0,0 @@
-"""We are going to predict the adj. close price of Microsoft stock."""
-#Install the dependencies pip install quandl
-import quandl
-import numpy as np
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import GradientBoostingRegressor
-from sklearn.metrics import r2_score, mean_squared_error
-import matplotlib.pyplot as plt
-# Get the stock data
-df = quandl.get("WIKI/MSFT")
-# Take a look at the data
-print(df.head())
-import plotly.express as px
-fig = px.scatter(df, x="High", y="Low")
-fig.show()
-# Get the Adjusted Close Price
-df = df[['Adj. Close']]
-# Take a look at the new data
-print(df.head())
-# A variable for predicting 'n' days out into the future
-forecast_out = 30 #'n=30' days
-#Create another column (the target ) shifted 'n' units up
-df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)
-#print the new data set
-print(df.tail())
-# Convert the dataframe to a numpy array
-X = np.array(df.drop(['Prediction'],1))
-#Remove the last '30' rows
-X = X[:-forecast_out]
-print(X)
-### Create the dependent data set (y) #####
-# Convert the dataframe to a numpy array
-y = np.array(df['Prediction'])
-# Get all of the y values except the last '30' rows
-y = y[:-forecast_out]
-print(y)
-x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
-# these are the parameters that we pass to the gradient boosting regressor
-params = {
-    'loss':'ls',
-    'learning_rate':0.1,
-    'n_estimators':500,
-    'min_samples_split':2,
-    'min_weight_fraction_leaf':0.0,
-    'max_depth':3,
-
-}
-model = GradientBoostingRegressor(**params)
-model.fit(x_train,y_train)
-model.score(x_train,y_train).round(3)
-model.score(x_test,y_test).round(3)
-y_pred = model.predict(x_test)
-print('The mean squared error is: ', mean_squared_error(y_test,y_pred))
-print('The variance is: ', r2_score(y_test,y_pred))
-
-# So let's run the model against the test data
-from sklearn.model_selection import cross_val_predict
-
-fig, ax = plt.subplots()
-ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
-ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
-ax.set_xlabel('Actual')
-ax.set_ylabel('Predicted')
-ax.set_title("Ground Truth vs Predicted")
-plt.show()
-# deviance is a goodness-of-fit statistic for a statistical model; it is often used for statistical hypothesis testing.
-#It is a generalization of the idea of using the sum of squares
-#of residuals in ordinary least squares to cases where model-fitting is achieved by maximum likelihood.
-test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
-for i, y_pred in enumerate(model.staged_predict(x_test)):
-    test_score[i] = model.loss_(y_test, y_pred)
-
-fig = plt.figure(figsize=(10, 6))
-plt.subplot(1, 1, 1)
-plt.title('Deviance')
-plt.plot(np.arange(params['n_estimators']) + 1, model.train_score_, 'b-',
-         label='Training Set Deviance')
-plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
-         label='Test Set Deviance')
-plt.legend(loc='upper right')
-plt.xlabel('Boosting Iterations')
-plt.ylabel('Deviance')
-fig.tight_layout()
-plt.show()
-
-
-
-
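A note on PATCH 10: the deleted stock-market script computed its per-iteration test curve with model.loss_, an estimator attribute that later scikit-learn releases deprecate. The same curve can be traced portably through staged_predict; a minimal sketch on synthetic data, with squared error standing in for the deviance:

    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.metrics import mean_squared_error

    rng = np.random.RandomState(0)
    X = rng.rand(200, 1)
    y = np.sin(6 * X[:, 0]) + 0.1 * rng.randn(200)
    model = GradientBoostingRegressor(n_estimators=100).fit(X[:150], y[:150])
    # staged_predict yields the prediction after each boosting iteration,
    # so the per-stage squared error replaces the deprecated model.loss_.
    test_score = [
        mean_squared_error(y[150:], y_stage)
        for y_stage in model.staged_predict(X[150:])
    ]
    print(min(test_score), np.argmin(test_score) + 1)  # best error, best iteration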
From b0c8dcf7da7c3326db4ee5f665d89aebeb885ad2 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Thu, 13 Aug 2020 07:47:49 +0530
Subject: [PATCH 11/17] GradientBoostingRegressor example

---
 machine_learning/Gradient-boosting-regressor.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/machine_learning/Gradient-boosting-regressor.py b/machine_learning/Gradient-boosting-regressor.py
index c07c047ce974..2b0909a4a87a 100644
--- a/machine_learning/Gradient-boosting-regressor.py
+++ b/machine_learning/Gradient-boosting-regressor.py
@@ -1,5 +1,5 @@
-"""Implementation of GradientBoostingRegressor in sklearn using the 
-   boston dataset which is very popular for regression problem to 
+"""Implementation of GradientBoostingRegressor in sklearn using the
+   boston dataset which is very popular for regression problem to
    predict house price.
 """
@@ -38,12 +38,13 @@ def main():
     )
     # training the model
     model.fit(X_train, y_train)
-    """let have a look on the train and test score 
+    """let have a look on the train and test score
     to see how good the model fit the data"""
     score = model.score(X_train, y_train).round(3)
     print("Training score of GradientBoosting is :", score)
     print(
-        "the test score of GradienBoosting is :", model.score(X_test, y_test).round(3)
+        "the test score of GradienBoosting is :",
+        model.score(X_test, y_test).round(3)
     )
     # Let us evaluate the model by finding the errors
     y_pred = model.predict(X_test)
@@ -56,7 +57,8 @@ def main():
     # So let's run the model against the test data
     fig, ax = plt.subplots()
     ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
-    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=4)
+    ax.plot([y_test.min(), y_test.max()],
+            [y_test.min(), y_test.max()], "k--", lw=4)
     ax.set_xlabel("Actual")
     ax.set_ylabel("Predicted")
     ax.set_title("Truth vs Predicted")

From 7d0cbf9d4194a842c18f7c9615cb4c23a783f5a2 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Thu, 13 Aug 2020 07:56:50 +0530
Subject: [PATCH 12/17] Demo Gradient Boosting

---
 .../Gradient_boosting_regressor.py            | 70 +++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 machine_learning/Gradient_boosting_regressor.py

diff --git a/machine_learning/Gradient_boosting_regressor.py b/machine_learning/Gradient_boosting_regressor.py
new file mode 100644
index 000000000000..2b0909a4a87a
--- /dev/null
+++ b/machine_learning/Gradient_boosting_regressor.py
@@ -0,0 +1,70 @@
+"""Implementation of GradientBoostingRegressor in sklearn using the
+   boston dataset which is very popular for regression problem to
+   predict house price.
+"""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.datasets import load_boston
+from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.model_selection import train_test_split
+
+
+def main():
+
+    # loading the dataset from the sklearn
+    df = load_boston()
+    print(df.keys())
+    # now let construct a data frame
+    df_boston = pd.DataFrame(df.data, columns=df.feature_names)
+    # let add the target to the dataframe
+    df_boston["Price"] = df.target
+    # print the first five rows using the head function
+    print(df_boston.head())
+    # Summary statistics
+    print(df_boston.describe().T)
+    # Feature selection
+
+    X = df_boston.iloc[:, :-1]
+    y = df_boston.iloc[:, -1]  # target variable
+    # split the data with 75% train and 25% test sets.
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, random_state=0, test_size=0.25
+    )
+
+    model = GradientBoostingRegressor(
+        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
+    )
+    # training the model
+    model.fit(X_train, y_train)
+    """let have a look on the train and test score
+    to see how good the model fit the data"""
+    score = model.score(X_train, y_train).round(3)
+    print("Training score of GradientBoosting is :", score)
+    print(
+        "the test score of GradienBoosting is :",
+        model.score(X_test, y_test).round(3)
+    )
+    # Let us evaluate the model by finding the errors
+    y_pred = model.predict(X_test)
+
+    # The mean squared error
+    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
+    # Explained variance score: 1 is perfect prediction
+    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
+
+    # So let's run the model against the test data
+    fig, ax = plt.subplots()
+    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
+    ax.plot([y_test.min(), y_test.max()],
+            [y_test.min(), y_test.max()], "k--", lw=4)
+    ax.set_xlabel("Actual")
+    ax.set_ylabel("Predicted")
+    ax.set_title("Truth vs Predicted")
+    # this show function will display the plotting
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()

From 30ad0acb040de052474cced33da5be59e3715d21 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Thu, 13 Aug 2020 08:03:56 +0530
Subject: [PATCH 13/17] Demo Gradient boosting

---
 .../Gradient-boosting-regressor.py            | 70 -------------------
 1 file changed, 70 deletions(-)
 delete mode 100644 machine_learning/Gradient-boosting-regressor.py

diff --git a/machine_learning/Gradient-boosting-regressor.py b/machine_learning/Gradient-boosting-regressor.py
deleted file mode 100644
index 2b0909a4a87a..000000000000
--- a/machine_learning/Gradient-boosting-regressor.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""Implementation of GradientBoostingRegressor in sklearn using the
-   boston dataset which is very popular for regression problem to
-   predict house price.
-"""
-
-import pandas as pd
-import matplotlib.pyplot as plt
-from sklearn.datasets import load_boston
-from sklearn.metrics import mean_squared_error, r2_score
-from sklearn.ensemble import GradientBoostingRegressor
-from sklearn.model_selection import train_test_split
-
-
-def main():
-
-    # loading the dataset from the sklearn
-    df = load_boston()
-    print(df.keys())
-    # now let construct a data frame
-    df_boston = pd.DataFrame(df.data, columns=df.feature_names)
-    # let add the target to the dataframe
-    df_boston["Price"] = df.target
-    # print the first five rows using the head function
-    print(df_boston.head())
-    # Summary statistics
-    print(df_boston.describe().T)
-    # Feature selection
-
-    X = df_boston.iloc[:, :-1]
-    y = df_boston.iloc[:, -1]  # target variable
-    # split the data with 75% train and 25% test sets.
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, random_state=0, test_size=0.25
-    )
-
-    model = GradientBoostingRegressor(
-        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
-    )
-    # training the model
-    model.fit(X_train, y_train)
-    """let have a look on the train and test score
-    to see how good the model fit the data"""
-    score = model.score(X_train, y_train).round(3)
-    print("Training score of GradientBoosting is :", score)
-    print(
-        "the test score of GradienBoosting is :",
-        model.score(X_test, y_test).round(3)
-    )
-    # Let us evaluate the model by finding the errors
-    y_pred = model.predict(X_test)
-
-    # The mean squared error
-    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
-    # Explained variance score: 1 is perfect prediction
-    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
-
-    # So let's run the model against the test data
-    fig, ax = plt.subplots()
-    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
-    ax.plot([y_test.min(), y_test.max()],
-            [y_test.min(), y_test.max()], "k--", lw=4)
-    ax.set_xlabel("Actual")
-    ax.set_ylabel("Predicted")
-    ax.set_title("Truth vs Predicted")
-    # this show function will display the plotting
-    plt.show()
-
-
-if __name__ == "__main__":
-    main()

From b1c51ebbc2b63987f16442a2c456e0edcff34188 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Thu, 13 Aug 2020 15:14:35 +0530
Subject: [PATCH 14/17] Demo of gradient boosting

---
 .../gradient_boosting_regressor.py            | 70 +++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 machine_learning/gradient_boosting_regressor.py

diff --git a/machine_learning/gradient_boosting_regressor.py b/machine_learning/gradient_boosting_regressor.py
new file mode 100644
index 000000000000..2b0909a4a87a
--- /dev/null
+++ b/machine_learning/gradient_boosting_regressor.py
@@ -0,0 +1,70 @@
+"""Implementation of GradientBoostingRegressor in sklearn using the
+   boston dataset which is very popular for regression problem to
+   predict house price.
+"""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.datasets import load_boston
+from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.model_selection import train_test_split
+
+
+def main():
+
+    # loading the dataset from the sklearn
+    df = load_boston()
+    print(df.keys())
+    # now let construct a data frame
+    df_boston = pd.DataFrame(df.data, columns=df.feature_names)
+    # let add the target to the dataframe
+    df_boston["Price"] = df.target
+    # print the first five rows using the head function
+    print(df_boston.head())
+    # Summary statistics
+    print(df_boston.describe().T)
+    # Feature selection
+
+    X = df_boston.iloc[:, :-1]
+    y = df_boston.iloc[:, -1]  # target variable
+    # split the data with 75% train and 25% test sets.
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, random_state=0, test_size=0.25
+    )
+
+    model = GradientBoostingRegressor(
+        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
+    )
+    # training the model
+    model.fit(X_train, y_train)
+    """let have a look on the train and test score
+    to see how good the model fit the data"""
+    score = model.score(X_train, y_train).round(3)
+    print("Training score of GradientBoosting is :", score)
+    print(
+        "the test score of GradienBoosting is :",
+        model.score(X_test, y_test).round(3)
+    )
+    # Let us evaluate the model by finding the errors
+    y_pred = model.predict(X_test)
+
+    # The mean squared error
+    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
+    # Explained variance score: 1 is perfect prediction
+    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
+
+    # So let's run the model against the test data
+    fig, ax = plt.subplots()
+    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
+    ax.plot([y_test.min(), y_test.max()],
+            [y_test.min(), y_test.max()], "k--", lw=4)
+    ax.set_xlabel("Actual")
+    ax.set_ylabel("Predicted")
+    ax.set_title("Truth vs Predicted")
+    # this show function will display the plotting
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()

From fb17080a1045ee85445ed71709a7564d7edadaf8 Mon Sep 17 00:00:00 2001
From: abdoulayegk
Date: Thu, 13 Aug 2020 15:25:14 +0530
Subject: [PATCH 15/17] Gradient boosting demo

---
 .../Gradient_boosting_regressor.py            | 70 -------------------
 1 file changed, 70 deletions(-)
 delete mode 100644 machine_learning/Gradient_boosting_regressor.py

diff --git a/machine_learning/Gradient_boosting_regressor.py b/machine_learning/Gradient_boosting_regressor.py
deleted file mode 100644
index 2b0909a4a87a..000000000000
--- a/machine_learning/Gradient_boosting_regressor.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""Implementation of GradientBoostingRegressor in sklearn using the
-   boston dataset which is very popular for regression problem to
-   predict house price.
-"""
-
-import pandas as pd
-import matplotlib.pyplot as plt
-from sklearn.datasets import load_boston
-from sklearn.metrics import mean_squared_error, r2_score
-from sklearn.ensemble import GradientBoostingRegressor
-from sklearn.model_selection import train_test_split
-
-
-def main():
-
-    # loading the dataset from the sklearn
-    df = load_boston()
-    print(df.keys())
-    # now let construct a data frame
-    df_boston = pd.DataFrame(df.data, columns=df.feature_names)
-    # let add the target to the dataframe
-    df_boston["Price"] = df.target
-    # print the first five rows using the head function
-    print(df_boston.head())
-    # Summary statistics
-    print(df_boston.describe().T)
-    # Feature selection
-
-    X = df_boston.iloc[:, :-1]
-    y = df_boston.iloc[:, -1]  # target variable
-    # split the data with 75% train and 25% test sets.
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, random_state=0, test_size=0.25
-    )
-
-    model = GradientBoostingRegressor(
-        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
-    )
-    # training the model
-    model.fit(X_train, y_train)
-    """let have a look on the train and test score
-    to see how good the model fit the data"""
-    score = model.score(X_train, y_train).round(3)
-    print("Training score of GradientBoosting is :", score)
-    print(
-        "the test score of GradienBoosting is :",
-        model.score(X_test, y_test).round(3)
-    )
-    # Let us evaluate the model by finding the errors
-    y_pred = model.predict(X_test)
-
-    # The mean squared error
-    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
-    # Explained variance score: 1 is perfect prediction
-    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
-
-    # So let's run the model against the test data
-    fig, ax = plt.subplots()
-    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
-    ax.plot([y_test.min(), y_test.max()],
-            [y_test.min(), y_test.max()], "k--", lw=4)
-    ax.set_xlabel("Actual")
-    ax.set_ylabel("Predicted")
-    ax.set_title("Truth vs Predicted")
-    # this show function will display the plotting
-    plt.show()
-
-
-if __name__ == "__main__":
-    main()

From cdfe1a45d1f79d0daaff1eac55c267458a4c347d Mon Sep 17 00:00:00 2001
From: John Law
Date: Sat, 26 Sep 2020 22:14:23 +0800
Subject: [PATCH 16/17] Fix spelling mistake

---
 machine_learning/gradient_boosting_regressor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/gradient_boosting_regressor.py b/machine_learning/gradient_boosting_regressor.py
index 2b0909a4a87a..a66b1a57984b 100644
--- a/machine_learning/gradient_boosting_regressor.py
+++ b/machine_learning/gradient_boosting_regressor.py
@@ -43,7 +43,7 @@ def main():
     score = model.score(X_train, y_train).round(3)
     print("Training score of GradientBoosting is :", score)
     print(
-        "the test score of GradienBoosting is :",
+        "the test score of GradientBoosting is :",
         model.score(X_test, y_test).round(3)
     )
     # Let us evaluate the model by finding the errors
     y_pred = model.predict(X_test)
From 28f87db0be3213b84ee614c36ad1743f9ec26717 Mon Sep 17 00:00:00 2001
From: John Law
Date: Sat, 26 Sep 2020 22:28:47 +0800
Subject: [PATCH 17/17] Fix formatting

---
 machine_learning/gradient_boosting_regressor.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/machine_learning/gradient_boosting_regressor.py b/machine_learning/gradient_boosting_regressor.py
index a66b1a57984b..045aa056ec2f 100644
--- a/machine_learning/gradient_boosting_regressor.py
+++ b/machine_learning/gradient_boosting_regressor.py
@@ -38,13 +38,13 @@ def main():
     )
     # training the model
     model.fit(X_train, y_train)
-    """let have a look on the train and test score
-    to see how good the model fit the data"""
-    score = model.score(X_train, y_train).round(3)
-    print("Training score of GradientBoosting is :", score)
+    # to see how good the model fit the data
+    training_score = model.score(X_train, y_train).round(3)
+    test_score = model.score(X_test, y_test).round(3)
+    print("Training score of GradientBoosting is :", training_score)
     print(
-        "the test score of GradientBoosting is :",
-        model.score(X_test, y_test).round(3)
+        "The test score of GradientBoosting is :",
+        test_score
     )
     # Let us evaluate the model by finding the errors
     y_pred = model.predict(X_test)
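A closing usage note: after PATCH 17 the series leaves a single machine_learning/gradient_boosting_regressor.py whose work happens inside main() under the __main__ guard, so it can be smoke-tested from the repository root. A minimal sketch, assuming Python 3 with pandas, matplotlib and a pre-1.2 scikit-learn installed (load_boston is removed in 1.2):

    # Runs main() exactly as `python machine_learning/gradient_boosting_regressor.py` would.
    import runpy

    runpy.run_path(
        "machine_learning/gradient_boosting_regressor.py",
        run_name="__main__",
    )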