Skip to content

Gradient Boosting Regressor #2298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Sep 26, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions machine_learning/gradient_boosting_regressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Implementation of GradientBoostingRegressor in sklearn using the
boston dataset which is very popular for regression problem to
predict house price.
"""

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split


def main():

# loading the dataset from the sklearn
df = load_boston()
print(df.keys())
# now let construct a data frame
df_boston = pd.DataFrame(df.data, columns=df.feature_names)
# let add the target to the dataframe
df_boston["Price"] = df.target
# print the first five rows using the head function
print(df_boston.head())
# Summary statistics
print(df_boston.describe().T)
# Feature selection

X = df_boston.iloc[:, :-1]
y = df_boston.iloc[:, -1] # target variable
# split the data with 75% train and 25% test sets.
X_train, X_test, y_train, y_test = train_test_split(
X, y, random_state=0, test_size=0.25
)

model = GradientBoostingRegressor(
n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
)
# training the model
model.fit(X_train, y_train)
# to see how good the model fit the data
training_score = model.score(X_train, y_train).round(3)
test_score = model.score(X_test, y_test).round(3)
print("Training score of GradientBoosting is :", training_score)
print(
"The test score of GradientBoosting is :",
test_score
)
# Let us evaluation the model by finding the errors
y_pred = model.predict(X_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
# Explained variance score: 1 is perfect prediction
print("Test Variance score: %.2f" % r2_score(y_test, y_pred))

# So let's run the model against the test data
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
ax.plot([y_test.min(), y_test.max()],
[y_test.min(), y_test.max()], "k--", lw=4)
ax.set_xlabel("Actual")
ax.set_ylabel("Predicted")
ax.set_title("Truth vs Predicted")
# this show function will display the plotting
plt.show()


if __name__ == "__main__":
main()