TheAlgorithms · cclauss · Sep 26, 2020 · Aug 11, 2020 · Aug 11, 2020 · Aug 11, 2020
diff --git a/machine_learning/gradient_boosting_regressor.py b/machine_learning/gradient_boosting_regressor.py
@@ -0,0 +1,70 @@
+"""Implementation of GradientBoostingRegressor in sklearn using the
+   boston dataset which is very popular for regression problem to
+   predict house price.
+"""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.datasets import load_boston
+from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.model_selection import train_test_split
+
+
+def main():
+
+    # loading the dataset from the sklearn
+    df = load_boston()
+    print(df.keys())
+    # now let construct a data frame
+    df_boston = pd.DataFrame(df.data, columns=df.feature_names)
+    # let add the target to the dataframe
+    df_boston["Price"] = df.target
+    # print the first five rows using the head function
+    print(df_boston.head())
+    # Summary statistics
+    print(df_boston.describe().T)
+    # Feature selection
+
+    X = df_boston.iloc[:, :-1]
+    y = df_boston.iloc[:, -1]  # target variable
+    # split the data with 75% train and 25% test sets.
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, random_state=0, test_size=0.25
+    )
+
+    model = GradientBoostingRegressor(
+        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
+    )
+    # training the model
+    model.fit(X_train, y_train)
+    # to see how good the model fit the data
+    training_score = model.score(X_train, y_train).round(3)
+    test_score = model.score(X_test, y_test).round(3)
+    print("Training score of GradientBoosting is :", training_score)
+    print(
+        "The test score of GradientBoosting is :",
+        test_score
+    )
+    # Let us evaluation the model by finding the errors
+    y_pred = model.predict(X_test)
+
+    # The mean squared error
+    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
+    # Explained variance score: 1 is perfect prediction
+    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
+
+    # So let's run the model against the test data
+    fig, ax = plt.subplots()
+    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
+    ax.plot([y_test.min(), y_test.max()],
+            [y_test.min(), y_test.max()], "k--", lw=4)
+    ax.set_xlabel("Actual")
+    ax.set_ylabel("Predicted")
+    ax.set_title("Truth vs Predicted")
+    # this show function will display the plotting
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()