From 7e69e5f626101d4d571510e1522ffc70f0768708 Mon Sep 17 00:00:00 2001
From: Epik-Whale463 <pvrcharan2022@gmail.com>
Date: Thu, 26 Sep 2024 12:58:40 +0530
Subject: [PATCH 1/5] Add tests to pi_generator #9947

---
 maths/pi_generator.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/maths/pi_generator.py b/maths/pi_generator.py
index 97f2c540c1ce..a754457572fc 100644
--- a/maths/pi_generator.py
+++ b/maths/pi_generator.py
@@ -34,6 +34,13 @@ def calculate_pi(limit: int) -> str:
     '3.14159265358979323846264338327950288419716939937510'
     >>> calculate_pi(80)
     '3.14159265358979323846264338327950288419716939937510582097494459230781640628620899'
+    >>> calculate_pi(100)
+    '3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679'
+    >>> calculate_pi(150)
+    '3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233785'
+    >>> calculate_pi(200)
+    '3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233785661758005171150968760911718138199318110487701902711116020288271298794483246506877457713886116427171479924442928230863465674818467669405132000568127145263560827785771342757789609173637178721468440901224953431338055097754720181942947403241217'
+
     """
     # Variables used for the iteration process
     q = 1

From 0e1b238efb84c049d229a05b509efab630531372 Mon Sep 17 00:00:00 2001
From: Epik-Whale463 <pvrcharan2022@gmail.com>
Date: Thu, 26 Sep 2024 13:13:13 +0530
Subject: [PATCH 2/5] Changed tests in pi_generator

---
 maths/pi_generator.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/maths/pi_generator.py b/maths/pi_generator.py
index a754457572fc..6744a99fff2b 100644
--- a/maths/pi_generator.py
+++ b/maths/pi_generator.py
@@ -38,9 +38,7 @@ def calculate_pi(limit: int) -> str:
     '3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679'
     >>> calculate_pi(150)
     '3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233785'
-    >>> calculate_pi(200)
-    '3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233785661758005171150968760911718138199318110487701902711116020288271298794483246506877457713886116427171479924442928230863465674818467669405132000568127145263560827785771342757789609173637178721468440901224953431338055097754720181942947403241217'
-
+   
     """
     # Variables used for the iteration process
     q = 1

From 25a13fee51b101ce377778a1fe89b2aa6aa3c275 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 26 Sep 2024 07:44:18 +0000
Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 maths/pi_generator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/maths/pi_generator.py b/maths/pi_generator.py
index 6744a99fff2b..b633d97d3983 100644
--- a/maths/pi_generator.py
+++ b/maths/pi_generator.py
@@ -38,7 +38,7 @@ def calculate_pi(limit: int) -> str:
     '3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679'
     >>> calculate_pi(150)
     '3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233785'
-   
+
     """
     # Variables used for the iteration process
     q = 1

From ece859d400758ca16c21a2979426648672c90f5e Mon Sep 17 00:00:00 2001
From: Epik-Whale463 <pvrcharan2022@gmail.com>
Date: Wed, 2 Oct 2024 23:13:22 +0530
Subject: [PATCH 4/5] Refactor linear_regression.py: Add OLS, fix gradient
 descent, compare methods; improve accuracy, add sklearn benchmark, and
 visualize results

---
 machine_learning/linear_regression.py | 156 +++++++++++---------------
 1 file changed, 68 insertions(+), 88 deletions(-)

diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py
index 839a5366d1cc..cfcd17b48f72 100644
--- a/machine_learning/linear_regression.py
+++ b/machine_learning/linear_regression.py
@@ -1,21 +1,12 @@
-"""
-Linear regression is the most basic type of regression commonly used for
-predictive analysis. The idea is pretty simple: we have a dataset and we have
-features associated with it. Features should be chosen very cautiously
-as they determine how much our model will be able to make future predictions.
-We try to set the weight of these features, over many iterations, so that they best
-fit our dataset. In this particular code, I had used a CSGO dataset (ADR vs
-Rating). We try to best fit a line through dataset and estimate the parameters.
-"""
-
 import numpy as np
 import requests
-
+from sklearn.linear_model import LinearRegression
+import matplotlib.pyplot as plt
 
 def collect_dataset():
     """Collect dataset of CSGO
     The dataset contains ADR vs Rating of a Player
-    :return : dataset obtained from the link, as matrix
+    :return : dataset obtained from the link, as numpy array
     """
     response = requests.get(
         "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
@@ -24,94 +15,83 @@ def collect_dataset():
     )
     lines = response.text.splitlines()
     data = []
-    for item in lines:
+    for item in lines[1:]:  # Skip the header
         item = item.split(",")
         data.append(item)
-    data.pop(0)  # This is for removing the labels from the list
-    dataset = np.matrix(data)
+    dataset = np.array(data, dtype=float)
     return dataset
 
-
-def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
-    """Run steep gradient descent and updates the Feature vector accordingly_
-    :param data_x   : contains the dataset
-    :param data_y   : contains the output associated with each data-entry
-    :param len_data : length of the data_
-    :param alpha    : Learning rate of the model
-    :param theta    : Feature vector (weight's for our model)
-    ;param return    : Updated Feature's, using
-                       curr_features - alpha_ * gradient(w.r.t. feature)
-    """
-    n = len_data
-
-    prod = np.dot(theta, data_x.transpose())
-    prod -= data_y.transpose()
-    sum_grad = np.dot(prod, data_x)
-    theta = theta - (alpha / n) * sum_grad
-    return theta
-
-
-def sum_of_square_error(data_x, data_y, len_data, theta):
-    """Return sum of square error for error calculation
-    :param data_x    : contains our dataset
-    :param data_y    : contains the output (result vector)
-    :param len_data  : len of the dataset
-    :param theta     : contains the feature vector
-    :return          : sum of square error computed from given feature's
-    """
-    prod = np.dot(theta, data_x.transpose())
-    prod -= data_y.transpose()
-    sum_elem = np.sum(np.square(prod))
-    error = sum_elem / (2 * len_data)
-    return error
-
-
-def run_linear_regression(data_x, data_y):
-    """Implement Linear regression over the dataset
-    :param data_x  : contains our dataset
-    :param data_y  : contains the output (result vector)
-    :return        : feature for line of best fit (Feature vector)
+def run_gradient_descent(X, y, learning_rate=0.0001550, iterations=100000):
+    """Run gradient descent to find approximate coefficients
+    :param X: feature matrix
+    :param y: target vector
+    :param learning_rate: learning rate for gradient descent
+    :param iterations: number of iterations
+    :return: coefficients (intercept and slope)
     """
-    iterations = 100000
-    alpha = 0.0001550
-
-    no_features = data_x.shape[1]
-    len_data = data_x.shape[0] - 1
-
-    theta = np.zeros((1, no_features))
-
+    m = X.shape[0]
+    theta = np.zeros(X.shape[1])
+    
     for i in range(iterations):
-        theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
-        error = sum_of_square_error(data_x, data_y, len_data, theta)
-        print(f"At Iteration {i + 1} - Error is {error:.5f}")
-
+        h = np.dot(X, theta)
+        gradient = np.dot(X.T, (h - y)) / m
+        theta -= learning_rate * gradient
+        
+        if i % 10000 == 0:
+            mse = np.mean((h - y) ** 2)
+            print(f"Iteration {i}: MSE = {mse:.5f}")
+    
     return theta
 
-
-def mean_absolute_error(predicted_y, original_y):
-    """Return sum of square error for error calculation
-    :param predicted_y   : contains the output of prediction (result vector)
-    :param original_y    : contains values of expected outcome
-    :return          : mean absolute error computed from given feature's
+def calculate_ols_coefficients(X, y):
+    """Calculate optimal coefficients using the normal equation
+    :param X: feature matrix
+    :param y: target vector
+    :return: coefficients (intercept and slope)
     """
-    total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
-    return total / len(original_y)
-
+    return np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
 
 def main():
     """Driver function"""
     data = collect_dataset()
-
-    len_data = data.shape[0]
-    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
-    data_y = data[:, -1].astype(float)
-
-    theta = run_linear_regression(data_x, data_y)
-    len_result = theta.shape[1]
-    print("Resultant Feature vector : ")
-    for i in range(len_result):
-        print(f"{theta[0, i]:.5f}")
-
+    
+    X = data[:, 0].reshape(-1, 1)
+    y = data[:, 1]
+    
+    # Add intercept term to X
+    X_with_intercept = np.c_[np.ones(X.shape[0]), X]
+    
+    # Gradient Descent
+    gd_theta = run_gradient_descent(X_with_intercept, y)
+    print(f"Gradient Descent coefficients: intercept = {gd_theta[0]:.5f}, slope = {gd_theta[1]:.5f}")
+    
+    # Ordinary Least Squares (Normal Equation)
+    ols_theta = calculate_ols_coefficients(X_with_intercept, y)
+    print(f"OLS coefficients: intercept = {ols_theta[0]:.5f}, slope = {ols_theta[1]:.5f}")
+    
+    # Sklearn for comparison
+    reg = LinearRegression().fit(X, y)
+    print(f"Sklearn coefficients: intercept = {reg.intercept_:.5f}, slope = {reg.coef_[0]:.5f}")
+    
+    # Calculate and print MSE for each method
+    gd_mse = np.mean((np.dot(X_with_intercept, gd_theta) - y) ** 2)
+    ols_mse = np.mean((np.dot(X_with_intercept, ols_theta) - y) ** 2)
+    sklearn_mse = np.mean((reg.predict(X) - y) ** 2)
+    
+    print(f"Gradient Descent MSE: {gd_mse:.5f}")
+    print(f"OLS MSE: {ols_mse:.5f}")
+    print(f"Sklearn MSE: {sklearn_mse:.5f}")
+    
+    # Plotting
+    plt.scatter(X, y, color="lightgray", label="Data points")
+    plt.plot(X, np.dot(X_with_intercept, gd_theta), color="red", label="Gradient Descent")
+    plt.plot(X, np.dot(X_with_intercept, ols_theta), color="green", label="OLS (Normal Equation)")
+    plt.plot(X, reg.predict(X), color="blue", label="Sklearn")
+    plt.legend()
+    plt.xlabel("ADR")
+    plt.ylabel("Rating")
+    plt.title("Linear Regression: ADR vs Rating")
+    plt.show()
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file

From 3a07610ae6e4cdb2bcdb7d1cf08b8ae5d22933cd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 2 Oct 2024 17:46:58 +0000
Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/linear_regression.py | 52 ++++++++++++++++++---------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py
index cfcd17b48f72..b71dcc07ecb9 100644
--- a/machine_learning/linear_regression.py
+++ b/machine_learning/linear_regression.py
@@ -3,6 +3,7 @@
 from sklearn.linear_model import LinearRegression
 import matplotlib.pyplot as plt
 
+
 def collect_dataset():
     """Collect dataset of CSGO
     The dataset contains ADR vs Rating of a Player
@@ -21,6 +22,7 @@ def collect_dataset():
     dataset = np.array(data, dtype=float)
     return dataset
 
+
 def run_gradient_descent(X, y, learning_rate=0.0001550, iterations=100000):
     """Run gradient descent to find approximate coefficients
     :param X: feature matrix
@@ -31,18 +33,19 @@ def run_gradient_descent(X, y, learning_rate=0.0001550, iterations=100000):
     """
     m = X.shape[0]
     theta = np.zeros(X.shape[1])
-    
+
     for i in range(iterations):
         h = np.dot(X, theta)
         gradient = np.dot(X.T, (h - y)) / m
         theta -= learning_rate * gradient
-        
+
         if i % 10000 == 0:
             mse = np.mean((h - y) ** 2)
             print(f"Iteration {i}: MSE = {mse:.5f}")
-    
+
     return theta
 
+
 def calculate_ols_coefficients(X, y):
     """Calculate optimal coefficients using the normal equation
     :param X: feature matrix
@@ -51,41 +54,55 @@ def calculate_ols_coefficients(X, y):
     """
     return np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
 
+
 def main():
     """Driver function"""
     data = collect_dataset()
-    
+
     X = data[:, 0].reshape(-1, 1)
     y = data[:, 1]
-    
+
     # Add intercept term to X
     X_with_intercept = np.c_[np.ones(X.shape[0]), X]
-    
+
     # Gradient Descent
     gd_theta = run_gradient_descent(X_with_intercept, y)
-    print(f"Gradient Descent coefficients: intercept = {gd_theta[0]:.5f}, slope = {gd_theta[1]:.5f}")
-    
+    print(
+        f"Gradient Descent coefficients: intercept = {gd_theta[0]:.5f}, slope = {gd_theta[1]:.5f}"
+    )
+
     # Ordinary Least Squares (Normal Equation)
     ols_theta = calculate_ols_coefficients(X_with_intercept, y)
-    print(f"OLS coefficients: intercept = {ols_theta[0]:.5f}, slope = {ols_theta[1]:.5f}")
-    
+    print(
+        f"OLS coefficients: intercept = {ols_theta[0]:.5f}, slope = {ols_theta[1]:.5f}"
+    )
+
     # Sklearn for comparison
     reg = LinearRegression().fit(X, y)
-    print(f"Sklearn coefficients: intercept = {reg.intercept_:.5f}, slope = {reg.coef_[0]:.5f}")
-    
+    print(
+        f"Sklearn coefficients: intercept = {reg.intercept_:.5f}, slope = {reg.coef_[0]:.5f}"
+    )
+
     # Calculate and print MSE for each method
     gd_mse = np.mean((np.dot(X_with_intercept, gd_theta) - y) ** 2)
     ols_mse = np.mean((np.dot(X_with_intercept, ols_theta) - y) ** 2)
     sklearn_mse = np.mean((reg.predict(X) - y) ** 2)
-    
+
     print(f"Gradient Descent MSE: {gd_mse:.5f}")
     print(f"OLS MSE: {ols_mse:.5f}")
     print(f"Sklearn MSE: {sklearn_mse:.5f}")
-    
+
     # Plotting
     plt.scatter(X, y, color="lightgray", label="Data points")
-    plt.plot(X, np.dot(X_with_intercept, gd_theta), color="red", label="Gradient Descent")
-    plt.plot(X, np.dot(X_with_intercept, ols_theta), color="green", label="OLS (Normal Equation)")
+    plt.plot(
+        X, np.dot(X_with_intercept, gd_theta), color="red", label="Gradient Descent"
+    )
+    plt.plot(
+        X,
+        np.dot(X_with_intercept, ols_theta),
+        color="green",
+        label="OLS (Normal Equation)",
+    )
     plt.plot(X, reg.predict(X), color="blue", label="Sklearn")
     plt.legend()
     plt.xlabel("ADR")
@@ -93,5 +110,6 @@ def main():
     plt.title("Linear Regression: ADR vs Rating")
     plt.show()
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()