diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py
index 839a5366d1cc..eb34dffdf1b6 100644
--- a/machine_learning/linear_regression.py
+++ b/machine_learning/linear_regression.py
@@ -1,13 +1,3 @@
-"""
-Linear regression is the most basic type of regression commonly used for
-predictive analysis. The idea is pretty simple: we have a dataset and we have
-features associated with it. Features should be chosen very cautiously
-as they determine how much our model will be able to make future predictions.
-We try to set the weight of these features, over many iterations, so that they best
-fit our dataset. In this particular code, I had used a CSGO dataset (ADR vs
-Rating). We try to best fit a line through dataset and estimate the parameters.
-"""
-
 import numpy as np
 import requests
 
@@ -15,32 +5,32 @@
 def collect_dataset():
     """Collect dataset of CSGO
     The dataset contains ADR vs Rating of a Player
-    :return : dataset obtained from the link, as matrix
+    :return: dataset obtained from the link, as a matrix
     """
     response = requests.get(
-        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
-        "master/Week1/ADRvsRating.csv",
+        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/master/Week1/ADRvsRating.csv",
         timeout=10,
     )
-    lines = response.text.splitlines()
-    data = []
-    for item in lines:
-        item = item.split(",")
-        data.append(item)
-    data.pop(0)  # This is for removing the labels from the list
-    dataset = np.matrix(data)
-    return dataset
+    data = np.loadtxt(response.text.splitlines()[1:], delimiter=",")  # Skip the header
+    return data
+
+
+def normalize_features(data):
+    """Normalize feature values to have mean 0 and variance 1"""
+    means = np.mean(data[:, :-1], axis=0)
+    stds = np.std(data[:, :-1], axis=0)
+    data[:, :-1] = (data[:, :-1] - means) / stds
+    return data
 
 
 def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
-    """Run steep gradient descent and updates the Feature vector accordingly_
-    :param data_x : contains the dataset
-    :param data_y : contains the output associated with each data-entry
-    :param len_data : length of the data_
-    :param alpha : Learning rate of the model
-    :param theta : Feature vector (weight's for our model)
-    ;param return : Updated Feature's, using
-    curr_features - alpha_ * gradient(w.r.t. feature)
+    """Run steep gradient descent and updates the Feature vector accordingly
+    :param data_x: contains the dataset
+    :param data_y: contains the output associated with each data-entry
+    :param len_data: length of the data
+    :param alpha: Learning rate of the model
+    :param theta: Feature vector (weights for our model)
+    :return: Updated Features, using curr_features - alpha * gradient(w.r.t. feature)
     """
     n = len_data
 
@@ -53,11 +43,11 @@ def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
 
 def sum_of_square_error(data_x, data_y, len_data, theta):
     """Return sum of square error for error calculation
-    :param data_x : contains our dataset
-    :param data_y : contains the output (result vector)
-    :param len_data : len of the dataset
-    :param theta : contains the feature vector
-    :return : sum of square error computed from given feature's
+    :param data_x: contains our dataset
+    :param data_y: contains the output (result vector)
+    :param len_data: length of the dataset
+    :param theta: contains the feature vector
+    :return: sum of square error computed from given features
     """
     prod = np.dot(theta, data_x.transpose())
     prod -= data_y.transpose()
@@ -68,31 +58,36 @@ def sum_of_square_error(data_x, data_y, len_data, theta):
 
 def run_linear_regression(data_x, data_y):
     """Implement Linear regression over the dataset
-    :param data_x : contains our dataset
-    :param data_y : contains the output (result vector)
-    :return : feature for line of best fit (Feature vector)
+    :param data_x: contains our dataset
+    :param data_y: contains the output (result vector)
+    :return: feature for the line of best fit (Feature vector)
     """
     iterations = 100000
     alpha = 0.0001550
 
     no_features = data_x.shape[1]
-    len_data = data_x.shape[0] - 1
+    len_data = data_x.shape[0]
 
     theta = np.zeros((1, no_features))
+    rng = np.random.default_rng()  # Create a random generator instance
 
     for i in range(iterations):
-        theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
-        error = sum_of_square_error(data_x, data_y, len_data, theta)
+        indices = rng.choice(len_data, size=32, replace=False)  # Randomly sample indices using the generator
+        x_batch = data_x[indices]
+        y_batch = data_y[indices]
+
+        theta = run_steep_gradient_descent(x_batch, y_batch, len(x_batch), alpha, theta)
+        error = sum_of_square_error(x_batch, y_batch, len(x_batch), theta)
         print(f"At Iteration {i + 1} - Error is {error:.5f}")
     return theta
 
 
 def mean_absolute_error(predicted_y, original_y):
-    """Return sum of square error for error calculation
-    :param predicted_y : contains the output of prediction (result vector)
-    :param original_y : contains values of expected outcome
-    :return : mean absolute error computed from given feature's
+    """Return mean absolute error for error calculation
+    :param predicted_y: contains the output of prediction (result vector)
+    :param original_y: contains values of expected outcome
+    :return: mean absolute error computed from given features
     """
     total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
     return total / len(original_y)
@@ -101,9 +96,10 @@ def mean_absolute_error(predicted_y, original_y):
 def main():
     """Driver function"""
     data = collect_dataset()
+    data = normalize_features(data)  # Normalize the features
 
     len_data = data.shape[0]
-    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
+    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)  # Add bias term
     data_y = data[:, -1].astype(float)
 
     theta = run_linear_regression(data_x, data_y)