From f3fb50405973d92d0d376d3573308d512705ab32 Mon Sep 17 00:00:00 2001
From: Ishan Mishra <33893659+legitishan@users.noreply.github.com>
Date: Wed, 9 Oct 2024 00:38:33 +0530
Subject: [PATCH 1/4] Create simCLR.py

---
 machine_learning/simCLR.py | 114 +++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 machine_learning/simCLR.py

diff --git a/machine_learning/simCLR.py b/machine_learning/simCLR.py
new file mode 100644
index 000000000000..4f8833af6a14
--- /dev/null
+++ b/machine_learning/simCLR.py
@@ -0,0 +1,114 @@
+"""
+Implementation of Self-Supervised Learning (SSL) with SimCLR. SimCLR is a framework for learning visual representations without labels by maximizing the agreement between different augmented views of the same image.
+"""
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras import layers
+from tensorflow.keras.models import Model
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.losses import SparseCategoricalCrossentropy
+from tensorflow.keras.applications import ResNet50
+from sklearn.metrics import ConfusionMatrixDisplay
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import matplotlib.pyplot as plt
+
+
+def data_handling(data: dict) -> tuple:
+    """
+    Handles the data by splitting features and targets.
+    
+    >>> data_handling({'data': np.array([[0.1, 0.2], [0.3, 0.4]]), 'target': np.array([0, 1])})
+    (array([[0.1, 0.2], [0.3, 0.4]]), array([0, 1]))
+    """
+    return (data["data"], data["target"])
+
+
+def simclr_model(input_shape=(32, 32, 3), projection_dim=64) -> Model:
+    """
+    Builds a SimCLR model based on ResNet50.
+    
+    >>> simclr_model().summary()  # doctest: +ELLIPSIS
+    Model: "model"
+    _________________________________________________________________
+    ...
+    """
+    base_model = ResNet50(include_top=False, input_shape=input_shape, pooling="avg")
+    base_model.trainable = True
+    
+    inputs = layers.Input(shape=input_shape)
+    x = base_model(inputs, training=True)
+    x = layers.Dense(projection_dim, activation="relu")(x)
+    outputs = layers.Dense(projection_dim)(x)
+    return Model(inputs, outputs)
+
+
+def contrastive_loss(projection_1, projection_2, temperature=0.1):
+    """
+    Contrastive loss function for self-supervised learning.
+    
+    >>> contrastive_loss(np.array([0.1]), np.array([0.2]))
+    0.0
+    """
+    projections = tf.concat([projection_1, projection_2], axis=0)
+    similarity_matrix = tf.matmul(projections, projections, transpose_b=True)
+    labels = tf.range(tf.shape(projections)[0])
+    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels, similarity_matrix)
+    return tf.reduce_mean(loss)
+
+
+def main() -> None:
+    """
+    >>> main()
+    """
+    # Load a small dataset (using CIFAR-10 as an example)
+    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
+    x_train, x_test = x_train / 255.0, x_test / 255.0
+
+    # Use label encoder to convert labels into numerical form
+    le = LabelEncoder()
+    y_train = le.fit_transform(y_train.flatten())
+    y_test = le.transform(y_test.flatten())
+    
+    # Split data into train and validation sets
+    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2)
+
+    # Build the SimCLR model
+    model = simclr_model()
+    optimizer = Adam()
+    loss_fn = SparseCategoricalCrossentropy(from_logits=True)
+
+    # Training the SimCLR model
+    for epoch in range(10):
+        with tf.GradientTape() as tape:
+            projections_1 = model(x_train)
+            projections_2 = model(x_train)  # Normally, this would use augmented views
+            loss = contrastive_loss(projections_1, projections_2)
+        gradients = tape.gradient(loss, model.trainable_variables)
+        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+        print(f"Epoch {epoch+1}: Contrastive Loss = {loss.numpy()}")
+
+    # Create a new model with a classification head for evaluation
+    classifier = layers.Dense(10, activation="softmax")(model.output)
+    classifier_model = Model(model.input, classifier)
+    classifier_model.compile(optimizer=Adam(), loss=loss_fn, metrics=["accuracy"])
+    classifier_model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=5)
+
+    # Display the confusion matrix of the classifier
+    ConfusionMatrixDisplay.from_estimator(
+        classifier_model,
+        x_test,
+        y_test,
+        display_labels=le.classes_,
+        cmap="Blues",
+        normalize="true",
+    )
+    plt.title("Normalized Confusion Matrix - CIFAR-10")
+    plt.show()
+
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod(verbose=True)
+    main()

From f95efce17d218777074b5aec3462e1cd5294a337 Mon Sep 17 00:00:00 2001
From: Ishan Mishra <33893659+legitishan@users.noreply.github.com>
Date: Wed, 9 Oct 2024 00:40:59 +0530
Subject: [PATCH 2/4] Create simclr.py

---
 machine_learning/simclr.py | 114 +++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 machine_learning/simclr.py

diff --git a/machine_learning/simclr.py b/machine_learning/simclr.py
new file mode 100644
index 000000000000..5cd3505cc1b8
--- /dev/null
+++ b/machine_learning/simclr.py
@@ -0,0 +1,114 @@
+"""
+Implementation of SimCLR. 
+Self-Supervised Learning (SSL) with SimCLR. SimCLR is a framework for learning visual representations without labels by maximizing the agreement between different augmented views of the same image.
+"""
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras import layers
+from tensorflow.keras.models import Model
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.losses import SparseCategoricalCrossentropy
+from tensorflow.keras.applications import ResNet50
+from sklearn.metrics import ConfusionMatrixDisplay
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import matplotlib.pyplot as plt
+
+
+def data_handling(data: dict) -> tuple:
+    """
+    Handles the data by splitting features and targets.
+    
+    >>> data_handling({'data': np.array([[0.1, 0.2], [0.3, 0.4]]), 'target': np.array([0, 1])})
+    (array([[0.1, 0.2], [0.3, 0.4]]), array([0, 1]))
+    """
+    return (data["data"], data["target"])
+
+
+def simclr_model(input_shape=(32, 32, 3), projection_dim=64) -> Model:
+    """
+    Builds a SimCLR model based on ResNet50.
+    
+    >>> simclr_model().summary()  # doctest: +ELLIPSIS
+    Model: "model"
+    _________________________________________________________________
+    ...
+    """
+    base_model = ResNet50(include_top=False, input_shape=input_shape, pooling="avg")
+    base_model.trainable = True
+    
+    inputs = layers.Input(shape=input_shape)
+    x = base_model(inputs, training=True)
+    x = layers.Dense(projection_dim, activation="relu")(x)
+    outputs = layers.Dense(projection_dim)(x)
+    return Model(inputs, outputs)
+
+
+def contrastive_loss(projection_1, projection_2, temperature=0.1):
+    """
+    Contrastive loss function for self-supervised learning.
+    
+    >>> contrastive_loss(np.array([0.1]), np.array([0.2]))
+    0.0
+    """
+    z = tf.math.l2_normalize(tf.concat([projection_1, projection_2], 0), axis=1)
+    n = tf.shape(z)[0]  # 2N rows; the positive of row i is row (i + N) % 2N
+    sim = z @ tf.transpose(z) / temperature - 1e9 * tf.eye(n, dtype=z.dtype)
+    labels = tf.roll(tf.range(n), n // 2, 0)
+    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels, sim))
+
+
+def main() -> None:
+    """
+    Pretrain SimCLR on CIFAR-10, then train and evaluate a classifier head.
+
+    >>> main()  # doctest: +SKIP
+    """
+    # Load CIFAR-10 and scale pixels to [0, 1]
+    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
+    x_train, x_test = x_train / 255.0, x_test / 255.0
+    le = LabelEncoder()
+    y_train = le.fit_transform(y_train.flatten())
+    y_test = le.transform(y_test.flatten())
+    
+    # Split data into train and validation sets
+    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2)
+
+    # Build the SimCLR model
+    model = simclr_model()
+    optimizer = Adam()
+    loss_fn = SparseCategoricalCrossentropy(from_logits=False)  # head emits softmax
+
+    # Contrastive pretraining on mini-batches; a full-dataset pass exhausts memory
+    batch_size = 256
+    for epoch in range(10):
+        for start in range(0, len(x_train), batch_size):
+            batch = x_train[start : start + batch_size]
+            with tf.GradientTape() as tape:
+                # Two independently augmented views of the same images
+                projections_1 = model(tf.image.random_flip_left_right(batch))
+                projections_2 = model(tf.image.random_flip_left_right(batch))
+                loss = contrastive_loss(projections_1, projections_2)
+            gradients = tape.gradient(loss, model.trainable_variables)
+            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+        print(f"Epoch {epoch+1}: Contrastive Loss = {loss.numpy()}")
+
+    # Create a new model with a classification head for evaluation
+    classifier = layers.Dense(10, activation="softmax")(model.output)
+    classifier_model = Model(model.input, classifier)
+    classifier_model.compile(optimizer=Adam(), loss=loss_fn, metrics=["accuracy"])
+    classifier_model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=5)
+
+    # A Keras model is not a scikit-learn estimator, so plot from predictions
+    y_pred = classifier_model.predict(x_test).argmax(axis=1)
+    ConfusionMatrixDisplay.from_predictions(
+        y_test, y_pred, display_labels=le.classes_, cmap="Blues", normalize="true"
+    )
+    plt.title("Normalized Confusion Matrix - CIFAR-10")
+    plt.show()
+
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod(verbose=True)
+    main()

From 0e1da4b5184983850b822260ab4a5b4f612fbb7f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 8 Oct 2024 19:15:20 +0000
Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/simCLR.py | 11 ++++++-----
 machine_learning/simclr.py | 14 ++++++++------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/machine_learning/simCLR.py b/machine_learning/simCLR.py
index 4f8833af6a14..2f66d94e9321 100644
--- a/machine_learning/simCLR.py
+++ b/machine_learning/simCLR.py
@@ -18,7 +18,7 @@
 def data_handling(data: dict) -> tuple:
     """
     Handles the data by splitting features and targets.
-    
+
     >>> data_handling({'data': np.array([[0.1, 0.2], [0.3, 0.4]]), 'target': np.array([0, 1])})
     (array([[0.1, 0.2], [0.3, 0.4]]), array([0, 1]))
     """
@@ -28,7 +28,7 @@ def data_handling(data: dict) -> tuple:
 def simclr_model(input_shape=(32, 32, 3), projection_dim=64) -> Model:
     """
     Builds a SimCLR model based on ResNet50.
-    
+
     >>> simclr_model().summary()  # doctest: +ELLIPSIS
     Model: "model"
     _________________________________________________________________
@@ -36,7 +36,7 @@ def simclr_model(input_shape=(32, 32, 3), projection_dim=64) -> Model:
     """
     base_model = ResNet50(include_top=False, input_shape=input_shape, pooling="avg")
     base_model.trainable = True
-    
+
     inputs = layers.Input(shape=input_shape)
     x = base_model(inputs, training=True)
     x = layers.Dense(projection_dim, activation="relu")(x)
@@ -47,7 +47,7 @@ def simclr_model(input_shape=(32, 32, 3), projection_dim=64) -> Model:
 def contrastive_loss(projection_1, projection_2, temperature=0.1):
     """
     Contrastive loss function for self-supervised learning.
-    
+
     >>> contrastive_loss(np.array([0.1]), np.array([0.2]))
     0.0
     """
@@ -70,7 +70,7 @@ def main() -> None:
     le = LabelEncoder()
     y_train = le.fit_transform(y_train.flatten())
     y_test = le.transform(y_test.flatten())
-    
+
     # Split data into train and validation sets
     x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2)
 
@@ -110,5 +110,6 @@ def main() -> None:
 
 if __name__ == "__main__":
     import doctest
+
     doctest.testmod(verbose=True)
     main()
diff --git a/machine_learning/simclr.py b/machine_learning/simclr.py
index 5cd3505cc1b8..f5e0f7f46a01 100644
--- a/machine_learning/simclr.py
+++ b/machine_learning/simclr.py
@@ -1,7 +1,8 @@
 """
-Implementation of SimCLR. 
+Implementation of SimCLR.
 Self-Supervised Learning (SSL) with SimCLR. SimCLR is a framework for learning visual representations without labels by maximizing the agreement between different augmented views of the same image.
 """
+
 import numpy as np
 import tensorflow as tf
 from tensorflow.keras import layers
@@ -18,7 +19,7 @@
 def data_handling(data: dict) -> tuple:
     """
     Handles the data by splitting features and targets.
-    
+
     >>> data_handling({'data': np.array([[0.1, 0.2], [0.3, 0.4]]), 'target': np.array([0, 1])})
     (array([[0.1, 0.2], [0.3, 0.4]]), array([0, 1]))
     """
@@ -28,7 +29,7 @@ def data_handling(data: dict) -> tuple:
 def simclr_model(input_shape=(32, 32, 3), projection_dim=64) -> Model:
     """
     Builds a SimCLR model based on ResNet50.
-    
+
     >>> simclr_model().summary()  # doctest: +ELLIPSIS
     Model: "model"
     _________________________________________________________________
@@ -36,7 +37,7 @@ def simclr_model(input_shape=(32, 32, 3), projection_dim=64) -> Model:
     """
     base_model = ResNet50(include_top=False, input_shape=input_shape, pooling="avg")
     base_model.trainable = True
-    
+
     inputs = layers.Input(shape=input_shape)
     x = base_model(inputs, training=True)
     x = layers.Dense(projection_dim, activation="relu")(x)
@@ -47,7 +48,7 @@ def simclr_model(input_shape=(32, 32, 3), projection_dim=64) -> Model:
 def contrastive_loss(projection_1, projection_2, temperature=0.1):
     """
     Contrastive loss function for self-supervised learning.
-    
+
     >>> contrastive_loss(np.array([0.1]), np.array([0.2]))
     0.0
     """
@@ -70,7 +71,7 @@ def main() -> None:
     le = LabelEncoder()
     y_train = le.fit_transform(y_train.flatten())
     y_test = le.transform(y_test.flatten())
-    
+
     # Split data into train and validation sets
     x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2)
 
@@ -110,5 +111,6 @@ def main() -> None:
 
 if __name__ == "__main__":
     import doctest
+
     doctest.testmod(verbose=True)
     main()

From e6fe4c4b14278eadab86c93255a2a38dc2e43ee8 Mon Sep 17 00:00:00 2001
From: Ishan Mishra <33893659+legitishan@users.noreply.github.com>
Date: Wed, 9 Oct 2024 00:50:47 +0530
Subject: [PATCH 4/4] Delete machine_learning/simCLR.py

---
 machine_learning/simCLR.py | 115 -------------------------------------
 1 file changed, 115 deletions(-)
 delete mode 100644 machine_learning/simCLR.py

diff --git a/machine_learning/simCLR.py b/machine_learning/simCLR.py
deleted file mode 100644
index 2f66d94e9321..000000000000
--- a/machine_learning/simCLR.py
+++ /dev/null
@@ -1,115 +0,0 @@
-"""
-Implementation of Self-Supervised Learning (SSL) with SimCLR. SimCLR is a framework for learning visual representations without labels by maximizing the agreement between different augmented views of the same image.
-"""
-
-import numpy as np
-import tensorflow as tf
-from tensorflow.keras import layers
-from tensorflow.keras.models import Model
-from tensorflow.keras.optimizers import Adam
-from tensorflow.keras.losses import SparseCategoricalCrossentropy
-from tensorflow.keras.applications import ResNet50
-from sklearn.metrics import ConfusionMatrixDisplay
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-import matplotlib.pyplot as plt
-
-
-def data_handling(data: dict) -> tuple:
-    """
-    Handles the data by splitting features and targets.
-
-    >>> data_handling({'data': np.array([[0.1, 0.2], [0.3, 0.4]]), 'target': np.array([0, 1])})
-    (array([[0.1, 0.2], [0.3, 0.4]]), array([0, 1]))
-    """
-    return (data["data"], data["target"])
-
-
-def simclr_model(input_shape=(32, 32, 3), projection_dim=64) -> Model:
-    """
-    Builds a SimCLR model based on ResNet50.
-
-    >>> simclr_model().summary()  # doctest: +ELLIPSIS
-    Model: "model"
-    _________________________________________________________________
-    ...
-    """
-    base_model = ResNet50(include_top=False, input_shape=input_shape, pooling="avg")
-    base_model.trainable = True
-
-    inputs = layers.Input(shape=input_shape)
-    x = base_model(inputs, training=True)
-    x = layers.Dense(projection_dim, activation="relu")(x)
-    outputs = layers.Dense(projection_dim)(x)
-    return Model(inputs, outputs)
-
-
-def contrastive_loss(projection_1, projection_2, temperature=0.1):
-    """
-    Contrastive loss function for self-supervised learning.
-
-    >>> contrastive_loss(np.array([0.1]), np.array([0.2]))
-    0.0
-    """
-    projections = tf.concat([projection_1, projection_2], axis=0)
-    similarity_matrix = tf.matmul(projections, projections, transpose_b=True)
-    labels = tf.range(tf.shape(projections)[0])
-    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels, similarity_matrix)
-    return tf.reduce_mean(loss)
-
-
-def main() -> None:
-    """
-    >>> main()
-    """
-    # Load a small dataset (using CIFAR-10 as an example)
-    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
-    x_train, x_test = x_train / 255.0, x_test / 255.0
-
-    # Use label encoder to convert labels into numerical form
-    le = LabelEncoder()
-    y_train = le.fit_transform(y_train.flatten())
-    y_test = le.transform(y_test.flatten())
-
-    # Split data into train and validation sets
-    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2)
-
-    # Build the SimCLR model
-    model = simclr_model()
-    optimizer = Adam()
-    loss_fn = SparseCategoricalCrossentropy(from_logits=True)
-
-    # Training the SimCLR model
-    for epoch in range(10):
-        with tf.GradientTape() as tape:
-            projections_1 = model(x_train)
-            projections_2 = model(x_train)  # Normally, this would use augmented views
-            loss = contrastive_loss(projections_1, projections_2)
-        gradients = tape.gradient(loss, model.trainable_variables)
-        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
-        print(f"Epoch {epoch+1}: Contrastive Loss = {loss.numpy()}")
-
-    # Create a new model with a classification head for evaluation
-    classifier = layers.Dense(10, activation="softmax")(model.output)
-    classifier_model = Model(model.input, classifier)
-    classifier_model.compile(optimizer=Adam(), loss=loss_fn, metrics=["accuracy"])
-    classifier_model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=5)
-
-    # Display the confusion matrix of the classifier
-    ConfusionMatrixDisplay.from_estimator(
-        classifier_model,
-        x_test,
-        y_test,
-        display_labels=le.classes_,
-        cmap="Blues",
-        normalize="true",
-    )
-    plt.title("Normalized Confusion Matrix - CIFAR-10")
-    plt.show()
-
-
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod(verbose=True)
-    main()