From d6609cbb73542d2b8bdd12515d80fc1bea4d7e7a Mon Sep 17 00:00:00 2001
From: Pritam Das <69068731+Pritam3355@users.noreply.github.com>
Date: Sat, 19 Oct 2024 01:02:11 +0530
Subject: [PATCH 01/27] Create README.md

---
 llm_experiments/README.md | 89 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 llm_experiments/README.md

diff --git a/llm_experiments/README.md b/llm_experiments/README.md
new file mode 100644
index 000000000000..d49513f9d026
--- /dev/null
+++ b/llm_experiments/README.md
@@ -0,0 +1,89 @@
+
+
+
+# Mixtral-Experiment Series
+
+Welcome to the Mixtral-Experiment series! This series of notebooks and scripts aims to provide a comprehensive guide on investigating the internal workings of Large Language Models (LLMs), understanding how they process inputs, and experimenting with their architectures.
+
+## Table of Contents
+
+- [Introduction](#introduction)
+- [Series Overview](#series-overview)
+- [Getting Started](#getting-started)
+- [Notebooks and Scripts](#notebooks-and-scripts)
+- [Contributing](#contributing)
+- [License](#license)
+
+## Introduction
+
+Large Language Models (LLMs) have revolutionized the field of natural language processing (NLP) by achieving state-of-the-art performance on various tasks. However, understanding their internal workings and how they process inputs can be challenging. This series aims to demystify LLMs by providing detailed explanations, hands-on experiments, and practical tips for tweaking their architectures.
+
+## Series Overview
+
+The Mixtral-Experiment series will cover the following topics:
+
+1. **Understanding LLM Architectures**:
+   - An overview of popular LLM architectures like Transformers, BERT, and Mixtral.
+   - Detailed explanations of key components such as embedding layers, self-attention mechanisms, and Mixture of Experts (MoE) layers.
+
+2. **Investigating Input Processing**:
+   - How inputs are tokenized and embedded.
+   - The role of attention mechanisms in processing sequences.
+   - Visualizing and analyzing the outputs at various layers of the model.
+
+3. **Tweaking LLM Architectures**:
+   - Experimenting with different configurations and hyperparameters.
+   - Modifying existing LLM architectures to improve performance or adapt to specific tasks.
+   - Implementing custom layers and components.
+
+4. **Conducting New Experiments**:
+   - Designing and implementing new experiments to test hypotheses about LLM behavior.
+   - Evaluating the impact of architectural changes on model performance.
+   - Sharing insights and findings with the community.
+
+## Getting Started
+
+To get started with the LLM-Experiment series, you will need the following:
+
+1. **Python Environment**:
+   - These notebooks were created in Kaggle and Google Colab, so it is recommended to use one of those platforms to reproduce the results for other models.
+
+
+2. **Hugging Face Account**:
+   - Create a Hugging Face account and obtain an API token.
+   - Log in to Hugging Face using the token (or your username and token).
+   - Most of the Mistral and Llama models require accepting a usage agreement on their model pages before download.
+
+3. **Notebooks and Scripts**:
+   - Clone this repository to access the notebooks and scripts, or open them directly in Google Colab.
+   - Follow the instructions in each notebook to run the experiments and analyze the results.
+
+## Notebooks and Scripts
+
+The series will include the following notebooks and scripts:
+
+1. **Mixtral Model Analysis**:
+   - Analyzing the architecture and configuration of the Mixtral model.
+   - Registering forward hooks to capture the outputs at various layers (see the hook sketch below).
+
+2. **Input Processing and Embedding**: Upcoming
+
+
+3. **Attention Mechanisms and Improvements**: Upcoming
+
+
+4. **Rolling Buffer, KV-Cache, Sliding Window Attention**: Upcoming
+
+
+5. **Tweaking Model Architectures - Adapters, Down-Casting**: Upcoming
+
+
+## Contributing
+
+We welcome contributions from the community! If you have any ideas, suggestions, or improvements, please feel free to open an issue or submit a pull request.
+
+## License
+
+This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.
+
+

From 28b1f02fef762f2a35d07258981bbe1afd5e0c1b Mon Sep 17 00:00:00 2001
From: Pritam Das <69068731+Pritam3355@users.noreply.github.com>
Date: Sat, 19 Oct 2024 01:06:34 +0530
Subject: [PATCH 02/27] Update README.md

---
 llm_experiments/README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llm_experiments/README.md b/llm_experiments/README.md
index d49513f9d026..06283bf31a07 100644
--- a/llm_experiments/README.md
+++ b/llm_experiments/README.md
@@ -1,9 +1,9 @@
 
 
 
-# Mixtral-Experiment Series
+# LLM-Experiment Series
 
-Welcome to the Mixtral-Experiment series! This series of notebooks and scripts aims to provide a comprehensive guide on investigating the internal workings of Large Language Models (LLMs), understanding how they process inputs, and experimenting with their architectures.
+Welcome to the LLM-Experiment series! This series of notebooks and scripts aims to provide a comprehensive guide on investigating the internal workings of Large Language Models (LLMs), understanding how they process inputs, and experimenting with their architectures.
 
 ## Table of Contents
 
@@ -20,11 +20,11 @@ Large Language Models (LLMs) have revolutionized the field of natural language p
 
 ## Series Overview
 
-The Mixtral-Experiment series will cover the following topics:
+The LLM-Experiment series will cover the following topics:
 
 1. **Understanding LLM Architectures**:
-   - An overview of popular LLM architectures like Transformers, BERT, and Mixtral.
-   - Detailed explanations of key components such as embedding layers, self-attention mechanisms, and Mixture of Experts (MoE) layers.
+   - An overview of popular open source LLM architectures like Whisper, Llama, and Mixtral.
+   - Key troubleshooting steps encountered during experimentation
 
 2. **Investigating Input Processing**:
    - How inputs are tokenized and embedded.
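**Note on the notebook added in the next patch.** The analysis notebook iterates over a dictionary named `outputs` that maps module names to tensors captured during the forward pass, but the hook-registration cell itself does not appear in the patch. The sketch below shows one way such a dictionary can be populated with PyTorch forward hooks; the module-name suffixes are assumptions based on the Hugging Face Mixtral module tree, and `model` / `inputs` are the objects created in the notebook's earlier cells.

```python
import torch

outputs = {}  # module name -> captured output


def make_hook(name):
    def hook(module, args, output):
        # Later layers overwrite earlier ones, so each key ends up holding the
        # output captured from the last module whose name matched.
        outputs[name] = output

    return hook


handles = []
for name, module in model.named_modules():
    if name.endswith(
        ("embed_tokens", "input_layernorm", "post_attention_layernorm",
         "q_proj", "k_proj", "v_proj", "o_proj", "gate", "norm", "lm_head")
    ):
        handles.append(module.register_forward_hook(make_hook(name.split(".")[-1])))

with torch.no_grad():
    _ = model(**inputs)

for handle in handles:  # always remove hooks once the pass is done
    handle.remove()
```

The shape walkthrough in the same notebook reports 4096-wide `q_proj` outputs but only 1024-wide `k_proj`/`v_proj` outputs. That asymmetry is grouped-query attention: Mixtral-8x7B uses 32 query heads and 8 shared key/value heads with a head dimension of 128, so both widths follow directly:

```python
hidden_size = 4096
num_attention_heads = 32  # query heads
num_key_value_heads = 8   # shared key/value heads (grouped-query attention)
head_dim = hidden_size // num_attention_heads  # 128

print(num_attention_heads * head_dim)  # 4096 -> q_proj / o_proj width
print(num_key_value_heads * head_dim)  # 1024 -> k_proj / v_proj width
```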
From 998eed43a3938bf6c5b2b7bd1f2496095249754c Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sat, 19 Oct 2024 01:08:56 +0530 Subject: [PATCH 03/27] Add files via upload --- llm_experiments/Mixtral_Experiment.ipynb | 965 +++++++++++++++++++++++ 1 file changed, 965 insertions(+) create mode 100644 llm_experiments/Mixtral_Experiment.ipynb diff --git a/llm_experiments/Mixtral_Experiment.ipynb b/llm_experiments/Mixtral_Experiment.ipynb new file mode 100644 index 000000000000..4903471af082 --- /dev/null +++ b/llm_experiments/Mixtral_Experiment.ipynb @@ -0,0 +1,965 @@ +{ + "metadata": { + "kernelspec": { + "language": "python", + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.14", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kaggle": { + "accelerator": "nvidiaTeslaT4", + "dataSources": [], + "dockerImageVersionId": 30787, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook", + "isGpuEnabled": true + }, + "colab": { + "name": "Mixtral-Experiment", + "provenance": [] + } + }, + "nbformat_minor": 0, + "nbformat": 4, + "cells": [ + { + "cell_type": "code", + "source": [ + "from huggingface_hub import login\n", + "\n", + "# Fetch Hugging Face username and token from Colab secrets\n", + "HF_USERNAME = \"pritam3355\"\n", + "HF_TOKEN = \"HF_TOKEN\"\n", + "\n", + "# Login to Hugging Face\n", + "try:\n", + " login(token=HF_TOKEN)\n", + "except ValueError:\n", + " # If token is not valid or found, login with username and token\n", + " # (likely requires manual authorization)\n", + " login(username=HF_USERNAME, token=HF_TOKEN)" + ], + "metadata": { + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "trusted": true, + "execution": { + "iopub.status.busy": "2024-10-18T18:20:43.271188Z", + "iopub.execute_input": "2024-10-18T18:20:43.271841Z", + "iopub.status.idle": "2024-10-18T18:20:43.353056Z", + "shell.execute_reply.started": "2024-10-18T18:20:43.271801Z", + "shell.execute_reply": "2024-10-18T18:20:43.35218Z" + }, + "id": "H5JWFz2XAAak", + "outputId": "af45db86-89f6-4349-c2d9-15d969f3d3f2" + }, + "outputs": [ + { + "name": "stdout", + "text": "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\nToken is valid (permission: fineGrained).\nYour token has been saved to /root/.cache/huggingface/token\nLogin successful\n", + "output_type": "stream" + } + ], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "Here Using Mixtral model to extract and analyze how the input sequence is processed in Forward pass. 
Mixtral is similar to Mistal model but has more parameters" + ], + "metadata": { + "id": "bLD_CkBUAQMy" + } + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer\n", + "\n", + "model_id = \"mistralai/Mixtral-8x7B-v0.1\"\n", + "tokenizer = AutoTokenizer.from_pretrained(model_id,device=\"auto\")\n", + "\n", + "model = AutoModelForCausalLM.from_pretrained(model_id,trust_remote_code=True,\n", + " torch_dtype=torch.bfloat16,\n", + " low_cpu_mem_usage=True,device_map=\"auto\")\n" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2024-10-18T18:20:43.354757Z", + "iopub.execute_input": "2024-10-18T18:20:43.355493Z", + "iopub.status.idle": "2024-10-18T18:30:40.651163Z", + "shell.execute_reply.started": "2024-10-18T18:20:43.355448Z", + "shell.execute_reply": "2024-10-18T18:30:40.650377Z" + }, + "id": "E3nm2iWcAAaq", + "outputId": "82d9a608-7eff-4578-b328-f9f773ed4f39", + "colab": { + "referenced_widgets": [ + "fa5c2b7f05bc412993098a3731e72989", + "e64a4b6de34d4f40b88305ce507e3658", + "4675ed906a964735b4334458935ab4b9", + "e4f24bafae8f4397b76818a34ca9d6e4", + "3621e97c28544d34ab3953c22d227cd0", + "dd02aa16c10b4ab78373aa3dae939489", + "44e75ecc95b74f03a7a58e6ea21165c1", + "6d26de44c0334077b6c14104747a48ad", + "57c7fa8051a94bcb96c0309651ab8298", + "b736720173fd4ba5bbe54cbcc1177423", + "368fe041fff84949ac30d3d45ac78a0d", + "79ff492b16e946c8a6238d31b181ffc8", + "2a12b5905b434c11beaaceaf7e1a6394", + "9f16b85fde7148b7931c30fb024c87d5", + "f0bae3fc9925442e82d58ecd7a305808", + "2181a83c39114bc78b1e4859b3ccdfed", + "14ad494e78084d8983bc6c0751f9d941", + "280600190e10484db98261256542f236", + "562e9f5c0d0d4228b218553019e483b6", + "cc6675e71cea4018b6adff29d60f0a82", + "39633f760e104265b1ddc2bcb3e4961d", + "64288ea1c3074a528339b9d0f9729d18", + "584114fa6b554a1495f6aa14011e0cc6", + "2756416bfbcf474c94c1ca2ab4b7d8e3", + "8c6e4f33682040feb42c1385c66b7ba2", + "68cc9722525c46328cf963c2a4f2740a", + "06367bbf0c094ba1bc7d481fb1bfc3f9", + "1434b26ed3b4449b8fd6a76e0f1e5c97" + ] + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "tokenizer_config.json: 0%| | 0.00/967 [00:00" + }, + "metadata": {} + } + ], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "## Forward Pass" + ], + "metadata": { + "id": "vORmhXXTAAa3" + } + }, + { + "cell_type": "code", + "source": [ + "input_text = \"The quick brown fox jumps over the lazy dog !\"\n", + "\n", + "# Tokenize the input text\n", + "inputs = tokenizer(input_text, return_tensors=\"pt\")\n", + "print(\"Tokenized inputs {'input_ids','attention_mask'} - \",inputs)\n", + "print(\"Decoded tokens : \",tokenizer.decode(inputs['input_ids'][0]))" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2024-10-18T18:30:40.706383Z", + "iopub.execute_input": "2024-10-18T18:30:40.706895Z", + "iopub.status.idle": "2024-10-18T18:30:40.728093Z", + "shell.execute_reply.started": "2024-10-18T18:30:40.706863Z", + "shell.execute_reply": "2024-10-18T18:30:40.727243Z" + }, + "id": "dcC4RjNTAAa4", + "outputId": "f0d4c2e3-b7f6-471d-9b9b-ce5316c47431" + }, + "outputs": [ + { + "name": "stdout", + "text": "Tokenized inputs {'input_ids','attention_mask'} - {'input_ids': tensor([[ 1, 415, 2936, 9060, 285, 1142, 461, 10575, 754, 272,\n 17898, 3914, 918]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\nDecoded tokens : The quick brown fox jumps over the lazy dog !\n", + "output_type": "stream" + 
} + ], + "execution_count": null + }, + { + "cell_type": "code", + "source": [ + "\n", + "with torch.no_grad():\n", + " model_output = model(**inputs)" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2024-10-18T18:30:40.729287Z", + "iopub.execute_input": "2024-10-18T18:30:40.729872Z", + "iopub.status.idle": "2024-10-18T18:36:43.660892Z", + "shell.execute_reply.started": "2024-10-18T18:30:40.72983Z", + "shell.execute_reply": "2024-10-18T18:36:43.660087Z" + }, + "id": "4x2A5-m-AAa6", + "outputId": "d0fc43d2-1229-4582-d4d3-6b5f745be24e" + }, + "outputs": [ + { + "name": "stderr", + "text": "Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n", + "output_type": "stream" + } + ], + "execution_count": null + }, + { + "cell_type": "code", + "source": [ + "for layer, output in outputs.items():\n", + " print(f\"Output at {layer}: \")\n", + " if isinstance(output, torch.Tensor):\n", + " print(output.shape, type(output))\n", + " elif isinstance(output, tuple):\n", + " for i, o in enumerate(output):\n", + " print(f\"Output {i}: {o.shape if isinstance(o, torch.Tensor) else type(o)}\")\n", + " else:\n", + " print(type(output))\n", + " print(\"-\" * 100)" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2024-10-18T18:56:11.28238Z", + "iopub.execute_input": "2024-10-18T18:56:11.283252Z", + "iopub.status.idle": "2024-10-18T18:56:11.291437Z", + "shell.execute_reply.started": "2024-10-18T18:56:11.283214Z", + "shell.execute_reply": "2024-10-18T18:56:11.290478Z" + }, + "id": "xVuaYV3pAAa7", + "outputId": "84e2f36e-0f10-4be1-9fdd-581fe61fabb1" + }, + "outputs": [ + { + "name": "stdout", + "text": "Output at embed_tokens: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_layer_1: \nOutput 0: torch.Size([1, 13, 4096])\nOutput 1: \nOutput 2: \n----------------------------------------------------------------------------------------------------\nOutput at block_sparse_moe_experts: \ntorch.Size([3, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at post_attention_layernorm: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at norm: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at lm_head: \ntorch.Size([1, 13, 32000]) \n----------------------------------------------------------------------------------------------------\nOutput at input_layernorm: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_q_proj: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_k_proj: \ntorch.Size([1, 13, 1024]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_v_proj: \ntorch.Size([1, 13, 1024]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_o_proj: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at 
block_sparse_moe_gate: \ntorch.Size([13, 8]) \n----------------------------------------------------------------------------------------------------\n", + "output_type": "stream" + } + ], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "### Explanation of Shapes:\n", + "\n", + "### 1. **embed_tokens**\n", + "- **Shape:** `torch.Size([1, 13, 4096])`\n", + "- **Explanation:**\n", + " - `1`: Batch size (number of sequences in this batch, here it's 1 sequence).\n", + " - `13`: Sequence length (the number of tokens in the input, here 13 tokens).\n", + " - `4096`: Embedding size (each token is mapped to a 4096-dimensional vector).\n", + " \n", + " **Shape Format:** `(batch_size, seq_len, embed_dim)`\n", + "\n", + "### 2. **self_attn_layer_1**\n", + "- **Shape (Output 0):** `torch.Size([1, 13, 4096])`\n", + "- **Explanation:**\n", + " - `1`: Batch size.\n", + " - `13`: Sequence length.\n", + " - `4096`: Hidden size (output of the attention mechanism).\n", + "\n", + " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", + "\n", + "### 3. **block_sparse_moe_experts**\n", + "- **Shape:** `torch.Size([3, 4096])`\n", + "- **Explanation:**\n", + " - `3`: Number of activated experts for the MoE (Mixture of Experts) layer. In the `block_sparse_moe`, two experts are chosen per token (2 tokens in the batch may activate the same experts, hence the 3).\n", + " - `4096`: Expert embedding size (the dimensionality of the expert's output).\n", + " \n", + " **Shape Format:** `(num_experts, expert_embed_dim)`\n", + "\n", + "### 4. **post_attention_layernorm**\n", + "- **Shape:** `torch.Size([1, 13, 4096])`\n", + "- **Explanation:**\n", + " - `1`: Batch size.\n", + " - `13`: Sequence length.\n", + " - `4096`: Output dimension after the post-attention normalization step.\n", + " \n", + " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", + "\n", + "### 5. **norm**\n", + "- **Shape:** `torch.Size([1, 13, 4096])`\n", + "- **Explanation:**\n", + " - `1`: Batch size.\n", + " - `13`: Sequence length.\n", + " - `4096`: Output dimension after applying the final normalization layer.\n", + " \n", + " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", + "\n", + "### 6. **lm_head**\n", + "- **Shape:** `torch.Size([1, 13, 32000])`\n", + "- **Explanation:**\n", + " - `1`: Batch size.\n", + " - `13`: Sequence length.\n", + " - `32000`: Vocabulary size (logits over the vocabulary for each token in the sequence).\n", + " \n", + " **Shape Format:** `(batch_size, seq_len, vocab_size)`\n", + "\n", + "### 7. **input_layernorm**\n", + "- **Shape:** `torch.Size([1, 13, 4096])`\n", + "- **Explanation:**\n", + " - `1`: Batch size.\n", + " - `13`: Sequence length.\n", + " - `4096`: Output dimension after the input layer normalization step.\n", + " \n", + " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", + "\n", + "### 8. **self_attn_q_proj**\n", + "- **Shape:** `torch.Size([1, 13, 4096])`\n", + "- **Explanation:**\n", + " - `1`: Batch size.\n", + " - `13`: Sequence length.\n", + " - `4096`: Query projection size (the hidden state is projected to the query vector space).\n", + " \n", + " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", + "\n", + "### 9. 
**self_attn_k_proj**\n", + "- **Shape:** `torch.Size([1, 13, 1024])`\n", + "- **Explanation:**\n", + " - `1`: Batch size.\n", + " - `13`: Sequence length.\n", + " - `1024`: Key projection size (here, the key is projected to a smaller dimensional space compared to queries/values).\n", + " \n", + " **Shape Format:** `(batch_size, seq_len, key_dim)`\n", + "\n", + "### 10. **self_attn_v_proj**\n", + "- **Shape:** `torch.Size([1, 13, 1024])`\n", + "- **Explanation:**\n", + " - `1`: Batch size.\n", + " - `13`: Sequence length.\n", + " - `1024`: Value projection size (the values are also projected to the same size as the keys).\n", + " \n", + " **Shape Format:** `(batch_size, seq_len, value_dim)`\n", + "\n", + "### 11. **self_attn_o_proj**\n", + "- **Shape:** `torch.Size([1, 13, 4096])`\n", + "- **Explanation:**\n", + " - `1`: Batch size.\n", + " - `13`: Sequence length.\n", + " - `4096`: Output projection size (the final result after the attention mechanism is projected back to the original hidden dimension).\n", + " \n", + " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", + "\n", + "### 12. **block_sparse_moe_gate**\n", + "- **Shape:** `torch.Size([13, 8])`\n", + "- **Explanation:**\n", + " - `13`: Sequence length (the gate operates per token).\n", + " - `8`: Number of experts (gating decisions are made over all available experts).\n", + " \n", + " **Shape Format:** `(seq_len, num_experts)`\n", + "\n", + "### Summary Table:\n", + "\n", + "| Layer Name | Shape Format | Dimensions | Notes |\n", + "|------------------------------|------------------------------------|------------|------------------------------------------------------------|\n", + "| `embed_tokens` | `(batch_size, seq_len, embed_dim)` | `[1, 13, 4096]` | Embedding tokens from vocabulary. |\n", + "| `self_attn_layer_1` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Output of first attention layer. |\n", + "| `block_sparse_moe_experts` | `(num_experts, expert_embed_dim)` | `[3, 4096]` | Expert outputs in MoE block. |\n", + "| `post_attention_layernorm` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Layer norm after attention. |\n", + "| `norm` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Final normalization layer. |\n", + "| `lm_head` | `(batch_size, seq_len, vocab_size)`| `[1, 13, 32000]` | Logits for each token over the vocabulary. |\n", + "| `input_layernorm` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Input layer normalization. |\n", + "| `self_attn_q_proj` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Query projection in self-attention. |\n", + "| `self_attn_k_proj` | `(batch_size, seq_len, key_dim)` | `[1, 13, 1024]` | Key projection in self-attention. |\n", + "| `self_attn_v_proj` | `(batch_size, seq_len, value_dim)` | `[1, 13, 1024]` | Value projection in self-attention. |\n", + "| `self_attn_o_proj` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Output projection after attention. |\n", + "| `block_sparse_moe_gate` | `(seq_len, num_experts)` | `[13, 8]` | Gating decisions for the mixture of experts. 
|\n", + "\n" + ], + "metadata": { + "id": "FyugDOzXAAa8" + } + } + ] +} \ No newline at end of file From 7019bf4a7fdc8a4fa53f26af222c4ac87096c7e4 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sat, 19 Oct 2024 01:17:28 +0530 Subject: [PATCH 04/27] Delete llm_experiments directory --- llm_experiments/Mixtral_Experiment.ipynb | 965 ----------------------- llm_experiments/README.md | 89 --- 2 files changed, 1054 deletions(-) delete mode 100644 llm_experiments/Mixtral_Experiment.ipynb delete mode 100644 llm_experiments/README.md diff --git a/llm_experiments/Mixtral_Experiment.ipynb b/llm_experiments/Mixtral_Experiment.ipynb deleted file mode 100644 index 4903471af082..000000000000 --- a/llm_experiments/Mixtral_Experiment.ipynb +++ /dev/null @@ -1,965 +0,0 @@ -{ - "metadata": { - "kernelspec": { - "language": "python", - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.14", - "mimetype": "text/x-python", - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "pygments_lexer": "ipython3", - "nbconvert_exporter": "python", - "file_extension": ".py" - }, - "kaggle": { - "accelerator": "nvidiaTeslaT4", - "dataSources": [], - "dockerImageVersionId": 30787, - "isInternetEnabled": true, - "language": "python", - "sourceType": "notebook", - "isGpuEnabled": true - }, - "colab": { - "name": "Mixtral-Experiment", - "provenance": [] - } - }, - "nbformat_minor": 0, - "nbformat": 4, - "cells": [ - { - "cell_type": "code", - "source": [ - "from huggingface_hub import login\n", - "\n", - "# Fetch Hugging Face username and token from Colab secrets\n", - "HF_USERNAME = \"pritam3355\"\n", - "HF_TOKEN = \"HF_TOKEN\"\n", - "\n", - "# Login to Hugging Face\n", - "try:\n", - " login(token=HF_TOKEN)\n", - "except ValueError:\n", - " # If token is not valid or found, login with username and token\n", - " # (likely requires manual authorization)\n", - " login(username=HF_USERNAME, token=HF_TOKEN)" - ], - "metadata": { - "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", - "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", - "trusted": true, - "execution": { - "iopub.status.busy": "2024-10-18T18:20:43.271188Z", - "iopub.execute_input": "2024-10-18T18:20:43.271841Z", - "iopub.status.idle": "2024-10-18T18:20:43.353056Z", - "shell.execute_reply.started": "2024-10-18T18:20:43.271801Z", - "shell.execute_reply": "2024-10-18T18:20:43.35218Z" - }, - "id": "H5JWFz2XAAak", - "outputId": "af45db86-89f6-4349-c2d9-15d969f3d3f2" - }, - "outputs": [ - { - "name": "stdout", - "text": "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\nToken is valid (permission: fineGrained).\nYour token has been saved to /root/.cache/huggingface/token\nLogin successful\n", - "output_type": "stream" - } - ], - "execution_count": null - }, - { - "cell_type": "markdown", - "source": [ - "Here Using Mixtral model to extract and analyze how the input sequence is processed in Forward pass. 
Mixtral is similar to Mistal model but has more parameters" - ], - "metadata": { - "id": "bLD_CkBUAQMy" - } - }, - { - "cell_type": "code", - "source": [ - "import torch\n", - "from transformers import AutoModelForCausalLM, AutoTokenizer\n", - "\n", - "model_id = \"mistralai/Mixtral-8x7B-v0.1\"\n", - "tokenizer = AutoTokenizer.from_pretrained(model_id,device=\"auto\")\n", - "\n", - "model = AutoModelForCausalLM.from_pretrained(model_id,trust_remote_code=True,\n", - " torch_dtype=torch.bfloat16,\n", - " low_cpu_mem_usage=True,device_map=\"auto\")\n" - ], - "metadata": { - "trusted": true, - "execution": { - "iopub.status.busy": "2024-10-18T18:20:43.354757Z", - "iopub.execute_input": "2024-10-18T18:20:43.355493Z", - "iopub.status.idle": "2024-10-18T18:30:40.651163Z", - "shell.execute_reply.started": "2024-10-18T18:20:43.355448Z", - "shell.execute_reply": "2024-10-18T18:30:40.650377Z" - }, - "id": "E3nm2iWcAAaq", - "outputId": "82d9a608-7eff-4578-b328-f9f773ed4f39", - "colab": { - "referenced_widgets": [ - "fa5c2b7f05bc412993098a3731e72989", - "e64a4b6de34d4f40b88305ce507e3658", - "4675ed906a964735b4334458935ab4b9", - "e4f24bafae8f4397b76818a34ca9d6e4", - "3621e97c28544d34ab3953c22d227cd0", - "dd02aa16c10b4ab78373aa3dae939489", - "44e75ecc95b74f03a7a58e6ea21165c1", - "6d26de44c0334077b6c14104747a48ad", - "57c7fa8051a94bcb96c0309651ab8298", - "b736720173fd4ba5bbe54cbcc1177423", - "368fe041fff84949ac30d3d45ac78a0d", - "79ff492b16e946c8a6238d31b181ffc8", - "2a12b5905b434c11beaaceaf7e1a6394", - "9f16b85fde7148b7931c30fb024c87d5", - "f0bae3fc9925442e82d58ecd7a305808", - "2181a83c39114bc78b1e4859b3ccdfed", - "14ad494e78084d8983bc6c0751f9d941", - "280600190e10484db98261256542f236", - "562e9f5c0d0d4228b218553019e483b6", - "cc6675e71cea4018b6adff29d60f0a82", - "39633f760e104265b1ddc2bcb3e4961d", - "64288ea1c3074a528339b9d0f9729d18", - "584114fa6b554a1495f6aa14011e0cc6", - "2756416bfbcf474c94c1ca2ab4b7d8e3", - "8c6e4f33682040feb42c1385c66b7ba2", - "68cc9722525c46328cf963c2a4f2740a", - "06367bbf0c094ba1bc7d481fb1bfc3f9", - "1434b26ed3b4449b8fd6a76e0f1e5c97" - ] - } - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "tokenizer_config.json: 0%| | 0.00/967 [00:00" - }, - "metadata": {} - } - ], - "execution_count": null - }, - { - "cell_type": "markdown", - "source": [ - "## Forward Pass" - ], - "metadata": { - "id": "vORmhXXTAAa3" - } - }, - { - "cell_type": "code", - "source": [ - "input_text = \"The quick brown fox jumps over the lazy dog !\"\n", - "\n", - "# Tokenize the input text\n", - "inputs = tokenizer(input_text, return_tensors=\"pt\")\n", - "print(\"Tokenized inputs {'input_ids','attention_mask'} - \",inputs)\n", - "print(\"Decoded tokens : \",tokenizer.decode(inputs['input_ids'][0]))" - ], - "metadata": { - "trusted": true, - "execution": { - "iopub.status.busy": "2024-10-18T18:30:40.706383Z", - "iopub.execute_input": "2024-10-18T18:30:40.706895Z", - "iopub.status.idle": "2024-10-18T18:30:40.728093Z", - "shell.execute_reply.started": "2024-10-18T18:30:40.706863Z", - "shell.execute_reply": "2024-10-18T18:30:40.727243Z" - }, - "id": "dcC4RjNTAAa4", - "outputId": "f0d4c2e3-b7f6-471d-9b9b-ce5316c47431" - }, - "outputs": [ - { - "name": "stdout", - "text": "Tokenized inputs {'input_ids','attention_mask'} - {'input_ids': tensor([[ 1, 415, 2936, 9060, 285, 1142, 461, 10575, 754, 272,\n 17898, 3914, 918]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\nDecoded tokens : The quick brown fox jumps over the lazy dog !\n", - "output_type": "stream" - 
} - ], - "execution_count": null - }, - { - "cell_type": "code", - "source": [ - "\n", - "with torch.no_grad():\n", - " model_output = model(**inputs)" - ], - "metadata": { - "trusted": true, - "execution": { - "iopub.status.busy": "2024-10-18T18:30:40.729287Z", - "iopub.execute_input": "2024-10-18T18:30:40.729872Z", - "iopub.status.idle": "2024-10-18T18:36:43.660892Z", - "shell.execute_reply.started": "2024-10-18T18:30:40.72983Z", - "shell.execute_reply": "2024-10-18T18:36:43.660087Z" - }, - "id": "4x2A5-m-AAa6", - "outputId": "d0fc43d2-1229-4582-d4d3-6b5f745be24e" - }, - "outputs": [ - { - "name": "stderr", - "text": "Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n", - "output_type": "stream" - } - ], - "execution_count": null - }, - { - "cell_type": "code", - "source": [ - "for layer, output in outputs.items():\n", - " print(f\"Output at {layer}: \")\n", - " if isinstance(output, torch.Tensor):\n", - " print(output.shape, type(output))\n", - " elif isinstance(output, tuple):\n", - " for i, o in enumerate(output):\n", - " print(f\"Output {i}: {o.shape if isinstance(o, torch.Tensor) else type(o)}\")\n", - " else:\n", - " print(type(output))\n", - " print(\"-\" * 100)" - ], - "metadata": { - "trusted": true, - "execution": { - "iopub.status.busy": "2024-10-18T18:56:11.28238Z", - "iopub.execute_input": "2024-10-18T18:56:11.283252Z", - "iopub.status.idle": "2024-10-18T18:56:11.291437Z", - "shell.execute_reply.started": "2024-10-18T18:56:11.283214Z", - "shell.execute_reply": "2024-10-18T18:56:11.290478Z" - }, - "id": "xVuaYV3pAAa7", - "outputId": "84e2f36e-0f10-4be1-9fdd-581fe61fabb1" - }, - "outputs": [ - { - "name": "stdout", - "text": "Output at embed_tokens: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_layer_1: \nOutput 0: torch.Size([1, 13, 4096])\nOutput 1: \nOutput 2: \n----------------------------------------------------------------------------------------------------\nOutput at block_sparse_moe_experts: \ntorch.Size([3, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at post_attention_layernorm: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at norm: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at lm_head: \ntorch.Size([1, 13, 32000]) \n----------------------------------------------------------------------------------------------------\nOutput at input_layernorm: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_q_proj: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_k_proj: \ntorch.Size([1, 13, 1024]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_v_proj: \ntorch.Size([1, 13, 1024]) \n----------------------------------------------------------------------------------------------------\nOutput at self_attn_o_proj: \ntorch.Size([1, 13, 4096]) \n----------------------------------------------------------------------------------------------------\nOutput at 
block_sparse_moe_gate: \ntorch.Size([13, 8]) \n----------------------------------------------------------------------------------------------------\n", - "output_type": "stream" - } - ], - "execution_count": null - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "\n", - "### Explanation of Shapes:\n", - "\n", - "### 1. **embed_tokens**\n", - "- **Shape:** `torch.Size([1, 13, 4096])`\n", - "- **Explanation:**\n", - " - `1`: Batch size (number of sequences in this batch, here it's 1 sequence).\n", - " - `13`: Sequence length (the number of tokens in the input, here 13 tokens).\n", - " - `4096`: Embedding size (each token is mapped to a 4096-dimensional vector).\n", - " \n", - " **Shape Format:** `(batch_size, seq_len, embed_dim)`\n", - "\n", - "### 2. **self_attn_layer_1**\n", - "- **Shape (Output 0):** `torch.Size([1, 13, 4096])`\n", - "- **Explanation:**\n", - " - `1`: Batch size.\n", - " - `13`: Sequence length.\n", - " - `4096`: Hidden size (output of the attention mechanism).\n", - "\n", - " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", - "\n", - "### 3. **block_sparse_moe_experts**\n", - "- **Shape:** `torch.Size([3, 4096])`\n", - "- **Explanation:**\n", - " - `3`: Number of activated experts for the MoE (Mixture of Experts) layer. In the `block_sparse_moe`, two experts are chosen per token (2 tokens in the batch may activate the same experts, hence the 3).\n", - " - `4096`: Expert embedding size (the dimensionality of the expert's output).\n", - " \n", - " **Shape Format:** `(num_experts, expert_embed_dim)`\n", - "\n", - "### 4. **post_attention_layernorm**\n", - "- **Shape:** `torch.Size([1, 13, 4096])`\n", - "- **Explanation:**\n", - " - `1`: Batch size.\n", - " - `13`: Sequence length.\n", - " - `4096`: Output dimension after the post-attention normalization step.\n", - " \n", - " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", - "\n", - "### 5. **norm**\n", - "- **Shape:** `torch.Size([1, 13, 4096])`\n", - "- **Explanation:**\n", - " - `1`: Batch size.\n", - " - `13`: Sequence length.\n", - " - `4096`: Output dimension after applying the final normalization layer.\n", - " \n", - " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", - "\n", - "### 6. **lm_head**\n", - "- **Shape:** `torch.Size([1, 13, 32000])`\n", - "- **Explanation:**\n", - " - `1`: Batch size.\n", - " - `13`: Sequence length.\n", - " - `32000`: Vocabulary size (logits over the vocabulary for each token in the sequence).\n", - " \n", - " **Shape Format:** `(batch_size, seq_len, vocab_size)`\n", - "\n", - "### 7. **input_layernorm**\n", - "- **Shape:** `torch.Size([1, 13, 4096])`\n", - "- **Explanation:**\n", - " - `1`: Batch size.\n", - " - `13`: Sequence length.\n", - " - `4096`: Output dimension after the input layer normalization step.\n", - " \n", - " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", - "\n", - "### 8. **self_attn_q_proj**\n", - "- **Shape:** `torch.Size([1, 13, 4096])`\n", - "- **Explanation:**\n", - " - `1`: Batch size.\n", - " - `13`: Sequence length.\n", - " - `4096`: Query projection size (the hidden state is projected to the query vector space).\n", - " \n", - " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", - "\n", - "### 9. 
**self_attn_k_proj**\n", - "- **Shape:** `torch.Size([1, 13, 1024])`\n", - "- **Explanation:**\n", - " - `1`: Batch size.\n", - " - `13`: Sequence length.\n", - " - `1024`: Key projection size (here, the key is projected to a smaller dimensional space compared to queries/values).\n", - " \n", - " **Shape Format:** `(batch_size, seq_len, key_dim)`\n", - "\n", - "### 10. **self_attn_v_proj**\n", - "- **Shape:** `torch.Size([1, 13, 1024])`\n", - "- **Explanation:**\n", - " - `1`: Batch size.\n", - " - `13`: Sequence length.\n", - " - `1024`: Value projection size (the values are also projected to the same size as the keys).\n", - " \n", - " **Shape Format:** `(batch_size, seq_len, value_dim)`\n", - "\n", - "### 11. **self_attn_o_proj**\n", - "- **Shape:** `torch.Size([1, 13, 4096])`\n", - "- **Explanation:**\n", - " - `1`: Batch size.\n", - " - `13`: Sequence length.\n", - " - `4096`: Output projection size (the final result after the attention mechanism is projected back to the original hidden dimension).\n", - " \n", - " **Shape Format:** `(batch_size, seq_len, hidden_dim)`\n", - "\n", - "### 12. **block_sparse_moe_gate**\n", - "- **Shape:** `torch.Size([13, 8])`\n", - "- **Explanation:**\n", - " - `13`: Sequence length (the gate operates per token).\n", - " - `8`: Number of experts (gating decisions are made over all available experts).\n", - " \n", - " **Shape Format:** `(seq_len, num_experts)`\n", - "\n", - "### Summary Table:\n", - "\n", - "| Layer Name | Shape Format | Dimensions | Notes |\n", - "|------------------------------|------------------------------------|------------|------------------------------------------------------------|\n", - "| `embed_tokens` | `(batch_size, seq_len, embed_dim)` | `[1, 13, 4096]` | Embedding tokens from vocabulary. |\n", - "| `self_attn_layer_1` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Output of first attention layer. |\n", - "| `block_sparse_moe_experts` | `(num_experts, expert_embed_dim)` | `[3, 4096]` | Expert outputs in MoE block. |\n", - "| `post_attention_layernorm` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Layer norm after attention. |\n", - "| `norm` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Final normalization layer. |\n", - "| `lm_head` | `(batch_size, seq_len, vocab_size)`| `[1, 13, 32000]` | Logits for each token over the vocabulary. |\n", - "| `input_layernorm` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Input layer normalization. |\n", - "| `self_attn_q_proj` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Query projection in self-attention. |\n", - "| `self_attn_k_proj` | `(batch_size, seq_len, key_dim)` | `[1, 13, 1024]` | Key projection in self-attention. |\n", - "| `self_attn_v_proj` | `(batch_size, seq_len, value_dim)` | `[1, 13, 1024]` | Value projection in self-attention. |\n", - "| `self_attn_o_proj` | `(batch_size, seq_len, hidden_dim)`| `[1, 13, 4096]` | Output projection after attention. |\n", - "| `block_sparse_moe_gate` | `(seq_len, num_experts)` | `[13, 8]` | Gating decisions for the mixture of experts. |\n", - "\n" - ], - "metadata": { - "id": "FyugDOzXAAa8" - } - } - ] -} \ No newline at end of file diff --git a/llm_experiments/README.md b/llm_experiments/README.md deleted file mode 100644 index 06283bf31a07..000000000000 --- a/llm_experiments/README.md +++ /dev/null @@ -1,89 +0,0 @@ - - - -# LLM-Experiment Series - -Welcome to the LL-Experiment series! 
This series of notebooks and scripts aims to provide a comprehensive guide on investigating the internal workings of Large Language Models (LLMs), understanding how they process inputs, and experimenting with their architectures. - -## Table of Contents - -- [Introduction](#introduction) -- [Series Overview](#series-overview) -- [Getting Started](#getting-started) -- [Notebooks and Scripts](#notebooks-and-scripts) -- [Contributing](#contributing) -- [License](#license) - -## Introduction - -Large Language Models (LLMs) have revolutionized the field of natural language processing (NLP) by achieving state-of-the-art performance on various tasks. However, understanding their internal workings and how they process inputs can be challenging. This series aims to demystify LLMs by providing detailed explanations, hands-on experiments, and practical tips for tweaking their architectures. - -## Series Overview - -The LLM-Experiment series will cover the following topics: - -1. **Understanding LLM Architectures**: - - An overview of popular open source LLM architectures like Whisper, Llama, and Mixtral. - - Key trouble shooting during experimentation - -2. **Investigating Input Processing**: - - How inputs are tokenized and embedded. - - The role of attention mechanisms in processing sequences. - - Visualizing and analyzing the outputs at various layers of the model. - -3. **Tweaking LLM Architectures**: - - Experimenting with different configurations and hyperparameters. - - Modifying existing LLM architectures to improve performance or adapt to specific tasks. - - Implementing custom layers and components. - -4. **Conducting New Experiments**: - - Designing and implementing new experiments to test hypotheses about LLM behavior. - - Evaluating the impact of architectural changes on model performance. - - Sharing insights and findings with the community. - -## Getting Started - -To get started with the LLM-Experiment series, you will need the following: - -1. **Python Environment**: - - All these notebooks are created in Kaggle or Google Colab, So it's recommended to use the same to reproduce the results for other models - - -2. **Hugging Face Account**: - - Create a Hugging Face account and obtain an API token. - - Login to Hugging Face using the provided token or username and token. - - Most of the Mistral,Llama models needs some sort of Agreement acceptance - -3. **Notebooks and Scripts**: - - Clone this repository to access the notebooks and scripts or you can directly open in Google Colab - - Follow the instructions in each notebook to run the experiments and analyze the results. - -## Notebooks and Scripts - -The series will include the following notebooks and scripts: - -1. **Mixtral Model Analysis**: - - Analyzing the architecture and configuration of the Mixtral model. - - Registering hooks to capture the outputs at various layers. - -2. **Input Processing and Embedding**: - Upcoming - - -3. **Attention Mechanisms and improvements**: - Upcoming - - -4. **Rolling Buffer,KV-cache,Sliding Window Attention**: - Upcoming - - -5. **Tweaking Model Architectures - Adapters,Down-Casting**: - Upcoming - - -## Contributing - -We welcome contributions from the community! If you have any ideas, suggestions, or improvements, please feel free to open an issue or submit a pull request. - -## License - -This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details. 
-
-

From f3d43e869454a7ffbfdfdbe4d41b0635b85081eb Mon Sep 17 00:00:00 2001
From: Pritam Das <69068731+Pritam3355@users.noreply.github.com>
Date: Sat, 19 Oct 2024 10:11:04 +0530
Subject: [PATCH 05/27] Create README.md

---
 neural_network/chatbot/README.md | 55 ++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 neural_network/chatbot/README.md

diff --git a/neural_network/chatbot/README.md b/neural_network/chatbot/README.md
new file mode 100644
index 000000000000..f7c216561074
--- /dev/null
+++ b/neural_network/chatbot/README.md
@@ -0,0 +1,55 @@
+# Chatbot with Chat History Stored in a Database
+
+This project is a simple chatbot application built using Python, integrating a database for chat history storage and a language model service to generate responses. The chatbot can handle user messages, manage chat history, and terminate conversations upon receiving a `/stop` command.
+
+## Features
+- **Conversation Handling**: The bot processes user inputs and generates responses using a language model service.
+- **Database Integration**: Stores chat data (user messages and bot responses) and maintains chat history.
+- **Session Management**: Supports starting and terminating chat sessions, including proper logging of start and end times.
+- **Message Truncation**: Limits conversation history to the last few messages if the conversation exceeds a large number of entries.
+
+## Components
+- **`Chatbot` Class**: Core logic for handling user messages and managing the chat lifecycle.
+- **`Database` (Mocked in tests)**: Handles chat data storage (methods for inserting and retrieving data).
+- **`LLM Service` (Mocked in tests)**: Generates responses to user input based on conversation history.
+
+## Installation
+1. Clone the repository.
+2. Install the necessary dependencies:
+   ```bash
+   pip3 install -r requirements.txt
+   ```
+3. Run the bot or test it using `doctest`:
+   ```bash
+   python3 -m doctest -v chatbot.py
+   ```
+
+## Usage
+1. **Create Database**: Create a database named `ChatDB` in MySQL
+2. **Create .env**:
+```
+  # Together API key
+  TOGETHER_API_KEY="YOUR_API_KEY"
+
+  # Groq API key
+  GROQ_API_KEY = "YOUR_API_KEY"
+
+  # MySQL connectionDB (if you're running locally)
+  DB_USER = ""
+  DB_PASSWORD = ""
+  DB_HOST = "127.0.0.1"
+  DB_NAME = "ChatDB"
+  PORT = "3306"
+  ```
+3. **Handling Messages**: run the command below to start the chat in the console; you can log in to your database to check the chat history
+```bash
+python3 main.py
+```
+4. 
**Ending the Chat**: When the user sends `/stop`, the chat will terminate and log the end of the conversation with the message 'conversation-terminated' + +## Testing +The code includes basic `doctests` to verify the chatbot's functionality using mock services for the database and language model: +- Run the tests: + ```bash + python3 -m doctest -v chatbot.py + ``` From 2dad12b898385d16e4e1a075d2613922ac62e0d5 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sat, 19 Oct 2024 10:12:46 +0530 Subject: [PATCH 06/27] Add files via upload --- neural_network/chatbot/chatbot.py | 134 ++++++++++++++++ neural_network/chatbot/db.py | 199 ++++++++++++++++++++++++ neural_network/chatbot/llm_service.py | 78 ++++++++++ neural_network/chatbot/main.py | 44 ++++++ neural_network/chatbot/requirements.txt | 57 +++++++ 5 files changed, 512 insertions(+) create mode 100644 neural_network/chatbot/chatbot.py create mode 100644 neural_network/chatbot/db.py create mode 100644 neural_network/chatbot/llm_service.py create mode 100644 neural_network/chatbot/main.py create mode 100644 neural_network/chatbot/requirements.txt diff --git a/neural_network/chatbot/chatbot.py b/neural_network/chatbot/chatbot.py new file mode 100644 index 000000000000..38488349ff82 --- /dev/null +++ b/neural_network/chatbot/chatbot.py @@ -0,0 +1,134 @@ +import datetime +from typing import List, Dict, Any + + +class Chatbot: + """ + A Chatbot class to manage chat conversations using an LLM service and a database to store chat data. + + Methods: + - start_chat: Starts a new conversation, logs the start time. + - handle_user_message: Processes user input and stores user message & bot response in DB. + - end_chat: Ends the conversation and logs the end time. + - continue_chat: Retains only the last few messages if the conversation exceeds 1000 messages. + """ + + def __init__(self, db: Any, llm_service: Any) -> None: + """ + Initialize the Chatbot with a database and an LLM service. + + Parameters: + - db: The database instance used for storing chat data. + - llm_service: The language model service for generating responses. + """ + self.db = db + self.llm_service = llm_service + self.conversation_history: List[Dict[str, str]] = [] + self.chat_id_pk: int = None + + def start_chat(self) -> None: + """ + Start a new chat session and insert chat history to the database. + """ + start_time = datetime.datetime.now() + is_stream = 1 # Start new conversation + self.db.insert_chat_history(start_time, is_stream) + self.chat_id_pk = self.db.get_latest_chat_id() + + def handle_user_message(self, user_input: str) -> str: + """ + Handle user input and generate a bot response. + If the user sends '/stop', the conversation is terminated. + + Parameters: + - user_input: The input provided by the user. + + Returns: + - bot_response: The response generated by the bot. + + Raises: + - ValueError: If user input is not a string or if no chat_id is available. + + Doctest: + >>> class MockDatabase: + ... def __init__(self): + ... self.data = [] + ... def insert_chat_data(self, *args, **kwargs): + ... pass + ... def insert_chat_history(self, *args, **kwargs): + ... pass + ... def get_latest_chat_id(self): + ... return 1 + ... + >>> class MockLLM: + ... def generate_response(self, conversation_history): + ... if conversation_history[-1]["content"] == "/stop": + ... return "conversation-terminated" + ... 
return "Mock response" + >>> db_mock = MockDatabase() + >>> llm_mock = MockLLM() + >>> bot = Chatbot(db_mock, llm_mock) + >>> bot.start_chat() + >>> bot.handle_user_message("/stop") + 'conversation-terminated' + >>> bot.handle_user_message("Hello!") + 'Mock response' + """ + if not isinstance(user_input, str): + raise ValueError("User input must be a string.") + + if self.chat_id_pk is None: + raise ValueError("Chat has not been started. Call start_chat() first.") + + self.conversation_history.append({"role": "user", "content": user_input}) + + if user_input == "/stop": + self.end_chat() + return "conversation-terminated" + else: + bot_response = self.llm_service.generate_response(self.conversation_history) + print(f"Bot : ",bot_response) + self.conversation_history.append( + {"role": "assistant", "content": bot_response} + ) + self._store_message_in_db(user_input, bot_response) + + return bot_response + + def _store_message_in_db(self, user_input: str, bot_response: str) -> None: + """ + Store user input and bot response in the database. + + Parameters: + - user_input: The message from the user. + - bot_response: The response generated by the bot. + + Raises: + - ValueError: If insertion into the database fails. + """ + try: + self.db.insert_chat_data(self.chat_id_pk, user_input, bot_response) + except Exception as e: + raise ValueError(f"Failed to insert chat data: {e}") + + def end_chat(self) -> None: + """ + End the chat session and update the chat history in the database. + """ + current_time = datetime.datetime.now() + is_stream = 2 # End of conversation + try: + user_input = "/stop" + bot_response = "conversation-terminated" + print(f"Bot : ",bot_response) + self.db.insert_chat_data(self.chat_id_pk, user_input, bot_response) + self.db.insert_chat_history(current_time, is_stream) + except Exception as e: + raise ValueError(f"Failed to update chat history: {e}") + + def continue_chat(self) -> None: + """ + Retain only the last few entries if the conversation exceeds 1000 messages. + """ + if len(self.conversation_history) > 1000: + self.conversation_history = self.conversation_history[-3:] diff --git a/neural_network/chatbot/db.py b/neural_network/chatbot/db.py new file mode 100644 index 000000000000..92ef6909cabe --- /dev/null +++ b/neural_network/chatbot/db.py @@ -0,0 +1,199 @@ +import os +from dotenv import load_dotenv +import mysql.connector +from mysql.connector import MySQLConnection + +load_dotenv() + + +class Database: + """ + A class to manage the connection to the MySQL database using configuration from environment variables. + + Attributes: + ----------- + config : dict + The database connection parameters like user, password, host, and database name. + """ + + def __init__(self) -> None: + self.config = { + "user": os.environ.get("DB_USER"), + "password": os.environ.get("DB_PASSWORD"), + "host": os.environ.get("DB_HOST"), + "database": os.environ.get("DB_NAME"), + } + + def connect(self) -> MySQLConnection: + """ + Establish a connection to the MySQL database. + + Returns: + -------- + MySQLConnection + A connection object for interacting with the MySQL database. + + Raises: + ------- + mysql.connector.Error + If the connection to the database fails. + """ + return mysql.connector.connect(**self.config) + + +class ChatDatabase: + """ + A class to manage chat-related database operations, such as creating tables, + inserting chat history, and retrieving chat data. 
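+
+    Example (sketch; assumes a reachable MySQL instance configured via the
+    .env file described in the README):
+
+        >>> chat_db = ChatDatabase(Database())  # doctest: +SKIP
+        >>> chat_db.create_tables()  # doctest: +SKIP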
+ + Attributes: + ----------- + db : Database + An instance of the `Database` class for establishing connections to the MySQL database. + """ + + def __init__(self, db: Database) -> None: + self.db = db + + def create_tables(self) -> None: + """ + Create the necessary tables for chat history and chat data in the database. + If the tables already exist, they will not be created again. + + Raises: + ------- + mysql.connector.Error + If there is any error executing the SQL statements. + """ + conn = self.db.connect() + cursor = conn.cursor() + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS ChatDB.Chat_history ( + chat_id INT AUTO_INCREMENT PRIMARY KEY, + start_time DATETIME, + is_stream INT + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS ChatDB.Chat_data ( + id INT AUTO_INCREMENT PRIMARY KEY, + chat_id INT, + user TEXT, + assistant TEXT, + FOREIGN KEY (chat_id) REFERENCES ChatDB.Chat_history(chat_id) + ) + """ + ) + + cursor.execute("DROP TRIGGER IF EXISTS update_is_stream") + + cursor.execute( + """ + CREATE TRIGGER update_is_stream + AFTER UPDATE ON ChatDB.Chat_history + FOR EACH ROW + BEGIN + UPDATE ChatDB.Chat_data + SET is_stream = NEW.is_stream + WHERE chat_id = NEW.chat_id; + END; + """ + ) + + conn.commit() + cursor.close() + conn.close() + + def insert_chat_history(self, start_time: str, is_stream: int) -> None: + """ + Insert a new chat history record into the database. + + Parameters: + ----------- + start_time : str + The starting time of the chat session. + is_stream : int + An integer indicating whether the chat is in progress (1) or ended (2). + + Raises: + ------- + mysql.connector.Error + If there is any error executing the SQL statements. + """ + conn = self.db.connect() + cursor = conn.cursor() + cursor.execute( + """ + INSERT INTO ChatDB.Chat_history (start_time, is_stream) + VALUES (%s, %s) + """, + (start_time, is_stream), + ) + conn.commit() + cursor.close() + conn.close() + + def get_latest_chat_id(self) -> int: + """ + Retrieve the chat ID of the most recent chat session from the database. + + Returns: + -------- + int + The ID of the latest chat session. + + Raises: + ------- + mysql.connector.Error + If there is any error executing the SQL statements. + """ + conn = self.db.connect() + cursor = conn.cursor() + cursor.execute( + """ + SELECT chat_id FROM ChatDB.Chat_history WHERE + chat_id=(SELECT MAX(chat_id) FROM ChatDB.Chat_history) + """ + ) + chat_id_pk = cursor.fetchone()[0] + cursor.close() + conn.close() + return chat_id_pk + + def insert_chat_data( + self, chat_id: int, user_message: str, assistant_message: str + ) -> None: + """ + Insert a new chat data record into the database. + + Parameters: + ----------- + chat_id : int + The ID of the chat session to which this data belongs. + user_message : str + The message provided by the user in the chat session. + assistant_message : str + The response from the assistant in the chat session. + + Raises: + ------- + mysql.connector.Error + If there is any error executing the SQL statements. 
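+
+        Example (illustrative values only):
+
+            >>> chat_db.insert_chat_data(1, "Hi", "Hello!")  # doctest: +SKIP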
+ """ + conn = self.db.connect() + cursor = conn.cursor() + cursor.execute( + """ + INSERT INTO ChatDB.Chat_data (chat_id, user, assistant) + VALUES (%s, %s, %s) + """, + (chat_id, user_message, assistant_message), + ) + conn.commit() + cursor.close() + conn.close() diff --git a/neural_network/chatbot/llm_service.py b/neural_network/chatbot/llm_service.py new file mode 100644 index 000000000000..f1203f642332 --- /dev/null +++ b/neural_network/chatbot/llm_service.py @@ -0,0 +1,78 @@ +import os +from together import Together +from groq import Groq +from dotenv import load_dotenv +from typing import List, Dict + +load_dotenv() + + +class LLMService: + """ + A class to interact with different LLM (Large Language Model) API services, such as Together and Groq. + + Attributes: + ----------- + api_service : str + The name of the API service to use ("Together" or "Groq"). + """ + + def __init__(self, api_service: str) -> None: + """ + Initialize the LLMService with a specific API service. + + Parameters: + ----------- + api_service : str + The name of the LLM API service, either "Together" or "Groq". + """ + self.api_service = api_service + + def generate_response(self, conversation_history: List[Dict[str, str]]) -> str: + """ + Generate a response from the specified LLM API based on the conversation history. + + Parameters: + ----------- + conversation_history : List[Dict[str, str]] + The list of conversation messages, where each message is a dictionary with 'role' and 'content' keys. + + Returns: + -------- + str + The generated response content from the assistant. + + Raises: + ------- + ValueError + If the specified API service is neither "Together" nor "Groq". + """ + if self.api_service == "Together": + client = Together(api_key=os.environ.get("TOGETHER_API_KEY")) + response = client.chat.completions.create( + model="meta-llama/Llama-3.2-3B-Instruct-Turbo", + messages=conversation_history, + max_tokens=512, + temperature=0.3, + top_p=0.7, + top_k=50, + repetition_penalty=1, + stop=["<|eot_id|>", "<|eom_id|>"], + stream=False, + ) + elif self.api_service == "Groq": + client = Groq(api_key=os.environ.get("GROQ_API_KEY")) + response = client.chat.completions.create( + model="llama3-8b-8192", + messages=conversation_history, + max_tokens=1024, + temperature=0.3, + top_p=0.7, + stop=["<|eot_id|>", "<|eom_id|>"], + stream=False, + ) + else: + raise ValueError(f"Unsupported API service: {self.api_service}") + + # Extracting the content of the generated response + return response.choices[0].message.content diff --git a/neural_network/chatbot/main.py b/neural_network/chatbot/main.py new file mode 100644 index 000000000000..cdbd631c7783 --- /dev/null +++ b/neural_network/chatbot/main.py @@ -0,0 +1,44 @@ +from db import Database, ChatDatabase +from llm_service import LLMService +from chatbot import Chatbot +from typing import NoReturn + + +def main() -> NoReturn: + """ + Main function to initialize and start the chatbot application. + + This function initializes the database and LLM service, creates necessary tables, and starts + the chatbot for user interaction. 
+ """ + # Initialize and configure the database + db = Database() + chat_db = ChatDatabase(db) + chat_db.create_tables() + + # Set the API service to either "Together" or "Groq" + api_service = ( + "Groq" # Can be set dynamically based on user preference or environment + ) + llm_service = LLMService(api_service) + + # Initialize the Chatbot with the database and LLM service + chatbot = Chatbot(chat_db, llm_service) + + print("Welcome to the chatbot! Type '/stop' to end the conversation.") + chatbot.start_chat() + + # Chat loop to handle user input + while True: + user_input = input("\nYou: ") + if user_input.strip().lower() == "/stop": + chatbot.end_chat() # End the conversation if user types "/stop" + break + chatbot.handle_user_message( + user_input + ) # Process user input and generate response + chatbot.continue_chat() # Handle long conversations (trim history if necessary) + + +if __name__ == "__main__": + main() diff --git a/neural_network/chatbot/requirements.txt b/neural_network/chatbot/requirements.txt new file mode 100644 index 000000000000..0f1204243a5d --- /dev/null +++ b/neural_network/chatbot/requirements.txt @@ -0,0 +1,57 @@ +aiohappyeyeballs==2.4.2 +aiohttp==3.10.8 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.6.0 +asgiref==3.8.1 +attrs==24.2.0 +black==24.10.0 +certifi==2024.8.30 +cfgv==3.4.0 +charset-normalizer==3.3.2 +click==8.1.7 +distlib==0.3.9 +distro==1.9.0 +Django==5.1.1 +djangorestframework==3.15.2 +eval_type_backport==0.2.0 +filelock==3.16.1 +frozenlist==1.4.1 +groq==0.11.0 +h11==0.14.0 +httpcore==1.0.5 +httpx==0.27.2 +identify==2.6.1 +idna==3.10 +markdown-it-py==3.0.0 +mdurl==0.1.2 +multidict==6.1.0 +mypy-extensions==1.0.0 +mysql-connector-python==9.0.0 +nodeenv==1.9.1 +numpy==2.1.1 +packaging==24.1 +pathspec==0.12.1 +pillow==10.4.0 +platformdirs==4.3.6 +pre_commit==4.0.1 +pyarrow==17.0.0 +pydantic==2.9.2 +pydantic_core==2.23.4 +Pygments==2.18.0 +python-dotenv==1.0.1 +PyYAML==6.0.2 +requests==2.32.3 +rich==13.8.1 +ruff==0.7.0 +shellingham==1.5.4 +sniffio==1.3.1 +sqlparse==0.5.1 +tabulate==0.9.0 +together==1.3.0 +tqdm==4.66.5 +typer==0.12.5 +typing_extensions==4.12.2 +urllib3==2.2.3 +virtualenv==20.27.0 +yarl==1.13.1 From 4ecdca19d673d721c5c744a238a437e5b3c72146 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 04:49:54 +0000 Subject: [PATCH 07/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_network/chatbot/README.md | 4 ++-- neural_network/chatbot/chatbot.py | 4 ++-- neural_network/chatbot/db.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/neural_network/chatbot/README.md b/neural_network/chatbot/README.md index f7c216561074..5e8f86b0c6fd 100644 --- a/neural_network/chatbot/README.md +++ b/neural_network/chatbot/README.md @@ -30,10 +30,10 @@ This project is a simple chatbot application built using Python, integrating a d ``` # Together API key TOGETHER_API_KEY="YOUR_API_KEY" - + # Groq API key GROQ_API_KEY = "YOUR_API_KEY" - + # MySQL connectionDB (if you're running locally) DB_USER = "" DB_PASSWORD = "" diff --git a/neural_network/chatbot/chatbot.py b/neural_network/chatbot/chatbot.py index 38488349ff82..2456ced5ee35 100644 --- a/neural_network/chatbot/chatbot.py +++ b/neural_network/chatbot/chatbot.py @@ -87,7 +87,7 @@ def handle_user_message(self, user_input: str) -> str: return "conversation-terminated" else: bot_response = self.llm_service.generate_response(self.conversation_history) 
- print(f"Bot : ",bot_response) + print(f"Bot : ", bot_response) self.conversation_history.append( {"role": "assistant", "content": bot_response} ) @@ -120,7 +120,7 @@ def end_chat(self) -> None: try: user_input = "/stop" bot_response = "conversation-terminated" - print(f"Bot : ",bot_response) + print(f"Bot : ", bot_response) self.db.insert_chat_data(self.chat_id_pk, user_input, bot_response) self.db.insert_chat_history(current_time, is_stream) except Exception as e: diff --git a/neural_network/chatbot/db.py b/neural_network/chatbot/db.py index 92ef6909cabe..3572a699ea57 100644 --- a/neural_network/chatbot/db.py +++ b/neural_network/chatbot/db.py @@ -156,7 +156,7 @@ def get_latest_chat_id(self) -> int: cursor = conn.cursor() cursor.execute( """ - SELECT chat_id FROM ChatDB.Chat_history WHERE + SELECT chat_id FROM ChatDB.Chat_history WHERE chat_id=(SELECT MAX(chat_id) FROM ChatDB.Chat_history) """ ) From f8510d702eea331a94663769e84bd6959d25537b Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sat, 19 Oct 2024 10:31:17 +0530 Subject: [PATCH 08/27] Add files via upload --- neural_network/chatbot/chatbot.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/neural_network/chatbot/chatbot.py b/neural_network/chatbot/chatbot.py index 2456ced5ee35..9de733c94e21 100644 --- a/neural_network/chatbot/chatbot.py +++ b/neural_network/chatbot/chatbot.py @@ -83,11 +83,13 @@ def handle_user_message(self, user_input: str) -> str: self.conversation_history.append({"role": "user", "content": user_input}) if user_input == "/stop": + bot_response = "conversation-terminated" + # print(f"Bot: {bot_response}") self.end_chat() - return "conversation-terminated" + return bot_response else: bot_response = self.llm_service.generate_response(self.conversation_history) - print(f"Bot : ", bot_response) + # print(f"Bot: {bot_response}") self.conversation_history.append( {"role": "assistant", "content": bot_response} ) @@ -120,7 +122,6 @@ def end_chat(self) -> None: try: user_input = "/stop" bot_response = "conversation-terminated" - print(f"Bot : ", bot_response) self.db.insert_chat_data(self.chat_id_pk, user_input, bot_response) self.db.insert_chat_history(current_time, is_stream) except Exception as e: From fb102e6ef2c5306bb9d40bf46bb70421ca854dc4 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 11:41:49 +0530 Subject: [PATCH 09/27] Delete neural_network/chatbot/main.py --- neural_network/chatbot/main.py | 44 ---------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 neural_network/chatbot/main.py diff --git a/neural_network/chatbot/main.py b/neural_network/chatbot/main.py deleted file mode 100644 index cdbd631c7783..000000000000 --- a/neural_network/chatbot/main.py +++ /dev/null @@ -1,44 +0,0 @@ -from db import Database, ChatDatabase -from llm_service import LLMService -from chatbot import Chatbot -from typing import NoReturn - - -def main() -> NoReturn: - """ - Main function to initialize and start the chatbot application. - - This function initializes the database and LLM service, creates necessary tables, and starts - the chatbot for user interaction. 
- """ - # Initialize and configure the database - db = Database() - chat_db = ChatDatabase(db) - chat_db.create_tables() - - # Set the API service to either "Together" or "Groq" - api_service = ( - "Groq" # Can be set dynamically based on user preference or environment - ) - llm_service = LLMService(api_service) - - # Initialize the Chatbot with the database and LLM service - chatbot = Chatbot(chat_db, llm_service) - - print("Welcome to the chatbot! Type '/stop' to end the conversation.") - chatbot.start_chat() - - # Chat loop to handle user input - while True: - user_input = input("\nYou: ") - if user_input.strip().lower() == "/stop": - chatbot.end_chat() # End the conversation if user types "/stop" - break - chatbot.handle_user_message( - user_input - ) # Process user input and generate response - chatbot.continue_chat() # Handle long conversations (trim history if necessary) - - -if __name__ == "__main__": - main() From 6e7a428e81edbed84735c07be14dad4eb69b8dea Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 11:42:09 +0530 Subject: [PATCH 10/27] Delete neural_network/chatbot/llm_service.py --- neural_network/chatbot/llm_service.py | 78 --------------------------- 1 file changed, 78 deletions(-) delete mode 100644 neural_network/chatbot/llm_service.py diff --git a/neural_network/chatbot/llm_service.py b/neural_network/chatbot/llm_service.py deleted file mode 100644 index f1203f642332..000000000000 --- a/neural_network/chatbot/llm_service.py +++ /dev/null @@ -1,78 +0,0 @@ -import os -from together import Together -from groq import Groq -from dotenv import load_dotenv -from typing import List, Dict - -load_dotenv() - - -class LLMService: - """ - A class to interact with different LLM (Large Language Model) API services, such as Together and Groq. - - Attributes: - ----------- - api_service : str - The name of the API service to use ("Together" or "Groq"). - """ - - def __init__(self, api_service: str) -> None: - """ - Initialize the LLMService with a specific API service. - - Parameters: - ----------- - api_service : str - The name of the LLM API service, either "Together" or "Groq". - """ - self.api_service = api_service - - def generate_response(self, conversation_history: List[Dict[str, str]]) -> str: - """ - Generate a response from the specified LLM API based on the conversation history. - - Parameters: - ----------- - conversation_history : List[Dict[str, str]] - The list of conversation messages, where each message is a dictionary with 'role' and 'content' keys. - - Returns: - -------- - str - The generated response content from the assistant. - - Raises: - ------- - ValueError - If the specified API service is neither "Together" nor "Groq". 
- """ - if self.api_service == "Together": - client = Together(api_key=os.environ.get("TOGETHER_API_KEY")) - response = client.chat.completions.create( - model="meta-llama/Llama-3.2-3B-Instruct-Turbo", - messages=conversation_history, - max_tokens=512, - temperature=0.3, - top_p=0.7, - top_k=50, - repetition_penalty=1, - stop=["<|eot_id|>", "<|eom_id|>"], - stream=False, - ) - elif self.api_service == "Groq": - client = Groq(api_key=os.environ.get("GROQ_API_KEY")) - response = client.chat.completions.create( - model="llama3-8b-8192", - messages=conversation_history, - max_tokens=1024, - temperature=0.3, - top_p=0.7, - stop=["<|eot_id|>", "<|eom_id|>"], - stream=False, - ) - else: - raise ValueError(f"Unsupported API service: {self.api_service}") - - # Extracting the content of the generated response - return response.choices[0].message.content From 922a2306b580bc6a27bcdb7ffef5a58ce32589d3 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 11:42:24 +0530 Subject: [PATCH 11/27] Delete neural_network/chatbot/chatbot.py --- neural_network/chatbot/chatbot.py | 135 ------------------------------ 1 file changed, 135 deletions(-) delete mode 100644 neural_network/chatbot/chatbot.py diff --git a/neural_network/chatbot/chatbot.py b/neural_network/chatbot/chatbot.py deleted file mode 100644 index 9de733c94e21..000000000000 --- a/neural_network/chatbot/chatbot.py +++ /dev/null @@ -1,135 +0,0 @@ -import datetime -from typing import List, Dict, Any - - -class Chatbot: - """ - A Chatbot class to manage chat conversations using an LLM service and a database to store chat data. - - Methods: - - start_chat: Starts a new conversation, logs the start time. - - handle_user_message: Processes user input and stores user message & bot response in DB. - - end_chat: Ends the conversation and logs the end time. - - continue_chat: Retains only the last few messages if the conversation exceeds 1000 messages. - """ - - def __init__(self, db: Any, llm_service: Any) -> None: - """ - Initialize the Chatbot with a database and an LLM service. - - Parameters: - - db: The database instance used for storing chat data. - - llm_service: The language model service for generating responses. - """ - self.db = db - self.llm_service = llm_service - self.conversation_history: List[Dict[str, str]] = [] - self.chat_id_pk: int = None - - def start_chat(self) -> None: - """ - Start a new chat session and insert chat history to the database. - """ - start_time = datetime.datetime.now() - is_stream = 1 # Start new conversation - self.db.insert_chat_history(start_time, is_stream) - self.chat_id_pk = self.db.get_latest_chat_id() - - def handle_user_message(self, user_input: str) -> str: - """ - Handle user input and generate a bot response. - If the user sends '/stop', the conversation is terminated. - - Parameters: - - user_input: The input provided by the user. - - Returns: - - bot_response: The response generated by the bot. - - Raises: - - ValueError: If user input is not a string or if no chat_id is available. - - Doctest: - >>> class MockDatabase: - ... def __init__(self): - ... self.data = [] - ... def insert_chat_data(self, *args, **kwargs): - ... pass - ... def insert_chat_history(self, *args, **kwargs): - ... pass - ... def get_latest_chat_id(self): - ... return 1 - ... - >>> class MockLLM: - ... def generate_response(self, conversation_history): - ... if conversation_history[-1]["content"] == "/stop": - ... return "conversation-terminated" - ... 
return "Mock response" - >>> db_mock = MockDatabase() - >>> llm_mock = MockLLM() - >>> bot = Chatbot(db_mock, llm_mock) - >>> bot.start_chat() - >>> bot.handle_user_message("/stop") - 'conversation-terminated' - >>> bot.handle_user_message("Hello!") - 'Mock response' - """ - if not isinstance(user_input, str): - raise ValueError("User input must be a string.") - - if self.chat_id_pk is None: - raise ValueError("Chat has not been started. Call start_chat() first.") - - self.conversation_history.append({"role": "user", "content": user_input}) - - if user_input == "/stop": - bot_response = "conversation-terminated" - # print(f"Bot: {bot_response}") - self.end_chat() - return bot_response - else: - bot_response = self.llm_service.generate_response(self.conversation_history) - # print(f"Bot: {bot_response}") - self.conversation_history.append( - {"role": "assistant", "content": bot_response} - ) - self._store_message_in_db(user_input, bot_response) - - return bot_response - - def _store_message_in_db(self, user_input: str, bot_response: str) -> None: - """ - Store user input and bot response in the database. - - Parameters: - - user_input: The message from the user. - - bot_response: The response generated by the bot. - - Raises: - - ValueError: If insertion into the database fails. - """ - try: - self.db.insert_chat_data(self.chat_id_pk, user_input, bot_response) - except Exception as e: - raise ValueError(f"Failed to insert chat data: {e}") - - def end_chat(self) -> None: - """ - End the chat session and update the chat history in the database. - """ - current_time = datetime.datetime.now() - is_stream = 2 # End of conversation - try: - user_input = "/stop" - bot_response = "conversation-terminated" - self.db.insert_chat_data(self.chat_id_pk, user_input, bot_response) - self.db.insert_chat_history(current_time, is_stream) - except Exception as e: - raise ValueError(f"Failed to update chat history: {e}") - - def continue_chat(self) -> None: - """ - Retain only the last few entries if the conversation exceeds 1000 messages. - """ - if len(self.conversation_history) > 1000: - self.conversation_history = self.conversation_history[-3:] From a1d4cd9839288ba3a44157175066aaa9436a6897 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 11:42:34 +0530 Subject: [PATCH 12/27] Delete neural_network/chatbot/db.py --- neural_network/chatbot/db.py | 199 ----------------------------------- 1 file changed, 199 deletions(-) delete mode 100644 neural_network/chatbot/db.py diff --git a/neural_network/chatbot/db.py b/neural_network/chatbot/db.py deleted file mode 100644 index 3572a699ea57..000000000000 --- a/neural_network/chatbot/db.py +++ /dev/null @@ -1,199 +0,0 @@ -import os -from dotenv import load_dotenv -import mysql.connector -from mysql.connector import MySQLConnection - -load_dotenv() - - -class Database: - """ - A class to manage the connection to the MySQL database using configuration from environment variables. - - Attributes: - ----------- - config : dict - The database connection parameters like user, password, host, and database name. - """ - - def __init__(self) -> None: - self.config = { - "user": os.environ.get("DB_USER"), - "password": os.environ.get("DB_PASSWORD"), - "host": os.environ.get("DB_HOST"), - "database": os.environ.get("DB_NAME"), - } - - def connect(self) -> MySQLConnection: - """ - Establish a connection to the MySQL database. 
- - Returns: - -------- - MySQLConnection - A connection object for interacting with the MySQL database. - - Raises: - ------- - mysql.connector.Error - If the connection to the database fails. - """ - return mysql.connector.connect(**self.config) - - -class ChatDatabase: - """ - A class to manage chat-related database operations, such as creating tables, - inserting chat history, and retrieving chat data. - - Attributes: - ----------- - db : Database - An instance of the `Database` class for establishing connections to the MySQL database. - """ - - def __init__(self, db: Database) -> None: - self.db = db - - def create_tables(self) -> None: - """ - Create the necessary tables for chat history and chat data in the database. - If the tables already exist, they will not be created again. - - Raises: - ------- - mysql.connector.Error - If there is any error executing the SQL statements. - """ - conn = self.db.connect() - cursor = conn.cursor() - - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS ChatDB.Chat_history ( - chat_id INT AUTO_INCREMENT PRIMARY KEY, - start_time DATETIME, - is_stream INT - ) - """ - ) - - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS ChatDB.Chat_data ( - id INT AUTO_INCREMENT PRIMARY KEY, - chat_id INT, - user TEXT, - assistant TEXT, - FOREIGN KEY (chat_id) REFERENCES ChatDB.Chat_history(chat_id) - ) - """ - ) - - cursor.execute("DROP TRIGGER IF EXISTS update_is_stream") - - cursor.execute( - """ - CREATE TRIGGER update_is_stream - AFTER UPDATE ON ChatDB.Chat_history - FOR EACH ROW - BEGIN - UPDATE ChatDB.Chat_data - SET is_stream = NEW.is_stream - WHERE chat_id = NEW.chat_id; - END; - """ - ) - - conn.commit() - cursor.close() - conn.close() - - def insert_chat_history(self, start_time: str, is_stream: int) -> None: - """ - Insert a new chat history record into the database. - - Parameters: - ----------- - start_time : str - The starting time of the chat session. - is_stream : int - An integer indicating whether the chat is in progress (1) or ended (2). - - Raises: - ------- - mysql.connector.Error - If there is any error executing the SQL statements. - """ - conn = self.db.connect() - cursor = conn.cursor() - cursor.execute( - """ - INSERT INTO ChatDB.Chat_history (start_time, is_stream) - VALUES (%s, %s) - """, - (start_time, is_stream), - ) - conn.commit() - cursor.close() - conn.close() - - def get_latest_chat_id(self) -> int: - """ - Retrieve the chat ID of the most recent chat session from the database. - - Returns: - -------- - int - The ID of the latest chat session. - - Raises: - ------- - mysql.connector.Error - If there is any error executing the SQL statements. - """ - conn = self.db.connect() - cursor = conn.cursor() - cursor.execute( - """ - SELECT chat_id FROM ChatDB.Chat_history WHERE - chat_id=(SELECT MAX(chat_id) FROM ChatDB.Chat_history) - """ - ) - chat_id_pk = cursor.fetchone()[0] - cursor.close() - conn.close() - return chat_id_pk - - def insert_chat_data( - self, chat_id: int, user_message: str, assistant_message: str - ) -> None: - """ - Insert a new chat data record into the database. - - Parameters: - ----------- - chat_id : int - The ID of the chat session to which this data belongs. - user_message : str - The message provided by the user in the chat session. - assistant_message : str - The response from the assistant in the chat session. - - Raises: - ------- - mysql.connector.Error - If there is any error executing the SQL statements. 
- """ - conn = self.db.connect() - cursor = conn.cursor() - cursor.execute( - """ - INSERT INTO ChatDB.Chat_data (chat_id, user, assistant) - VALUES (%s, %s, %s) - """, - (chat_id, user_message, assistant_message), - ) - conn.commit() - cursor.close() - conn.close() From 508249e47ac72ce3ffad67832bb0860fbabc7b58 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 12:00:55 +0530 Subject: [PATCH 13/27] Update README.md --- neural_network/chatbot/README.md | 128 +++++++++++++++++++------------ 1 file changed, 78 insertions(+), 50 deletions(-) diff --git a/neural_network/chatbot/README.md b/neural_network/chatbot/README.md index 5e8f86b0c6fd..acddd4c8f671 100644 --- a/neural_network/chatbot/README.md +++ b/neural_network/chatbot/README.md @@ -1,55 +1,83 @@ -# Chatbot with Chat history stored in Database -This project is a simple chatbot application built using Python, integrating a database for chat history storage and a language model service to generate responses. The chatbot can handle user messages, manage chat history, and terminate conversations upon receiving a `/stop` command. + +# Chatbot with LLM Integration and Database Storage + +This chatbot application integrates LLM (Large Language Model) API services, **Together** and **Groq**(you can use any one of them), to generate AI-driven responses. It stores conversation history in a MySQL database and manages chat sessions with triggers that update the status of conversations automatically. ## Features -- **Conversation Handling**: The bot processes user inputs and generates responses using a language model service. -- **Database Integration**: Stores chat data (user messages and bot responses) and maintains chat history. -- **Session Management**: Supports starting and terminating chat sessions, including proper logging of start and end times. -- **Message Truncation**: Limits conversation history to the last few messages if the conversation exceeds a large number of entries. - -## Components -- **`Chatbot` Class**: Core logic for handling user messages and managing the chat lifecycle. -- **`Database` (Mocked in tests)**: Handles chat data storage (methods for inserting and retrieving data). -- **`LLM Service` (Mocked in tests)**: Generates responses to user input based on conversation history. - -## Installation -1. Clone the repository: -2. Install the necessary dependencies - ```bash - pip3 install requirements.txt - ``` -4. Run the bot or test it using `doctest`: - ```bash - python3 -m doctest -v chatbot.py - ``` - -## Usage -1. **Create Database**: Create a databse named `ChatDB` in Mysql -2. **Create .env**: +- Supports LLM response generation using **Together** and **Groq** APIs. +- Stores chat sessions and message exchanges in MySQL database tables. +- Automatically updates chat session status using database triggers. +- Manages conversation history with user-assistant interaction. 
+
+## Requirements
+
+Before running the application, ensure the following dependencies are installed:
+
+- Python 3.13+
+- MySQL Server
+- The following Python libraries:
+  ```bash
+  pip3 install -r requirements.txt
+  ```
+
+## Setup Instructions
+
+### Step 1: Set Up Environment Variables
+
+Create a `.env` file in the root directory of your project and add the following entries for your database credentials and API keys:
+
+```
+# Together API key
+TOGETHER_API_KEY="YOUR_API_KEY"
+
+# Groq API key
+GROQ_API_KEY = "YOUR_API_KEY"
+
+# MySQL connection details (if you're running locally)
+DB_USER = ""
+DB_PASSWORD = ""
+DB_HOST = "127.0.0.1"
+DB_NAME = "ChatDB"
+PORT = "3306"
+
+# API service to use (either "Groq" or "Together")
+API_SERVICE = "Groq"
+```
+
+### Step 2: Create MySQL Tables and Trigger
+
+The `create_tables()` function in the script automatically creates the necessary tables and a trigger for updating chat session statuses. To ensure the database is set up correctly, the function is called at the beginning of the script.
+
+Ensure that your MySQL server is running and accessible before running the code.
+
+### Step 3: Run the Application
+
+To start the chatbot:
+
+1. Ensure your MySQL server is running.
+2. Open a terminal and run the Python script:
+
+```bash
+python3 chat_db.py
+```
+
+The chatbot will initialize, and you can interact with it by typing your inputs. Type `/stop` to end the conversation.
+
+### Step 4: Test and Validate Code
+
+This project uses doctests to ensure that the functions work as expected. To run the doctests:
+
+```bash
+python3 -m doctest -v chat_db.py
+```
+
+Where applicable, add doctests to your functions to validate both valid and erroneous inputs.
+
+### Key Functions
+
+- **create_tables()**: Sets up the MySQL tables (`Chat_history` and `Chat_data`) and the `update_is_stream` trigger.
+- **insert_chat_history()**: Inserts a new chat session into the `Chat_history` table.
+- **insert_chat_data()**: Inserts user-assistant message pairs into the `Chat_data` table.
+- **generate_llm_response()**: Generates a response from the selected LLM API service, either **Together** or **Groq**.
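+
+For orientation, the history passed to **generate_llm_response()** is a plain list
+of `{"role": ..., "content": ...}` dictionaries, newest message last. A minimal
+call, assuming the `.env` keys above are configured, might look like this
+(sketch only, not part of the script):
+
+```python
+# inside chat_db.py, where generate_llm_response() is defined
+history = [{"role": "user", "content": "Hello"}]
+reply = generate_llm_response(history, api_service="Groq")
+print(reply)
+```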
+ From 4af7a673a9efba034e0fa8cd47767016009fc535 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 12:01:25 +0530 Subject: [PATCH 14/27] Add files via upload --- neural_network/chatbot/chat_db.py | 236 ++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 neural_network/chatbot/chat_db.py diff --git a/neural_network/chatbot/chat_db.py b/neural_network/chatbot/chat_db.py new file mode 100644 index 000000000000..c12e311bcadf --- /dev/null +++ b/neural_network/chatbot/chat_db.py @@ -0,0 +1,236 @@ +import os +import mysql.connector +import datetime +from dotenv import load_dotenv +from together import Together +from groq import Groq + +load_dotenv() + +# Database configuration +db_config = { + "user": os.environ.get("DB_USER"), + "password": os.environ.get("DB_PASSWORD"), + "host": os.environ.get("DB_HOST"), + "database": os.environ.get("DB_NAME"), +} + +api_service = os.environ.get("API_SERVICE") + + +def create_tables() -> None: + """ + Create the ChatDB.Chat_history and ChatDB.Chat_data tables if they do not exist. + Also, create a trigger to update is_stream in Chat_data when Chat_history.is_stream is updated. + """ + try: + conn = mysql.connector.connect(**db_config) + cursor = conn.cursor() + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS ChatDB.Chat_history ( + chat_id INT AUTO_INCREMENT PRIMARY KEY, + start_time DATETIME, + is_stream INT + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS ChatDB.Chat_data ( + id INT AUTO_INCREMENT PRIMARY KEY, + chat_id INT, + user TEXT, + assistant TEXT, + FOREIGN KEY (chat_id) REFERENCES ChatDB.Chat_history(chat_id) + ) + """ + ) + + cursor.execute("DROP TRIGGER IF EXISTS update_is_stream;") + + cursor.execute( + """ + CREATE TRIGGER update_is_stream + AFTER UPDATE ON ChatDB.Chat_history + FOR EACH ROW + BEGIN + UPDATE ChatDB.Chat_data + SET is_stream = NEW.is_stream + WHERE chat_id = NEW.chat_id; + END; + """ + ) + + conn.commit() + except mysql.connector.Error as err: + print(f"Error: {err}") + finally: + cursor.close() + conn.close() + print("Tables and trigger created successfully") + + +def insert_chat_history(start_time: datetime.datetime, is_stream: int) -> None: + """ + Insert a new row into the ChatDB.Chat_history table. + :param start_time: Timestamp of when the chat started + :param is_stream: Indicator of whether the conversation is ongoing, starting, or ending + """ + try: + conn = mysql.connector.connect(**db_config) + cursor = conn.cursor() + cursor.execute( + """ + INSERT INTO ChatDB.Chat_history (start_time, is_stream) + VALUES (%s, %s) + """, + (start_time, is_stream), + ) + conn.commit() + except mysql.connector.Error as err: + print(f"Error: {err}") + finally: + cursor.close() + conn.close() + + +def get_latest_chat_id() -> int: + """ + Retrieve the latest chat_id from the ChatDB.Chat_history table. + :return: The latest chat_id or None if no chat_id exists. + """ + try: + conn = mysql.connector.connect(**db_config) + cursor = conn.cursor() + cursor.execute( + """ + SELECT chat_id FROM ChatDB.Chat_history + ORDER BY chat_id DESC LIMIT 1 + """ + ) + chat_id = cursor.fetchone()[0] + return chat_id if chat_id else None + except mysql.connector.Error as err: + print(f"Error: {err}") + return None + finally: + cursor.close() + conn.close() + + +def insert_chat_data(chat_id: int, user_message: str, assistant_message: str) -> None: + """ + Insert a new row into the ChatDB.Chat_data table. 
+ :param chat_id: The ID of the chat session + :param user_message: The user's message + :param assistant_message: The assistant's message + """ + try: + conn = mysql.connector.connect(**db_config) + cursor = conn.cursor() + cursor.execute( + """ + INSERT INTO ChatDB.Chat_data (chat_id, user, assistant) + VALUES (%s, %s, %s) + """, + (chat_id, user_message, assistant_message), + ) + conn.commit() + except mysql.connector.Error as err: + print(f"Error: {err}") + finally: + cursor.close() + conn.close() + + +def generate_llm_response( + conversation_history: list[dict], api_service: str = "Groq" +) -> str: + """ + Generate a response from the LLM based on the conversation history. + :param conversation_history: List of dictionaries representing the conversation so far + :param api_service: Choose between "Together" or "Groq" as the API service + :return: Assistant's response as a string + """ + bot_response = "" + if api_service == "Together": + client = Together(api_key=os.environ.get("TOGETHER_API_KEY")) + response = client.chat.completions.create( + model="meta-llama/Llama-3.2-3B-Instruct-Turbo", + messages=conversation_history, + max_tokens=512, + temperature=0.3, + top_p=0.7, + top_k=50, + repetition_penalty=1, + stop=["<|eot_id|>", "<|eom_id|>"], + stream=False, + ) + bot_response = response.choices[0].message.content + else: + client = Groq(api_key=os.environ.get("GROQ_API_KEY")) + response = client.chat.completions.create( + model="llama3-8b-8192", + messages=conversation_history, + max_tokens=1024, + temperature=0.3, + top_p=0.7, + stop=["<|eot_id|>", "<|eom_id|>"], + stream=False, + ) + bot_response = response.choices[0].message.content + + return bot_response + + +def chat_session() -> None: + """ + Start a chatbot session, allowing the user to interact with the LLM. + Saves conversation history in the database and ends the session on "/stop" command. + """ + print("Welcome to the chatbot! 
Type '/stop' to end the conversation.") + + conversation_history = [] + start_time = datetime.datetime.now() + chat_id_pk = None + api_service = "Groq" # or "Together" + + while True: + user_input = input("\nYou: ").strip() + conversation_history.append({"role": "user", "content": user_input}) + + if chat_id_pk is None: + if user_input.lower() == "/stop": + break + bot_response = generate_llm_response(conversation_history, api_service) + conversation_history.append({"role": "assistant", "content": bot_response}) + + is_stream = 1 # New conversation + insert_chat_history(start_time, is_stream) + chat_id_pk = get_latest_chat_id() + insert_chat_data(chat_id_pk, user_input, bot_response) + else: + if user_input.lower() == "/stop": + is_stream = 2 # End of conversation + current_time = datetime.datetime.now() + insert_chat_history(current_time, is_stream) + break + + bot_response = generate_llm_response(conversation_history, api_service) + conversation_history.append({"role": "assistant", "content": bot_response}) + + is_stream = 0 # Continuation of conversation + current_time = datetime.datetime.now() + insert_chat_history(current_time, is_stream) + insert_chat_data(chat_id_pk, user_input, bot_response) + + if len(conversation_history) > 1000: + conversation_history = conversation_history[-3:] + + +# Example of starting a chat session +create_tables() +chat_session() From 7c4905290412629fffe5aefcb117a58ba9dadad5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Oct 2024 06:36:55 +0000 Subject: [PATCH 15/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_network/chatbot/chat_db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_network/chatbot/chat_db.py b/neural_network/chatbot/chat_db.py index c12e311bcadf..ad42def0f4f8 100644 --- a/neural_network/chatbot/chat_db.py +++ b/neural_network/chatbot/chat_db.py @@ -107,7 +107,7 @@ def get_latest_chat_id() -> int: cursor = conn.cursor() cursor.execute( """ - SELECT chat_id FROM ChatDB.Chat_history + SELECT chat_id FROM ChatDB.Chat_history ORDER BY chat_id DESC LIMIT 1 """ ) From 276528d343bd11af4417dacb29f1afbe4cc82ffc Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 12:34:03 +0530 Subject: [PATCH 16/27] Add files via upload made changes suggested by auto-checker --- neural_network/chatbot/chat_db.py | 34 ++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/neural_network/chatbot/chat_db.py b/neural_network/chatbot/chat_db.py index ad42def0f4f8..b537f41215d2 100644 --- a/neural_network/chatbot/chat_db.py +++ b/neural_network/chatbot/chat_db.py @@ -1,7 +1,12 @@ +""" +credits : https://medium.com/google-developer-experts/beyond-live-sessions-building-persistent-memory-chatbots-with-langchain-gemini-pro-and-firebase-19d6f84e21d3 + +""" + import os -import mysql.connector import datetime from dotenv import load_dotenv +import mysql.connector from together import Together from groq import Groq @@ -20,8 +25,9 @@ def create_tables() -> None: """ - Create the ChatDB.Chat_history and ChatDB.Chat_data tables if they do not exist. - Also, create a trigger to update is_stream in Chat_data when Chat_history.is_stream is updated. 
+ Create the ChatDB.Chat_history and ChatDB.Chat_data tables + if they do not exist.Also, create a trigger to update is_stream + in Chat_data when Chat_history.is_stream is updated. """ try: conn = mysql.connector.connect(**db_config) @@ -77,7 +83,8 @@ def insert_chat_history(start_time: datetime.datetime, is_stream: int) -> None: """ Insert a new row into the ChatDB.Chat_history table. :param start_time: Timestamp of when the chat started - :param is_stream: Indicator of whether the conversation is ongoing, starting, or ending + :param is_stream: Indicator of whether the conversation is + ongoing, starting, or ending """ try: conn = mysql.connector.connect(**db_config) @@ -107,7 +114,7 @@ def get_latest_chat_id() -> int: cursor = conn.cursor() cursor.execute( """ - SELECT chat_id FROM ChatDB.Chat_history + SELECT chat_id FROM ChatDB.Chat_history ORDER BY chat_id DESC LIMIT 1 """ ) @@ -115,7 +122,7 @@ def get_latest_chat_id() -> int: return chat_id if chat_id else None except mysql.connector.Error as err: print(f"Error: {err}") - return None + return 0 finally: cursor.close() conn.close() @@ -151,8 +158,10 @@ def generate_llm_response( ) -> str: """ Generate a response from the LLM based on the conversation history. - :param conversation_history: List of dictionaries representing the conversation so far - :param api_service: Choose between "Together" or "Groq" as the API service + :param conversation_history: List of dictionaries representing + the conversation so far + :param api_service: Choose between "Together" or "Groq" as the + API service :return: Assistant's response as a string """ bot_response = "" @@ -194,7 +203,8 @@ def chat_session() -> None: print("Welcome to the chatbot! Type '/stop' to end the conversation.") conversation_history = [] - start_time = datetime.datetime.now() + start_time = datetime.datetime.now(datetime.timezone.utc) + chat_id_pk = None api_service = "Groq" # or "Together" @@ -215,7 +225,7 @@ def chat_session() -> None: else: if user_input.lower() == "/stop": is_stream = 2 # End of conversation - current_time = datetime.datetime.now() + current_time = datetime.datetime.now(datetime.timezone.utc) insert_chat_history(current_time, is_stream) break @@ -223,7 +233,7 @@ def chat_session() -> None: conversation_history.append({"role": "assistant", "content": bot_response}) is_stream = 0 # Continuation of conversation - current_time = datetime.datetime.now() + current_time = datetime.datetime.now(datetime.timezone.utc) insert_chat_history(current_time, is_stream) insert_chat_data(chat_id_pk, user_input, bot_response) @@ -231,6 +241,6 @@ def chat_session() -> None: conversation_history = conversation_history[-3:] -# Example of starting a chat session +# starting a chat session create_tables() chat_session() From 789e97514886374c5795ffa99f33f67e626da827 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Oct 2024 07:07:35 +0000 Subject: [PATCH 17/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_network/chatbot/chat_db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_network/chatbot/chat_db.py b/neural_network/chatbot/chat_db.py index b537f41215d2..5f27512942a0 100644 --- a/neural_network/chatbot/chat_db.py +++ b/neural_network/chatbot/chat_db.py @@ -114,7 +114,7 @@ def get_latest_chat_id() -> int: cursor = conn.cursor() cursor.execute( """ - SELECT chat_id FROM ChatDB.Chat_history + SELECT chat_id FROM 
ChatDB.Chat_history ORDER BY chat_id DESC LIMIT 1 """ ) @@ -241,6 +241,6 @@ def chat_session() -> None: conversation_history = conversation_history[-3:] -# starting a chat session +# starting a chat session create_tables() chat_session() From 3e4430dfb66dd2a1339fa9aa32ec50b97ee5293e Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 13:14:28 +0530 Subject: [PATCH 18/27] Add files via upload --- neural_network/chatbot/chat_db.py | 484 ++++++++++++++++-------------- 1 file changed, 263 insertions(+), 221 deletions(-) diff --git a/neural_network/chatbot/chat_db.py b/neural_network/chatbot/chat_db.py index 5f27512942a0..3dce67120c94 100644 --- a/neural_network/chatbot/chat_db.py +++ b/neural_network/chatbot/chat_db.py @@ -1,14 +1,12 @@ -""" -credits : https://medium.com/google-developer-experts/beyond-live-sessions-building-persistent-memory-chatbots-with-langchain-gemini-pro-and-firebase-19d6f84e21d3 - -""" - import os import datetime -from dotenv import load_dotenv import mysql.connector +from dotenv import load_dotenv from together import Together from groq import Groq +import unittest +from unittest.mock import patch +from io import StringIO load_dotenv() @@ -20,227 +18,271 @@ "database": os.environ.get("DB_NAME"), } -api_service = os.environ.get("API_SERVICE") - - -def create_tables() -> None: - """ - Create the ChatDB.Chat_history and ChatDB.Chat_data tables - if they do not exist.Also, create a trigger to update is_stream - in Chat_data when Chat_history.is_stream is updated. - """ - try: - conn = mysql.connector.connect(**db_config) - cursor = conn.cursor() - - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS ChatDB.Chat_history ( - chat_id INT AUTO_INCREMENT PRIMARY KEY, - start_time DATETIME, - is_stream INT - ) +class LLMService: + def __init__(self, api_service: str): + self.api_service = api_service + if self.api_service == "Together": + self.client = Together(api_key=os.environ.get("TOGETHER_API_KEY")) + else: + self.client = Groq(api_key=os.environ.get("GROQ_API_KEY")) + + def generate_response(self, conversation_history: list[dict]) -> str: """ - ) - - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS ChatDB.Chat_data ( - id INT AUTO_INCREMENT PRIMARY KEY, - chat_id INT, - user TEXT, - assistant TEXT, - FOREIGN KEY (chat_id) REFERENCES ChatDB.Chat_history(chat_id) - ) + Generate a response from the LLM based on the conversation history. 
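+
+        Both providers are called with `stream=False`, so the full completion
+        arrives in a single response object: Together is mapped to
+        `meta-llama/Llama-3.2-3B-Instruct-Turbo` and Groq to `llama3-8b-8192`.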
+ + Example: + >>> llm_service = LLMService(api_service="Groq") + >>> response = llm_service.generate_response([{"role": "user", "content": "Hello"}]) + >>> isinstance(response, str) + True """ - ) - - cursor.execute("DROP TRIGGER IF EXISTS update_is_stream;") - - cursor.execute( - """ - CREATE TRIGGER update_is_stream - AFTER UPDATE ON ChatDB.Chat_history - FOR EACH ROW - BEGIN - UPDATE ChatDB.Chat_data - SET is_stream = NEW.is_stream - WHERE chat_id = NEW.chat_id; - END; + if self.api_service == "Together": + response = self.client.chat.completions.create( + model="meta-llama/Llama-3.2-3B-Instruct-Turbo", + messages=conversation_history, + max_tokens=512, + temperature=0.3, + top_p=0.7, + top_k=50, + repetition_penalty=1, + stop=["<|eot_id|>", "<|eom_id|>"], + stream=False, + ) + else: + response = self.client.chat.completions.create( + model="llama3-8b-8192", + messages=conversation_history, + max_tokens=1024, + temperature=0.3, + top_p=0.7, + stop=["<|eot_id|>", "<|eom_id|>"], + stream=False, + ) + + return response.choices[0].message.content + + +class ChatDB: + @staticmethod + def create_tables() -> None: """ - ) - - conn.commit() - except mysql.connector.Error as err: - print(f"Error: {err}") - finally: - cursor.close() - conn.close() - print("Tables and trigger created successfully") - - -def insert_chat_history(start_time: datetime.datetime, is_stream: int) -> None: - """ - Insert a new row into the ChatDB.Chat_history table. - :param start_time: Timestamp of when the chat started - :param is_stream: Indicator of whether the conversation is - ongoing, starting, or ending - """ - try: - conn = mysql.connector.connect(**db_config) - cursor = conn.cursor() - cursor.execute( - """ - INSERT INTO ChatDB.Chat_history (start_time, is_stream) - VALUES (%s, %s) - """, - (start_time, is_stream), - ) - conn.commit() - except mysql.connector.Error as err: - print(f"Error: {err}") - finally: - cursor.close() - conn.close() - - -def get_latest_chat_id() -> int: - """ - Retrieve the latest chat_id from the ChatDB.Chat_history table. - :return: The latest chat_id or None if no chat_id exists. - """ - try: - conn = mysql.connector.connect(**db_config) - cursor = conn.cursor() - cursor.execute( - """ - SELECT chat_id FROM ChatDB.Chat_history - ORDER BY chat_id DESC LIMIT 1 + Create the ChatDB.Chat_history and ChatDB.Chat_data tables + if they do not exist. Also, create a trigger to update is_stream + in Chat_data when Chat_history.is_stream is updated. + + Example: + >>> ChatDB.create_tables() + Tables and trigger created successfully """ - ) - chat_id = cursor.fetchone()[0] - return chat_id if chat_id else None - except mysql.connector.Error as err: - print(f"Error: {err}") - return 0 - finally: - cursor.close() - conn.close() - - -def insert_chat_data(chat_id: int, user_message: str, assistant_message: str) -> None: - """ - Insert a new row into the ChatDB.Chat_data table. 
- :param chat_id: The ID of the chat session - :param user_message: The user's message - :param assistant_message: The assistant's message - """ - try: - conn = mysql.connector.connect(**db_config) - cursor = conn.cursor() - cursor.execute( - """ - INSERT INTO ChatDB.Chat_data (chat_id, user, assistant) - VALUES (%s, %s, %s) - """, - (chat_id, user_message, assistant_message), - ) - conn.commit() - except mysql.connector.Error as err: - print(f"Error: {err}") - finally: - cursor.close() - conn.close() - - -def generate_llm_response( - conversation_history: list[dict], api_service: str = "Groq" -) -> str: - """ - Generate a response from the LLM based on the conversation history. - :param conversation_history: List of dictionaries representing - the conversation so far - :param api_service: Choose between "Together" or "Groq" as the - API service - :return: Assistant's response as a string - """ - bot_response = "" - if api_service == "Together": - client = Together(api_key=os.environ.get("TOGETHER_API_KEY")) - response = client.chat.completions.create( - model="meta-llama/Llama-3.2-3B-Instruct-Turbo", - messages=conversation_history, - max_tokens=512, - temperature=0.3, - top_p=0.7, - top_k=50, - repetition_penalty=1, - stop=["<|eot_id|>", "<|eom_id|>"], - stream=False, - ) - bot_response = response.choices[0].message.content - else: - client = Groq(api_key=os.environ.get("GROQ_API_KEY")) - response = client.chat.completions.create( - model="llama3-8b-8192", - messages=conversation_history, - max_tokens=1024, - temperature=0.3, - top_p=0.7, - stop=["<|eot_id|>", "<|eom_id|>"], - stream=False, - ) - bot_response = response.choices[0].message.content - - return bot_response - - -def chat_session() -> None: - """ - Start a chatbot session, allowing the user to interact with the LLM. - Saves conversation history in the database and ends the session on "/stop" command. - """ - print("Welcome to the chatbot! 
Type '/stop' to end the conversation.") - - conversation_history = [] - start_time = datetime.datetime.now(datetime.timezone.utc) - - chat_id_pk = None - api_service = "Groq" # or "Together" - - while True: - user_input = input("\nYou: ").strip() - conversation_history.append({"role": "user", "content": user_input}) - - if chat_id_pk is None: - if user_input.lower() == "/stop": - break - bot_response = generate_llm_response(conversation_history, api_service) - conversation_history.append({"role": "assistant", "content": bot_response}) - - is_stream = 1 # New conversation - insert_chat_history(start_time, is_stream) - chat_id_pk = get_latest_chat_id() - insert_chat_data(chat_id_pk, user_input, bot_response) - else: - if user_input.lower() == "/stop": - is_stream = 2 # End of conversation - current_time = datetime.datetime.now(datetime.timezone.utc) - insert_chat_history(current_time, is_stream) - break + try: + conn = mysql.connector.connect(**db_config) + cursor = conn.cursor() + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS ChatDB.Chat_history ( + chat_id INT AUTO_INCREMENT PRIMARY KEY, + start_time DATETIME, + is_stream INT + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS ChatDB.Chat_data ( + id INT AUTO_INCREMENT PRIMARY KEY, + chat_id INT, + user TEXT, + assistant TEXT, + FOREIGN KEY (chat_id) REFERENCES ChatDB.Chat_history(chat_id) + ) + """ + ) + + cursor.execute("DROP TRIGGER IF EXISTS update_is_stream;") + + cursor.execute( + """ + CREATE TRIGGER update_is_stream + AFTER UPDATE ON ChatDB.Chat_history + FOR EACH ROW + BEGIN + UPDATE ChatDB.Chat_data + SET is_stream = NEW.is_stream + WHERE chat_id = NEW.chat_id; + END; + """ + ) + + conn.commit() + print("Tables and trigger created successfully") + except mysql.connector.Error as err: + print(f"Error: {err}") + finally: + cursor.close() + conn.close() + + @staticmethod + def insert_chat_history(start_time: datetime.datetime, is_stream: int) -> int: + """ + Insert a new row into the ChatDB.Chat_history table and return the inserted chat_id. - bot_response = generate_llm_response(conversation_history, api_service) - conversation_history.append({"role": "assistant", "content": bot_response}) + Example: + >>> from datetime import datetime + >>> chat_id = ChatDB.insert_chat_history(datetime(2024, 1, 1, 12, 0, 0), 1) + >>> isinstance(chat_id, int) + True + """ + try: + conn = mysql.connector.connect(**db_config) + cursor = conn.cursor() + cursor.execute( + """ + INSERT INTO ChatDB.Chat_history (start_time, is_stream) + VALUES (%s, %s) + """, + (start_time, is_stream), + ) + conn.commit() + cursor.execute("SELECT LAST_INSERT_ID()") + chat_id = cursor.fetchone()[0] + print("Chat history inserted successfully.") + return chat_id + except mysql.connector.Error as err: + print(f"Error: {err}") + return None + finally: + cursor.close() + conn.close() + + @staticmethod + def get_latest_chat_id() -> int: + """ + Retrieve the latest chat_id from the ChatDB.Chat_history table. + :return: The latest chat_id or None if no chat_id exists. 
- is_stream = 0 # Continuation of conversation - current_time = datetime.datetime.now(datetime.timezone.utc) - insert_chat_history(current_time, is_stream) - insert_chat_data(chat_id_pk, user_input, bot_response) + Example: + >>> chat_id = ChatDB.get_latest_chat_id() + >>> isinstance(chat_id, int) + True + """ + try: + conn = mysql.connector.connect(**db_config) + cursor = conn.cursor() + cursor.execute( + """ + SELECT chat_id FROM ChatDB.Chat_history + ORDER BY chat_id DESC LIMIT 1 + """ + ) + chat_id = cursor.fetchone()[0] + return chat_id if chat_id else None + except mysql.connector.Error as err: + print(f"Error: {err}") + return None + finally: + cursor.close() + conn.close() + + @staticmethod + def insert_chat_data(chat_id: int, user_message: str, assistant_message: str) -> None: + """ + Insert a new row into the ChatDB.Chat_data table. + + Example: + >>> ChatDB.insert_chat_data(1, 'Hello', 'Hi there!') + Chat data inserted successfully. + """ + try: + conn = mysql.connector.connect(**db_config) + cursor = conn.cursor() + cursor.execute( + """ + INSERT INTO ChatDB.Chat_data (chat_id, user, assistant) + VALUES (%s, %s, %s) + """, + (chat_id, user_message, assistant_message), + ) + conn.commit() + print("Chat data inserted successfully.") + except mysql.connector.Error as err: + print(f"Error: {err}") + finally: + cursor.close() + conn.close() + + +class Chatbot: + def __init__(self, api_service: str): + self.llm_service = LLMService(api_service) + self.conversation_history = [] + self.chat_id_pk = None + self.start_time = datetime.datetime.now(datetime.timezone.utc) + + def chat_session(self) -> None: + """ + Start a chatbot session, allowing the user to interact with the LLM. + Saves conversation history in the database and ends the session on "/stop" command. + + Example: + >>> chatbot = Chatbot(api_service="Groq") + >>> chatbot.chat_session() # This will be mocked in the tests + Welcome to the chatbot! Type '/stop' to end the conversation. + """ + print("Welcome to the chatbot! 
Type '/stop' to end the conversation.") + + while True: + user_input = input("\nYou: ").strip() + self.conversation_history.append({"role": "user", "content": user_input}) + + if self.chat_id_pk is None: + if user_input.lower() == "/stop": + break + bot_response = self.llm_service.generate_response(self.conversation_history) + self.conversation_history.append({"role": "assistant", "content": bot_response}) + + is_stream = 1 # New conversation + self.chat_id_pk = ChatDB.insert_chat_history(self.start_time, is_stream) # Return the chat_id + if self.chat_id_pk: + ChatDB.insert_chat_data(self.chat_id_pk, user_input, bot_response) + else: + if user_input.lower() == "/stop": + is_stream = 2 # End of conversation + current_time = datetime.datetime.now(datetime.timezone.utc) + ChatDB.insert_chat_history(current_time, is_stream) + break + + bot_response = self.llm_service.generate_response(self.conversation_history) + self.conversation_history.append({"role": "assistant", "content": bot_response}) + + is_stream = 0 # Continuation of conversation + current_time = datetime.datetime.now(datetime.timezone.utc) + ChatDB.insert_chat_history(current_time, is_stream) + ChatDB.insert_chat_data(self.chat_id_pk, user_input, bot_response) - if len(conversation_history) > 1000: - conversation_history = conversation_history[-3:] + if len(self.conversation_history) > 1000: + self.conversation_history = self.conversation_history[-3:] +# Test cases for Chatbot +class TestChatbot(unittest.TestCase): -# starting a chat session -create_tables() -chat_session() + @patch('builtins.input', side_effect=["Hello", "/stop"]) + @patch('sys.stdout', new_callable=StringIO) + def test_chat_session(self, mock_stdout, mock_input): + """ + Test the chat_session method for expected welcome message. + """ + chatbot = Chatbot(api_service="Groq") + chatbot.chat_session() + + # Check for the welcome message in the output + output = mock_stdout.getvalue().strip().splitlines() + self.assertIn("Welcome to the chatbot! Type '/stop' to end the conversation.", output) + self.assertTrue(any("Chat history inserted successfully." in line for line in output)) + self.assertTrue(any("Chat data inserted successfully." 
in line for line in output)) + +if __name__ == "__main__": + # + ChatDB.create_tables() + unittest.main() From 322434dc169b6b78926324639ce647ba838826ed Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 13:18:09 +0530 Subject: [PATCH 19/27] Add files via upload --- neural_network/chatbot/chat_db.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/neural_network/chatbot/chat_db.py b/neural_network/chatbot/chat_db.py index 3dce67120c94..e888bc0eceb7 100644 --- a/neural_network/chatbot/chat_db.py +++ b/neural_network/chatbot/chat_db.py @@ -1,3 +1,7 @@ +""" +credits : https://medium.com/google-developer-experts/beyond-live-sessions-building-persistent-memory-chatbots-with-langchain-gemini-pro-and-firebase-19d6f84e21d3 +""" + import os import datetime import mysql.connector From 5d91b30f87d281ac1cad24f22d4c8bc89746a385 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Oct 2024 07:49:51 +0000 Subject: [PATCH 20/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_network/chatbot/chat_db.py | 54 +++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/neural_network/chatbot/chat_db.py b/neural_network/chatbot/chat_db.py index e888bc0eceb7..c1758ed0ac95 100644 --- a/neural_network/chatbot/chat_db.py +++ b/neural_network/chatbot/chat_db.py @@ -22,6 +22,7 @@ "database": os.environ.get("DB_NAME"), } + class LLMService: def __init__(self, api_service: str): self.api_service = api_service @@ -62,7 +63,7 @@ def generate_response(self, conversation_history: list[dict]) -> str: stop=["<|eot_id|>", "<|eom_id|>"], stream=False, ) - + return response.choices[0].message.content @@ -176,7 +177,7 @@ def get_latest_chat_id() -> int: cursor = conn.cursor() cursor.execute( """ - SELECT chat_id FROM ChatDB.Chat_history + SELECT chat_id FROM ChatDB.Chat_history ORDER BY chat_id DESC LIMIT 1 """ ) @@ -190,10 +191,12 @@ def get_latest_chat_id() -> int: conn.close() @staticmethod - def insert_chat_data(chat_id: int, user_message: str, assistant_message: str) -> None: + def insert_chat_data( + chat_id: int, user_message: str, assistant_message: str + ) -> None: """ Insert a new row into the ChatDB.Chat_data table. - + Example: >>> ChatDB.insert_chat_data(1, 'Hello', 'Hi there!') Chat data inserted successfully. @@ -228,7 +231,7 @@ def chat_session(self) -> None: """ Start a chatbot session, allowing the user to interact with the LLM. Saves conversation history in the database and ends the session on "/stop" command. 
- + Example: >>> chatbot = Chatbot(api_service="Groq") >>> chatbot.chat_session() # This will be mocked in the tests @@ -243,11 +246,17 @@ def chat_session(self) -> None: if self.chat_id_pk is None: if user_input.lower() == "/stop": break - bot_response = self.llm_service.generate_response(self.conversation_history) - self.conversation_history.append({"role": "assistant", "content": bot_response}) + bot_response = self.llm_service.generate_response( + self.conversation_history + ) + self.conversation_history.append( + {"role": "assistant", "content": bot_response} + ) is_stream = 1 # New conversation - self.chat_id_pk = ChatDB.insert_chat_history(self.start_time, is_stream) # Return the chat_id + self.chat_id_pk = ChatDB.insert_chat_history( + self.start_time, is_stream + ) # Return the chat_id if self.chat_id_pk: ChatDB.insert_chat_data(self.chat_id_pk, user_input, bot_response) else: @@ -257,8 +266,12 @@ def chat_session(self) -> None: ChatDB.insert_chat_history(current_time, is_stream) break - bot_response = self.llm_service.generate_response(self.conversation_history) - self.conversation_history.append({"role": "assistant", "content": bot_response}) + bot_response = self.llm_service.generate_response( + self.conversation_history + ) + self.conversation_history.append( + {"role": "assistant", "content": bot_response} + ) is_stream = 0 # Continuation of conversation current_time = datetime.datetime.now(datetime.timezone.utc) @@ -268,23 +281,30 @@ def chat_session(self) -> None: if len(self.conversation_history) > 1000: self.conversation_history = self.conversation_history[-3:] + # Test cases for Chatbot class TestChatbot(unittest.TestCase): - - @patch('builtins.input', side_effect=["Hello", "/stop"]) - @patch('sys.stdout', new_callable=StringIO) + @patch("builtins.input", side_effect=["Hello", "/stop"]) + @patch("sys.stdout", new_callable=StringIO) def test_chat_session(self, mock_stdout, mock_input): """ Test the chat_session method for expected welcome message. """ chatbot = Chatbot(api_service="Groq") chatbot.chat_session() - + # Check for the welcome message in the output output = mock_stdout.getvalue().strip().splitlines() - self.assertIn("Welcome to the chatbot! Type '/stop' to end the conversation.", output) - self.assertTrue(any("Chat history inserted successfully." in line for line in output)) - self.assertTrue(any("Chat data inserted successfully." in line for line in output)) + self.assertIn( + "Welcome to the chatbot! Type '/stop' to end the conversation.", output + ) + self.assertTrue( + any("Chat history inserted successfully." in line for line in output) + ) + self.assertTrue( + any("Chat data inserted successfully." 
in line for line in output) + ) + if __name__ == "__main__": # From 30170fd9a37fb0756fbbdcf6cc9e070dc53958c1 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 21:33:49 +0530 Subject: [PATCH 21/27] Delete neural_network/chatbot directory --- neural_network/chatbot/README.md | 83 ------- neural_network/chatbot/chat_db.py | 312 ------------------------ neural_network/chatbot/requirements.txt | 57 ----- 3 files changed, 452 deletions(-) delete mode 100644 neural_network/chatbot/README.md delete mode 100644 neural_network/chatbot/chat_db.py delete mode 100644 neural_network/chatbot/requirements.txt diff --git a/neural_network/chatbot/README.md b/neural_network/chatbot/README.md deleted file mode 100644 index acddd4c8f671..000000000000 --- a/neural_network/chatbot/README.md +++ /dev/null @@ -1,83 +0,0 @@ - - -# Chatbot with LLM Integration and Database Storage - -This chatbot application integrates LLM (Large Language Model) API services, **Together** and **Groq**(you can use any one of them), to generate AI-driven responses. It stores conversation history in a MySQL database and manages chat sessions with triggers that update the status of conversations automatically. - -## Features -- Supports LLM response generation using **Together** and **Groq** APIs. -- Stores chat sessions and message exchanges in MySQL database tables. -- Automatically updates chat session status using database triggers. -- Manages conversation history with user-assistant interaction. - -## Requirements - -Before running the application, ensure the following dependencies are installed: - -- Python 3.13+ -- MySQL Server -- The following Python libraries: - ```bash - pip3 install -r requirements.txt - ``` - -## Setup Instructions - -### Step 1: Set Up Environment Variables - -Create a `.env` file in the root directory of your project and add the following entries for your database credentials and API keys: - -``` -# Together API key -TOGETHER_API_KEY="YOUR_API_KEY" - -# Groq API key -GROQ_API_KEY = "YOUR_API_KEY" - -# MySQL connectionDB (if you're running locally) -DB_USER = "" -DB_PASSWORD = "" -DB_HOST = "127.0.0.1" -DB_NAME = "ChatDB" -PORT = "3306" - -# API service to you(or use "Together") -API_SERVICE = "Groq" -``` - -### Step 2: Create MySQL Tables and Trigger - -The `create_tables()` function in the script automatically creates the necessary tables and a trigger for updating chat session statuses. To ensure the database is set up correctly, the function is called at the beginning of the script. - -Ensure that your MySQL server is running and accessible before running the code. - -### Step 3: Run the Application - -To start the chatbot: - -1. Ensure your MySQL server is running. -2. Open a terminal and run the Python script: - -```bash -python3 chat_db.py -``` - -The chatbot will initialize, and you can interact with it by typing your inputs. Type `/stop` to end the conversation. - -### Step 4: Test and Validate Code - -This project uses doctests to ensure that the functions work as expected. To run the doctests: - -```bash -python3 -m doctest -v chatbot.py -``` - -Make sure to add doctests to all your functions where applicable, to validate both valid and erroneous inputs. - -### Key Functions - -- **create_tables()**: Sets up the MySQL tables (`Chat_history` and `Chat_data`) and the `update_is_stream` trigger. -- **insert_chat_history()**: Inserts a new chat session into the `Chat_history` table. 
-- **insert_chat_data()**: Inserts user-assistant message pairs into the `Chat_data` table. -- **generate_llm_response()**: Generates a response from the selected LLM API service, either **Together** or **Groq**. - diff --git a/neural_network/chatbot/chat_db.py b/neural_network/chatbot/chat_db.py deleted file mode 100644 index c1758ed0ac95..000000000000 --- a/neural_network/chatbot/chat_db.py +++ /dev/null @@ -1,312 +0,0 @@ -""" -credits : https://medium.com/google-developer-experts/beyond-live-sessions-building-persistent-memory-chatbots-with-langchain-gemini-pro-and-firebase-19d6f84e21d3 -""" - -import os -import datetime -import mysql.connector -from dotenv import load_dotenv -from together import Together -from groq import Groq -import unittest -from unittest.mock import patch -from io import StringIO - -load_dotenv() - -# Database configuration -db_config = { - "user": os.environ.get("DB_USER"), - "password": os.environ.get("DB_PASSWORD"), - "host": os.environ.get("DB_HOST"), - "database": os.environ.get("DB_NAME"), -} - - -class LLMService: - def __init__(self, api_service: str): - self.api_service = api_service - if self.api_service == "Together": - self.client = Together(api_key=os.environ.get("TOGETHER_API_KEY")) - else: - self.client = Groq(api_key=os.environ.get("GROQ_API_KEY")) - - def generate_response(self, conversation_history: list[dict]) -> str: - """ - Generate a response from the LLM based on the conversation history. - - Example: - >>> llm_service = LLMService(api_service="Groq") - >>> response = llm_service.generate_response([{"role": "user", "content": "Hello"}]) - >>> isinstance(response, str) - True - """ - if self.api_service == "Together": - response = self.client.chat.completions.create( - model="meta-llama/Llama-3.2-3B-Instruct-Turbo", - messages=conversation_history, - max_tokens=512, - temperature=0.3, - top_p=0.7, - top_k=50, - repetition_penalty=1, - stop=["<|eot_id|>", "<|eom_id|>"], - stream=False, - ) - else: - response = self.client.chat.completions.create( - model="llama3-8b-8192", - messages=conversation_history, - max_tokens=1024, - temperature=0.3, - top_p=0.7, - stop=["<|eot_id|>", "<|eom_id|>"], - stream=False, - ) - - return response.choices[0].message.content - - -class ChatDB: - @staticmethod - def create_tables() -> None: - """ - Create the ChatDB.Chat_history and ChatDB.Chat_data tables - if they do not exist. Also, create a trigger to update is_stream - in Chat_data when Chat_history.is_stream is updated. 
- - Example: - >>> ChatDB.create_tables() - Tables and trigger created successfully - """ - try: - conn = mysql.connector.connect(**db_config) - cursor = conn.cursor() - - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS ChatDB.Chat_history ( - chat_id INT AUTO_INCREMENT PRIMARY KEY, - start_time DATETIME, - is_stream INT - ) - """ - ) - - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS ChatDB.Chat_data ( - id INT AUTO_INCREMENT PRIMARY KEY, - chat_id INT, - user TEXT, - assistant TEXT, - FOREIGN KEY (chat_id) REFERENCES ChatDB.Chat_history(chat_id) - ) - """ - ) - - cursor.execute("DROP TRIGGER IF EXISTS update_is_stream;") - - cursor.execute( - """ - CREATE TRIGGER update_is_stream - AFTER UPDATE ON ChatDB.Chat_history - FOR EACH ROW - BEGIN - UPDATE ChatDB.Chat_data - SET is_stream = NEW.is_stream - WHERE chat_id = NEW.chat_id; - END; - """ - ) - - conn.commit() - print("Tables and trigger created successfully") - except mysql.connector.Error as err: - print(f"Error: {err}") - finally: - cursor.close() - conn.close() - - @staticmethod - def insert_chat_history(start_time: datetime.datetime, is_stream: int) -> int: - """ - Insert a new row into the ChatDB.Chat_history table and return the inserted chat_id. - - Example: - >>> from datetime import datetime - >>> chat_id = ChatDB.insert_chat_history(datetime(2024, 1, 1, 12, 0, 0), 1) - >>> isinstance(chat_id, int) - True - """ - try: - conn = mysql.connector.connect(**db_config) - cursor = conn.cursor() - cursor.execute( - """ - INSERT INTO ChatDB.Chat_history (start_time, is_stream) - VALUES (%s, %s) - """, - (start_time, is_stream), - ) - conn.commit() - cursor.execute("SELECT LAST_INSERT_ID()") - chat_id = cursor.fetchone()[0] - print("Chat history inserted successfully.") - return chat_id - except mysql.connector.Error as err: - print(f"Error: {err}") - return None - finally: - cursor.close() - conn.close() - - @staticmethod - def get_latest_chat_id() -> int: - """ - Retrieve the latest chat_id from the ChatDB.Chat_history table. - :return: The latest chat_id or None if no chat_id exists. - - Example: - >>> chat_id = ChatDB.get_latest_chat_id() - >>> isinstance(chat_id, int) - True - """ - try: - conn = mysql.connector.connect(**db_config) - cursor = conn.cursor() - cursor.execute( - """ - SELECT chat_id FROM ChatDB.Chat_history - ORDER BY chat_id DESC LIMIT 1 - """ - ) - chat_id = cursor.fetchone()[0] - return chat_id if chat_id else None - except mysql.connector.Error as err: - print(f"Error: {err}") - return None - finally: - cursor.close() - conn.close() - - @staticmethod - def insert_chat_data( - chat_id: int, user_message: str, assistant_message: str - ) -> None: - """ - Insert a new row into the ChatDB.Chat_data table. - - Example: - >>> ChatDB.insert_chat_data(1, 'Hello', 'Hi there!') - Chat data inserted successfully. 
- """ - try: - conn = mysql.connector.connect(**db_config) - cursor = conn.cursor() - cursor.execute( - """ - INSERT INTO ChatDB.Chat_data (chat_id, user, assistant) - VALUES (%s, %s, %s) - """, - (chat_id, user_message, assistant_message), - ) - conn.commit() - print("Chat data inserted successfully.") - except mysql.connector.Error as err: - print(f"Error: {err}") - finally: - cursor.close() - conn.close() - - -class Chatbot: - def __init__(self, api_service: str): - self.llm_service = LLMService(api_service) - self.conversation_history = [] - self.chat_id_pk = None - self.start_time = datetime.datetime.now(datetime.timezone.utc) - - def chat_session(self) -> None: - """ - Start a chatbot session, allowing the user to interact with the LLM. - Saves conversation history in the database and ends the session on "/stop" command. - - Example: - >>> chatbot = Chatbot(api_service="Groq") - >>> chatbot.chat_session() # This will be mocked in the tests - Welcome to the chatbot! Type '/stop' to end the conversation. - """ - print("Welcome to the chatbot! Type '/stop' to end the conversation.") - - while True: - user_input = input("\nYou: ").strip() - self.conversation_history.append({"role": "user", "content": user_input}) - - if self.chat_id_pk is None: - if user_input.lower() == "/stop": - break - bot_response = self.llm_service.generate_response( - self.conversation_history - ) - self.conversation_history.append( - {"role": "assistant", "content": bot_response} - ) - - is_stream = 1 # New conversation - self.chat_id_pk = ChatDB.insert_chat_history( - self.start_time, is_stream - ) # Return the chat_id - if self.chat_id_pk: - ChatDB.insert_chat_data(self.chat_id_pk, user_input, bot_response) - else: - if user_input.lower() == "/stop": - is_stream = 2 # End of conversation - current_time = datetime.datetime.now(datetime.timezone.utc) - ChatDB.insert_chat_history(current_time, is_stream) - break - - bot_response = self.llm_service.generate_response( - self.conversation_history - ) - self.conversation_history.append( - {"role": "assistant", "content": bot_response} - ) - - is_stream = 0 # Continuation of conversation - current_time = datetime.datetime.now(datetime.timezone.utc) - ChatDB.insert_chat_history(current_time, is_stream) - ChatDB.insert_chat_data(self.chat_id_pk, user_input, bot_response) - - if len(self.conversation_history) > 1000: - self.conversation_history = self.conversation_history[-3:] - - -# Test cases for Chatbot -class TestChatbot(unittest.TestCase): - @patch("builtins.input", side_effect=["Hello", "/stop"]) - @patch("sys.stdout", new_callable=StringIO) - def test_chat_session(self, mock_stdout, mock_input): - """ - Test the chat_session method for expected welcome message. - """ - chatbot = Chatbot(api_service="Groq") - chatbot.chat_session() - - # Check for the welcome message in the output - output = mock_stdout.getvalue().strip().splitlines() - self.assertIn( - "Welcome to the chatbot! Type '/stop' to end the conversation.", output - ) - self.assertTrue( - any("Chat history inserted successfully." in line for line in output) - ) - self.assertTrue( - any("Chat data inserted successfully." 
in line for line in output) - ) - - -if __name__ == "__main__": - # - ChatDB.create_tables() - unittest.main() diff --git a/neural_network/chatbot/requirements.txt b/neural_network/chatbot/requirements.txt deleted file mode 100644 index 0f1204243a5d..000000000000 --- a/neural_network/chatbot/requirements.txt +++ /dev/null @@ -1,57 +0,0 @@ -aiohappyeyeballs==2.4.2 -aiohttp==3.10.8 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.6.0 -asgiref==3.8.1 -attrs==24.2.0 -black==24.10.0 -certifi==2024.8.30 -cfgv==3.4.0 -charset-normalizer==3.3.2 -click==8.1.7 -distlib==0.3.9 -distro==1.9.0 -Django==5.1.1 -djangorestframework==3.15.2 -eval_type_backport==0.2.0 -filelock==3.16.1 -frozenlist==1.4.1 -groq==0.11.0 -h11==0.14.0 -httpcore==1.0.5 -httpx==0.27.2 -identify==2.6.1 -idna==3.10 -markdown-it-py==3.0.0 -mdurl==0.1.2 -multidict==6.1.0 -mypy-extensions==1.0.0 -mysql-connector-python==9.0.0 -nodeenv==1.9.1 -numpy==2.1.1 -packaging==24.1 -pathspec==0.12.1 -pillow==10.4.0 -platformdirs==4.3.6 -pre_commit==4.0.1 -pyarrow==17.0.0 -pydantic==2.9.2 -pydantic_core==2.23.4 -Pygments==2.18.0 -python-dotenv==1.0.1 -PyYAML==6.0.2 -requests==2.32.3 -rich==13.8.1 -ruff==0.7.0 -shellingham==1.5.4 -sniffio==1.3.1 -sqlparse==0.5.1 -tabulate==0.9.0 -together==1.3.0 -tqdm==4.66.5 -typer==0.12.5 -typing_extensions==4.12.2 -urllib3==2.2.3 -virtualenv==20.27.0 -yarl==1.13.1 From b23cc1a5158d4245060a81c2e71ac217af1c6b28 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 22:00:48 +0530 Subject: [PATCH 22/27] Add files via upload --- neural_network/sliding_window_attention.py | 94 ++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 neural_network/sliding_window_attention.py diff --git a/neural_network/sliding_window_attention.py b/neural_network/sliding_window_attention.py new file mode 100644 index 000000000000..a886ff6fa616 --- /dev/null +++ b/neural_network/sliding_window_attention.py @@ -0,0 +1,94 @@ +""" + - - - - - -- - - - - - - - - - - - - - - - - - - - - - - +Name - - Sliding Window Attention Mechanism +Goal - - Implement a neural network architecture using sliding window attention for sequence modeling tasks. +Detail: Total 5 layers neural network + * Input layer + * Sliding Window Attention Layer + * Feedforward Layer + * Output Layer +Author: Stephen Lee +Github: 245885195@qq.com +Date: 2024.10.20 +References: + 1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*. + 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention." *arXiv preprint arXiv:2006.16236*. + - - - - - -- - - - - - - - - - - - - - - - - - - - - - - +""" + +import numpy as np + + +class SlidingWindowAttention: + """Sliding Window Attention Module. + + This class implements a sliding window attention mechanism where the model + attends to a fixed-size window of context around each token. + + Attributes: + window_size (int): The size of the attention window. + embed_dim (int): The dimensionality of the input embeddings. + """ + + def __init__(self, embed_dim: int, window_size: int): + """ + Initialize the SlidingWindowAttention module. + + Args: + embed_dim (int): The dimensionality of the input embeddings. + window_size (int): The size of the attention window. 
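+
+        Example:
+            A minimal shape check (illustrative; it relies only on the
+            attributes this constructor sets):
+
+            >>> attn = SlidingWindowAttention(embed_dim=4, window_size=3)
+            >>> attn.attention_weights.shape
+            (4, 4)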
+ """ + self.window_size = window_size + self.embed_dim = embed_dim + self.attention_weights = np.random.randn(embed_dim, embed_dim) + + def forward(self, x: np.ndarray) -> np.ndarray: + """ + Forward pass for the sliding window attention. + + Args: + x (np.ndarray): Input tensor of shape (batch_size, seq_length, embed_dim). + + Returns: + np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim). + + >>> x = np.random.randn(2, 10, 4) # Batch size 2, sequence length 10, embedding dimension 4 + >>> attention = SlidingWindowAttention(embed_dim=4, window_size=3) + >>> output = attention.forward(x) + >>> output.shape + (2, 10, 4) + >>> (output.sum() != 0).item() # Check if output is non-zero + True + """ + batch_size, seq_length, _ = x.shape + output = np.zeros_like(x) + + for i in range(seq_length): + # Define the window range + start = max(0, i - self.window_size // 2) + end = min(seq_length, i + self.window_size // 2 + 1) + + # Extract the local window + local_window = x[:, start:end, :] + + # Compute attention scores + attention_scores = np.matmul(local_window, self.attention_weights) + + # Average the attention scores + output[:, i, :] = np.mean(attention_scores, axis=1) + + return output + + +if __name__ == "__main__": + import doctest + + doctest.testmod() + + # Example usage + x = np.random.randn( + 2, 10, 4 + ) # Batch size 2, sequence length 10, embedding dimension 4 + attention = SlidingWindowAttention(embed_dim=4, window_size=3) + output = attention.forward(x) + print(output) From b3c2a73a108b0db35d7bcf60c5c1f27ca6ff19cf Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 22:05:16 +0530 Subject: [PATCH 23/27] Add files via upload --- neural_network/sliding_window_attention.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_network/sliding_window_attention.py b/neural_network/sliding_window_attention.py index a886ff6fa616..8d4203ba6a67 100644 --- a/neural_network/sliding_window_attention.py +++ b/neural_network/sliding_window_attention.py @@ -1,6 +1,6 @@ """ - - - - - -- - - - - - - - - - - - - - - - - - - - - - - -Name - - Sliding Window Attention Mechanism +Name - - sliding_window_attention.py Goal - - Implement a neural network architecture using sliding window attention for sequence modeling tasks. Detail: Total 5 layers neural network * Input layer @@ -13,6 +13,7 @@ References: 1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*. 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention." *arXiv preprint arXiv:2006.16236*. + 3. 
[Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning)) - - - - - -- - - - - - - - - - - - - - - - - - - - - - - """ From 9e9a3131a364ceb93832756dcdfda2e726a5cadd Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 22:15:04 +0530 Subject: [PATCH 24/27] Add files via upload --- neural_network/sliding_window_attention.py | 32 ++++++++++++---------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/neural_network/sliding_window_attention.py b/neural_network/sliding_window_attention.py index 8d4203ba6a67..430ed0cd2e39 100644 --- a/neural_network/sliding_window_attention.py +++ b/neural_network/sliding_window_attention.py @@ -1,7 +1,8 @@ """ - - - - - -- - - - - - - - - - - - - - - - - - - - - - - Name - - sliding_window_attention.py -Goal - - Implement a neural network architecture using sliding window attention for sequence modeling tasks. +Goal - - Implement a neural network architecture using sliding window attention for sequence + modeling tasks. Detail: Total 5 layers neural network * Input layer * Sliding Window Attention Layer @@ -11,8 +12,10 @@ Github: 245885195@qq.com Date: 2024.10.20 References: - 1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*. - 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention." *arXiv preprint arXiv:2006.16236*. + 1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in + Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*. + 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers + with Linear Attention." *arXiv preprint arXiv:2006.16236*. 3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning)) - - - - - -- - - - - - - - - - - - - - - - - - - - - - - """ @@ -31,7 +34,7 @@ class SlidingWindowAttention: embed_dim (int): The dimensionality of the input embeddings. """ - def __init__(self, embed_dim: int, window_size: int): + def __init__(self, embed_dim: int, window_size: int) -> None: """ Initialize the SlidingWindowAttention module. @@ -41,14 +44,16 @@ def __init__(self, embed_dim: int, window_size: int): """ self.window_size = window_size self.embed_dim = embed_dim - self.attention_weights = np.random.randn(embed_dim, embed_dim) + rng = np.random.default_rng() + self.attention_weights = rng.standard_normal((embed_dim, embed_dim)) - def forward(self, x: np.ndarray) -> np.ndarray: + def forward(self, input_tensor: np.ndarray) -> np.ndarray: """ Forward pass for the sliding window attention. Args: - x (np.ndarray): Input tensor of shape (batch_size, seq_length, embed_dim). + input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length, + embed_dim). Returns: np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim). 
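
Before the next hunk, a standalone sketch of the half-open window `[start, end)` that `forward` computes for each position; the `window_size=3` and `seq_length=5` values here are illustrative assumptions, not taken from the patch:

```python
# Illustrative only: the bounds arithmetic from forward(), in isolation.
window_size = 3  # assumed demo value
seq_length = 5   # assumed demo value

for i in range(seq_length):
    start = max(0, i - window_size // 2)  # window reach to the left
    end = min(seq_length, i + window_size // 2 + 1)  # and to the right
    print(i, list(range(start, end)))

# Prints:
# 0 [0, 1]
# 1 [0, 1, 2]
# 2 [1, 2, 3]
# 3 [2, 3, 4]
# 4 [3, 4]
```

Each position attends to at most `window_size` neighbours, so the per-position cost stays constant instead of growing with sequence length.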
@@ -61,8 +66,8 @@ def forward(self, x: np.ndarray) -> np.ndarray: >>> (output.sum() != 0).item() # Check if output is non-zero True """ - batch_size, seq_length, _ = x.shape - output = np.zeros_like(x) + batch_size, seq_length, _ = input_tensor.shape + output = np.zeros_like(input_tensor) for i in range(seq_length): # Define the window range @@ -70,7 +75,7 @@ def forward(self, x: np.ndarray) -> np.ndarray: end = min(seq_length, i + self.window_size // 2 + 1) # Extract the local window - local_window = x[:, start:end, :] + local_window = input_tensor[:, start:end, :] # Compute attention scores attention_scores = np.matmul(local_window, self.attention_weights) @@ -86,10 +91,9 @@ def forward(self, x: np.ndarray) -> np.ndarray: doctest.testmod() - # Example usage - x = np.random.randn( - 2, 10, 4 - ) # Batch size 2, sequence length 10, embedding dimension 4 + # usage + rng = np.random.default_rng() + x = rng.standard_normal((2, 10, 4)) # Batch size 2, sequence length 10, embedding dimension 4 attention = SlidingWindowAttention(embed_dim=4, window_size=3) output = attention.forward(x) print(output) From 041571772eb96456e60184cfdd4e4b21e94f4399 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Oct 2024 16:46:42 +0000 Subject: [PATCH 25/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_network/sliding_window_attention.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/neural_network/sliding_window_attention.py b/neural_network/sliding_window_attention.py index 430ed0cd2e39..b329e4fce910 100644 --- a/neural_network/sliding_window_attention.py +++ b/neural_network/sliding_window_attention.py @@ -1,7 +1,7 @@ """ - - - - - -- - - - - - - - - - - - - - - - - - - - - - - Name - - sliding_window_attention.py -Goal - - Implement a neural network architecture using sliding window attention for sequence +Goal - - Implement a neural network architecture using sliding window attention for sequence modeling tasks. Detail: Total 5 layers neural network * Input layer @@ -12,9 +12,9 @@ Github: 245885195@qq.com Date: 2024.10.20 References: - 1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in + 1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*. - 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers + 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention." *arXiv preprint arXiv:2006.16236*. 3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning)) - - - - - -- - - - - - - - - - - - - - - - - - - - - - - @@ -52,7 +52,7 @@ def forward(self, input_tensor: np.ndarray) -> np.ndarray: Forward pass for the sliding window attention. Args: - input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length, + input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length, embed_dim). 
Returns: @@ -93,7 +93,9 @@ def forward(self, input_tensor: np.ndarray) -> np.ndarray: # usage rng = np.random.default_rng() - x = rng.standard_normal((2, 10, 4)) # Batch size 2, sequence length 10, embedding dimension 4 + x = rng.standard_normal( + (2, 10, 4) + ) # Batch size 2, sequence length 10, embedding dimension 4 attention = SlidingWindowAttention(embed_dim=4, window_size=3) output = attention.forward(x) print(output) From 3b8848430c1c0f8404b0404857b7debaa08e7473 Mon Sep 17 00:00:00 2001 From: Pritam Das <69068731+Pritam3355@users.noreply.github.com> Date: Sun, 20 Oct 2024 22:21:22 +0530 Subject: [PATCH 26/27] Add files via upload --- neural_network/sliding_window_attention.py | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/neural_network/sliding_window_attention.py b/neural_network/sliding_window_attention.py index b329e4fce910..54d3fec2cd34 100644 --- a/neural_network/sliding_window_attention.py +++ b/neural_network/sliding_window_attention.py @@ -1,8 +1,8 @@ """ - - - - - -- - - - - - - - - - - - - - - - - - - - - - - Name - - sliding_window_attention.py -Goal - - Implement a neural network architecture using sliding window attention for sequence - modeling tasks. +Goal - - Implement a neural network architecture using sliding + window attention for sequence modeling tasks. Detail: Total 5 layers neural network * Input layer * Sliding Window Attention Layer @@ -12,10 +12,12 @@ Github: 245885195@qq.com Date: 2024.10.20 References: - 1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in - Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*. - 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers - with Linear Attention." *arXiv preprint arXiv:2006.16236*. + 1. Choromanska, A., et al. (2020). "On the Importance of + Initialization and Momentum in Deep Learning." *Proceedings + of the 37th International Conference on Machine Learning*. + 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast + Autoregressive Transformers with Linear Attention." + *arXiv preprint arXiv:2006.16236*. 3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning)) - - - - - -- - - - - - - - - - - - - - - - - - - - - - - """ @@ -26,8 +28,8 @@ class SlidingWindowAttention: """Sliding Window Attention Module. - This class implements a sliding window attention mechanism where the model - attends to a fixed-size window of context around each token. + This class implements a sliding window attention mechanism where + the model attends to a fixed-size window of context around each token. Attributes: window_size (int): The size of the attention window. @@ -52,13 +54,13 @@ def forward(self, input_tensor: np.ndarray) -> np.ndarray: Forward pass for the sliding window attention. Args: - input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length, - embed_dim). + input_tensor (np.ndarray): Input tensor of shape (batch_size, + seq_length, embed_dim). Returns: np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim). 
- >>> x = np.random.randn(2, 10, 4) # Batch size 2, sequence length 10, embedding dimension 4 + >>> x = np.random.randn(2, 10, 4) # Batch size 2, sequence >>> attention = SlidingWindowAttention(embed_dim=4, window_size=3) >>> output = attention.forward(x) >>> output.shape @@ -93,9 +95,7 @@ def forward(self, input_tensor: np.ndarray) -> np.ndarray: # usage rng = np.random.default_rng() - x = rng.standard_normal( - (2, 10, 4) - ) # Batch size 2, sequence length 10, embedding dimension 4 + x = rng.standard_normal((2, 10, 4)) # Batch size 2, attention = SlidingWindowAttention(embed_dim=4, window_size=3) output = attention.forward(x) print(output) From 4f573e0d8d10ccbe745f0bb7fdd608a8adec7002 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Oct 2024 16:52:44 +0000 Subject: [PATCH 27/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_network/sliding_window_attention.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/neural_network/sliding_window_attention.py b/neural_network/sliding_window_attention.py index 54d3fec2cd34..319895db8131 100644 --- a/neural_network/sliding_window_attention.py +++ b/neural_network/sliding_window_attention.py @@ -1,7 +1,7 @@ """ - - - - - -- - - - - - - - - - - - - - - - - - - - - - - Name - - sliding_window_attention.py -Goal - - Implement a neural network architecture using sliding +Goal - - Implement a neural network architecture using sliding window attention for sequence modeling tasks. Detail: Total 5 layers neural network * Input layer @@ -12,11 +12,11 @@ Github: 245885195@qq.com Date: 2024.10.20 References: - 1. Choromanska, A., et al. (2020). "On the Importance of - Initialization and Momentum in Deep Learning." *Proceedings + 1. Choromanska, A., et al. (2020). "On the Importance of + Initialization and Momentum in Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*. - 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast - Autoregressive Transformers with Linear Attention." + 2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast + Autoregressive Transformers with Linear Attention." *arXiv preprint arXiv:2006.16236*. 3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning)) - - - - - -- - - - - - - - - - - - - - - - - - - - - - - @@ -28,7 +28,7 @@ class SlidingWindowAttention: """Sliding Window Attention Module. - This class implements a sliding window attention mechanism where + This class implements a sliding window attention mechanism where the model attends to a fixed-size window of context around each token. Attributes: @@ -54,13 +54,13 @@ def forward(self, input_tensor: np.ndarray) -> np.ndarray: Forward pass for the sliding window attention. Args: - input_tensor (np.ndarray): Input tensor of shape (batch_size, + input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length, embed_dim). Returns: np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim). 
-        >>> x = np.random.randn(2, 10, 4) # Batch size 2, sequence 
+        >>> x = np.random.randn(2, 10, 4) # Batch size 2, seq len 10, embed dim 4
         >>> attention = SlidingWindowAttention(embed_dim=4, window_size=3)
         >>> output = attention.forward(x)
         >>> output.shape
@@ -95,7 +95,7 @@ def forward(self, input_tensor: np.ndarray) -> np.ndarray:
 
     # usage
     rng = np.random.default_rng()
-    x = rng.standard_normal((2, 10, 4))  # Batch size 2, 
+    x = rng.standard_normal((2, 10, 4))  # Batch size 2, seq length 10, embed dim 4
     attention = SlidingWindowAttention(embed_dim=4, window_size=3)
     output = attention.forward(x)
     print(output)
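
Taken together, patches 22 through 27 leave a single module, `neural_network/sliding_window_attention.py`, exposing the `SlidingWindowAttention` class. A short driver sketch of the final state (assumes the repository root is on `PYTHONPATH`; the seed and shapes are illustrative choices, not part of the patches):

```python
# Sketch: exercise the module as it stands after PATCH 27/27.
import numpy as np

from neural_network.sliding_window_attention import SlidingWindowAttention

rng = np.random.default_rng(0)  # illustrative seed
batch, seq_len, dim = 2, 10, 4
x = rng.standard_normal((batch, seq_len, dim))

attention = SlidingWindowAttention(embed_dim=dim, window_size=3)
out = attention.forward(x)

# The layer is shape-preserving: one averaged score vector per position.
assert out.shape == (batch, seq_len, dim)
print(out.shape)  # (2, 10, 4)
```

The doctests embedded in the file can also be run directly with `python -m doctest neural_network/sliding_window_attention.py`, mirroring what the module's `__main__` block does.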