From 211247ef82fd54540e4cb832fbbb612ca5845700 Mon Sep 17 00:00:00 2001
From: Amir Lavasani <amirm.lavasani@gmail.com>
Date: Mon, 25 Sep 2023 00:38:51 +0330
Subject: [PATCH 1/9] Add MFCC Feature Extraction Algorithm (#9057)

* Add MFCC feature extraction to machine learning

* Add standalone usage in comments

* Apply suggestions from code review

Co-authored-by: Christian Clauss <cclauss@me.com>

* Delete empty junk file (#9062)

* updating DIRECTORY.md

* updating DIRECTORY.md

* Delete empty junk file

* updating DIRECTORY.md

* Fix ruff errors

* Fix more ruff errors

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>

* [main] Fix typo due to auto review change

* Add doctests for all functions

* Add MFCC feature extraction to machine learning

* Add standalone usage in comments

* Apply suggestions from code review

Co-authored-by: Christian Clauss <cclauss@me.com>

* [main] Fix typo due to auto review change

* Add doctests for all functions

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix some pre-commit issues

* Update review issues
* Remove types from docstring
* Rename dct
* Add mfcc docstring
* Add typing to several functions

* Apply suggestions from code review

* Update mfcc.py

* get_filter_points() -> tuple[np.ndarray, np.ndarray]:

* algorithm

---------

Co-authored-by: Christian Clauss <cclauss@me.com>
Co-authored-by: Tianyi Zheng <tianyizheng02@gmail.com>
Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 machine_learning/mfcc.py | 479 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 479 insertions(+)
 create mode 100644 machine_learning/mfcc.py

diff --git a/machine_learning/mfcc.py b/machine_learning/mfcc.py
new file mode 100644
index 000000000000..7ce8ceb50ff2
--- /dev/null
+++ b/machine_learning/mfcc.py
@@ -0,0 +1,479 @@
+"""
+Mel Frequency Cepstral Coefficients (MFCC) Calculation
+
+MFCC is an algorithm widely used in audio and speech processing to represent the
+short-term power spectrum of a sound signal in a more compact and
+discriminative way. It is particularly popular in speech and audio processing
+tasks such as speech recognition and speaker identification.
+
+How Mel Frequency Cepstral Coefficients are Calculated:
+1. Preprocessing:
+   - Load an audio signal and normalize it to ensure that the values fall
+     within a specific range (e.g., between -1 and 1).
+   - Frame the audio signal into overlapping, fixed-length segments, typically
+     using a technique like windowing to reduce spectral leakage.
+
+2. Fourier Transform:
+   - Apply a Fast Fourier Transform (FFT) to each audio frame to convert it
+     from the time domain to the frequency domain. This results in a
+     representation of the audio frame as a sequence of frequency components.
+
+3. Power Spectrum:
+   - Calculate the power spectrum by taking the squared magnitude of each
+     frequency component obtained from the FFT. This step measures the energy
+     distribution across different frequency bands.
+
+4. Mel Filterbank:
+   - Apply a set of triangular filterbanks spaced in the Mel frequency scale
+     to the power spectrum. These filters mimic the human auditory system's
+     frequency response. Each filterbank sums the power spectrum values within
+     its band.
+
+5. Logarithmic Compression:
+   - Take the logarithm (typically base 10) of the filterbank values to
+     compress the dynamic range. This step mimics the logarithmic response of
+     the human ear to sound intensity.
+
+6. Discrete Cosine Transform (DCT):
+   - Apply the Discrete Cosine Transform to the log filterbank energies to
+     obtain the MFCC coefficients. This transformation helps decorrelate the
+     filterbank energies and captures the most important features of the audio
+     signal.
+
+7. Feature Extraction:
+   - Select a subset of the DCT coefficients to form the feature vector.
+     Often, the first few coefficients (e.g., 12-13) are used for most
+     applications.
+
+References:
+- Mel-Frequency Cepstral Coefficients (MFCCs):
+  https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
+- Speech and Language Processing by Daniel Jurafsky & James H. Martin:
+  https://web.stanford.edu/~jurafsky/slp3/
+- Mel Frequency Cepstral Coefficient (MFCC) tutorial
+  http://practicalcryptography.com/miscellaneous/machine-learning
+  /guide-mel-frequency-cepstral-coefficients-mfccs/
+
+Author: Amir Lavasani
+"""
+
+
+import logging
+
+import numpy as np
+import scipy.fftpack as fft
+from scipy.signal import get_window
+
+logging.basicConfig(filename=f"{__file__}.log", level=logging.INFO)
+
+
+def mfcc(
+    audio: np.ndarray,
+    sample_rate: int,
+    ftt_size: int = 1024,
+    hop_length: int = 20,
+    mel_filter_num: int = 10,
+    dct_filter_num: int = 40,
+) -> np.ndarray:
+    """
+    Calculate Mel Frequency Cepstral Coefficients (MFCCs) from an audio signal.
+
+    Args:
+        audio: The input audio signal.
+        sample_rate: The sample rate of the audio signal (in Hz).
+        ftt_size: The size of the FFT window (default is 1024).
+        hop_length: The hop length for frame creation (default is 20ms).
+        mel_filter_num: The number of Mel filters (default is 10).
+        dct_filter_num: The number of DCT filters (default is 40).
+
+    Returns:
+        A matrix of MFCCs for the input audio.
+
+    Raises:
+        ValueError: If the input audio is empty.
+
+    Example:
+    >>> sample_rate = 44100  # Sample rate of 44.1 kHz
+    >>> duration = 2.0  # Duration of 2 seconds
+    >>> t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
+    >>> audio = 0.5 * np.sin(2 * np.pi * 440.0 * t)  # Generate a 440 Hz sine wave
+    >>> mfccs = mfcc(audio, sample_rate)
+    >>> mfccs.shape
+    (40, 101)
+    """
+    logging.info(f"Sample rate: {sample_rate}Hz")
+    logging.info(f"Audio duration: {len(audio) / sample_rate}s")
+    logging.info(f"Audio min: {np.min(audio)}")
+    logging.info(f"Audio max: {np.max(audio)}")
+
+    # normalize audio
+    audio_normalized = normalize(audio)
+
+    logging.info(f"Normalized audio min: {np.min(audio_normalized)}")
+    logging.info(f"Normalized audio max: {np.max(audio_normalized)}")
+
+    # frame audio into overlapping, fixed-length segments
+    audio_framed = audio_frames(
+        audio_normalized, sample_rate, ftt_size=ftt_size, hop_length=hop_length
+    )
+
+    logging.info(f"Framed audio shape: {audio_framed.shape}")
+    logging.info(f"First frame: {audio_framed[0]}")
+
+    # convert to frequency domain
+    # For simplicity we will choose the Hanning window.
+    window = get_window("hann", ftt_size, fftbins=True)
+    audio_windowed = audio_framed * window
+
+    logging.info(f"Windowed audio shape: {audio_windowed.shape}")
+    logging.info(f"First frame: {audio_windowed[0]}")
+
+    audio_fft = calculate_fft(audio_windowed, ftt_size)
+    logging.info(f"fft audio shape: {audio_fft.shape}")
+    logging.info(f"First frame: {audio_fft[0]}")
+
+    audio_power = calculate_signal_power(audio_fft)
+    logging.info(f"power audio shape: {audio_power.shape}")
+    logging.info(f"First frame: {audio_power[0]}")
+
+    filters = mel_spaced_filterbank(sample_rate, mel_filter_num, ftt_size)
+    logging.info(f"filters shape: {filters.shape}")
+
+    audio_filtered = np.dot(filters, np.transpose(audio_power))
+    audio_log = 10.0 * np.log10(audio_filtered)
+    logging.info(f"audio_log shape: {audio_log.shape}")
+
+    dct_filters = discrete_cosine_transform(dct_filter_num, mel_filter_num)
+    cepstral_coefficents = np.dot(dct_filters, audio_log)
+
+    logging.info(f"cepstral_coefficents shape: {cepstral_coefficents.shape}")
+    return cepstral_coefficents
+
+
+def normalize(audio: np.ndarray) -> np.ndarray:
+    """
+    Normalize an audio signal by scaling it to have values between -1 and 1.
+
+    Args:
+        audio: The input audio signal.
+
+    Returns:
+        The normalized audio signal.
+
+    Examples:
+    >>> audio = np.array([1, 2, 3, 4, 5])
+    >>> normalized_audio = normalize(audio)
+    >>> np.max(normalized_audio)
+    1.0
+    >>> np.min(normalized_audio)
+    0.2
+    """
+    # Divide the entire audio signal by the maximum absolute value
+    return audio / np.max(np.abs(audio))
+
+
+def audio_frames(
+    audio: np.ndarray,
+    sample_rate: int,
+    hop_length: int = 20,
+    ftt_size: int = 1024,
+) -> np.ndarray:
+    """
+    Split an audio signal into overlapping frames.
+
+    Args:
+        audio: The input audio signal.
+        sample_rate: The sample rate of the audio signal.
+        hop_length: The length of the hopping (default is 20ms).
+        ftt_size: The size of the FFT window (default is 1024).
+
+    Returns:
+        An array of overlapping frames.
+
+    Examples:
+    >>> audio = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]*1000)
+    >>> sample_rate = 8000
+    >>> frames = audio_frames(audio, sample_rate, hop_length=10, ftt_size=512)
+    >>> frames.shape
+    (126, 512)
+    """
+
+    hop_size = np.round(sample_rate * hop_length / 1000).astype(int)
+
+    # Pad the audio signal to handle edge cases
+    audio = np.pad(audio, int(ftt_size / 2), mode="reflect")
+
+    # Calculate the number of frames
+    frame_count = int((len(audio) - ftt_size) / hop_size) + 1
+
+    # Initialize an array to store the frames
+    frames = np.zeros((frame_count, ftt_size))
+
+    # Split the audio signal into frames
+    for n in range(frame_count):
+        frames[n] = audio[n * hop_size : n * hop_size + ftt_size]
+
+    return frames
+
+
+def calculate_fft(audio_windowed: np.ndarray, ftt_size: int = 1024) -> np.ndarray:
+    """
+    Calculate the Fast Fourier Transform (FFT) of windowed audio data.
+
+    Args:
+        audio_windowed: The windowed audio signal.
+        ftt_size: The size of the FFT (default is 1024).
+
+    Returns:
+        The FFT of the audio data.
+
+    Examples:
+    >>> audio_windowed = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    >>> audio_fft = calculate_fft(audio_windowed, ftt_size=4)
+    >>> np.allclose(audio_fft[0], np.array([6.0+0.j, -1.5+0.8660254j, -1.5-0.8660254j]))
+    True
+    """
+    # Transpose the frames so that each column holds the samples of one frame
+    audio_transposed = np.transpose(audio_windowed)
+
+    # Initialize an array to store the FFT results
+    audio_fft = np.empty(
+        (int(1 + ftt_size // 2), audio_transposed.shape[1]),
+        dtype=np.complex64,
+        order="F",
+    )
+
+    # Compute FFT for each frame (one column per frame)
+    for n in range(audio_fft.shape[1]):
+        audio_fft[:, n] = fft.fft(audio_transposed[:, n], axis=0)[: audio_fft.shape[0]]
+
+    # Transpose the FFT results back to the original shape
+    return np.transpose(audio_fft)
+
+
+def calculate_signal_power(audio_fft: np.ndarray) -> np.ndarray:
+    """
+    Calculate the power of the audio signal from its FFT.
+
+    Args:
+        audio_fft: The FFT of the audio signal.
+
+    Returns:
+        The power of the audio signal.
+
+    Examples:
+    >>> audio_fft = np.array([1+2j, 2+3j, 3+4j, 4+5j])
+    >>> power = calculate_signal_power(audio_fft)
+    >>> np.allclose(power, np.array([5, 13, 25, 41]))
+    True
+    """
+    # Calculate the power by squaring the absolute values of the FFT coefficients
+    return np.square(np.abs(audio_fft))
+
+
+def freq_to_mel(freq: float) -> float:
+    """
+    Convert a frequency in Hertz to the mel scale.
+
+    Args:
+        freq: The frequency in Hertz.
+
+    Returns:
+        The frequency in mel scale.
+
+    Examples:
+    >>> round(freq_to_mel(1000), 2)
+    999.99
+    """
+    # Use the formula to convert frequency to the mel scale
+    return 2595.0 * np.log10(1.0 + freq / 700.0)
+
+
+def mel_to_freq(mels: float) -> float:
+    """
+    Convert a frequency in the mel scale to Hertz.
+
+    Args:
+        mels: The frequency in mel scale.
+
+    Returns:
+        The frequency in Hertz.
+
+    Examples:
+    >>> round(mel_to_freq(999.99), 2)
+    1000.01
+    """
+    # Use the formula to convert mel scale to frequency
+    return 700.0 * (10.0 ** (mels / 2595.0) - 1.0)
+
+
+def mel_spaced_filterbank(
+    sample_rate: int, mel_filter_num: int = 10, ftt_size: int = 1024
+) -> np.ndarray:
+    """
+    Create a Mel-spaced filter bank for audio processing.
+
+    Args:
+        sample_rate: The sample rate of the audio.
+        mel_filter_num: The number of mel filters (default is 10).
+        ftt_size: The size of the FFT (default is 1024).
+
+    Returns:
+        Mel-spaced filter bank.
+
+    Examples:
+    >>> round(mel_spaced_filterbank(8000, 10, 1024)[0][1], 10)
+    0.0004603981
+    """
+    freq_min = 0
+    freq_high = sample_rate // 2
+
+    logging.info(f"Minimum frequency: {freq_min}")
+    logging.info(f"Maximum frequency: {freq_high}")
+
+    # Calculate filter points and mel frequencies
+    filter_points, mel_freqs = get_filter_points(
+        sample_rate,
+        freq_min,
+        freq_high,
+        mel_filter_num,
+        ftt_size,
+    )
+
+    filters = get_filters(filter_points, ftt_size)
+
+    # normalize filters
+    # taken from the librosa library
+    enorm = 2.0 / (mel_freqs[2 : mel_filter_num + 2] - mel_freqs[:mel_filter_num])
+    return filters * enorm[:, np.newaxis]
+
+
+def get_filters(filter_points: np.ndarray, ftt_size: int) -> np.ndarray:
+    """
+    Generate filters for audio processing.
+
+    Args:
+        filter_points: A list of filter points.
+        ftt_size: The size of the FFT.
+
+    Returns:
+        A matrix of filters.
+
+    Examples:
+    >>> get_filters(np.array([0, 20, 51, 95, 161, 256], dtype=int), 512).shape
+    (4, 257)
+    """
+    num_filters = len(filter_points) - 2
+    filters = np.zeros((num_filters, int(ftt_size / 2) + 1))
+
+    for n in range(num_filters):
+        start = filter_points[n]
+        mid = filter_points[n + 1]
+        end = filter_points[n + 2]
+
+        # Linearly increase values from 0 to 1
+        filters[n, start:mid] = np.linspace(0, 1, mid - start)
+
+        # Linearly decrease values from 1 to 0
+        filters[n, mid:end] = np.linspace(1, 0, end - mid)
+
+    return filters
+
+
+def get_filter_points(
+    sample_rate: int,
+    freq_min: int,
+    freq_high: int,
+    mel_filter_num: int = 10,
+    ftt_size: int = 1024,
+) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Calculate the filter points and frequencies for mel frequency filters.
+
+    Args:
+        sample_rate: The sample rate of the audio.
+        freq_min: The minimum frequency in Hertz.
+        freq_high: The maximum frequency in Hertz.
+        mel_filter_num: The number of mel filters (default is 10).
+        ftt_size: The size of the FFT (default is 1024).
+
+    Returns:
+        Filter points and corresponding frequencies.
+
+    Examples:
+    >>> filter_points = get_filter_points(8000, 0, 4000, mel_filter_num=4, ftt_size=512)
+    >>> filter_points[0]
+    array([  0,  20,  51,  95, 161, 256])
+    >>> filter_points[1]
+    array([   0.        ,  324.46707094,  799.33254207, 1494.30973963,
+           2511.42581671, 4000.        ])
+    """
+    # Convert minimum and maximum frequencies to mel scale
+    fmin_mel = freq_to_mel(freq_min)
+    fmax_mel = freq_to_mel(freq_high)
+
+    logging.info(f"MEL min: {fmin_mel}")
+    logging.info(f"MEL max: {fmax_mel}")
+
+    # Generate equally spaced mel frequencies
+    mels = np.linspace(fmin_mel, fmax_mel, num=mel_filter_num + 2)
+
+    # Convert mel frequencies back to Hertz
+    freqs = mel_to_freq(mels)
+
+    # Calculate filter points as integer values
+    filter_points = np.floor((ftt_size + 1) / sample_rate * freqs).astype(int)
+
+    return filter_points, freqs
+
+
+def discrete_cosine_transform(dct_filter_num: int, filter_num: int) -> np.ndarray:
+    """
+    Compute the Discrete Cosine Transform (DCT) basis matrix.
+
+    Args:
+        dct_filter_num: The number of DCT filters to generate.
+        filter_num: The number of the fbank filters.
+
+    Returns:
+        The DCT basis matrix.
+
+    Examples:
+    >>> round(discrete_cosine_transform(3, 5)[0][0], 5)
+    0.44721
+    """
+    basis = np.empty((dct_filter_num, filter_num))
+    basis[0, :] = 1.0 / np.sqrt(filter_num)
+
+    samples = np.arange(1, 2 * filter_num, 2) * np.pi / (2.0 * filter_num)
+
+    for i in range(1, dct_filter_num):
+        basis[i, :] = np.cos(i * samples) * np.sqrt(2.0 / filter_num)
+
+    return basis
+
+
+def example(wav_file_path: str = "./path-to-file/sample.wav") -> np.ndarray:
+    """
+    Example function to calculate Mel Frequency Cepstral Coefficients
+    (MFCCs) from an audio file.
+
+    Args:
+        wav_file_path: The path to the WAV audio file.
+
+    Returns:
+        np.ndarray: The computed MFCCs for the audio.
+    """
+    from scipy.io import wavfile
+
+    # Load the audio from the WAV file
+    sample_rate, audio = wavfile.read(wav_file_path)
+
+    # Calculate MFCCs
+    return mfcc(audio, sample_rate)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()

From eace4cea32b831a1683b4c431379f0cd7b9061db Mon Sep 17 00:00:00 2001
From: gudlu1925 <120262240+gudlu1925@users.noreply.github.com>
Date: Wed, 27 Sep 2023 11:14:06 +0530
Subject: [PATCH 2/9] Added Coulomb_Law (#8714)

* Create coulomb_law.py

* Update coulomb_law.py

* Update coulomb_law.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update and rename coulomb_law.py to coulombs_law.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update coulombs_law.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update coulombs_law.py

* Update coulombs_law.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update coulombs_law.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update coulombs_law.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Tianyi Zheng <tianyizheng02@gmail.com>
---
 physics/coulombs_law.py | 42 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 physics/coulombs_law.py

diff --git a/physics/coulombs_law.py b/physics/coulombs_law.py
new file mode 100644
index 000000000000..252e8ec0f74e
--- /dev/null
+++ b/physics/coulombs_law.py
@@ -0,0 +1,42 @@
+"""
+Coulomb's law states that the magnitude of the electrostatic force of attraction
+or repulsion between two point charges is directly proportional to the product
+of the magnitudes of charges and inversely proportional to the square of the
+distance between them.
+
+F = k * q1 * q2 / r^2
+
+k is Coulomb's constant and equals 1/(4π*ε0)
+q1 is charge of first body (C)
+q2 is charge of second body (C)
+r is distance between two charged bodies (m)
+
+Reference: https://en.wikipedia.org/wiki/Coulomb%27s_law
+"""
+
+
+def coulombs_law(q1: float, q2: float, radius: float) -> float:
+    """
+    Calculate the electrostatic force of attraction or repulsion
+    between two point charges
+
+    >>> coulombs_law(15.5, 20, 15)
+    12382849136.06
+    >>> coulombs_law(1, 15, 5)
+    5392531075.38
+    >>> coulombs_law(20, -50, 15)
+    -39944674632.44
+    >>> coulombs_law(-5, -8, 10)
+    3595020716.92
+    >>> coulombs_law(50, 100, 50)
+    17975103584.6
+    """
+    if radius <= 0:
+        raise ValueError("The radius is always a positive non zero integer")
+    return round(((8.9875517923 * 10**9) * q1 * q2) / (radius**2), 2)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()

From b2e186f4b769ae98d04f7f2408d3ac86da44c06f Mon Sep 17 00:00:00 2001
From: Okza Pradhana <okzamahendra29@gmail.com>
Date: Wed, 27 Sep 2023 13:06:19 +0700
Subject: [PATCH 3/9] feat(maths): add function to perform calculation (#6602)

* feat(maths): add function to perform calculation
- Add single function to calculate sum of two positive numbers
  using bitwise operator

* docs: add wikipedia url as explanation

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Apply suggestions from code review

Co-authored-by: Caeden Perelli-Harris <caedenperelliharris@gmail.com>

* Update sum_of_two_positive_numbers_bitwise.py

* Update sum_of_two_positive_numbers_bitwise.py

* Update sum_of_two_positive_numbers_bitwise.py

---------

Co-authored-by: Okza Pradhana <okzapradhana@azko-macbook.local>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Tianyi Zheng <tianyizheng02@gmail.com>
Co-authored-by: Caeden Perelli-Harris <caedenperelliharris@gmail.com>
---
 maths/sum_of_two_positive_numbers_bitwise.py | 55 ++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 maths/sum_of_two_positive_numbers_bitwise.py

diff --git a/maths/sum_of_two_positive_numbers_bitwise.py b/maths/sum_of_two_positive_numbers_bitwise.py
new file mode 100644
index 000000000000..70eaf6887b64
--- /dev/null
+++ b/maths/sum_of_two_positive_numbers_bitwise.py
@@ -0,0 +1,55 @@
+"""
+Calculates the sum of two non-negative integers using bitwise operators
+Wikipedia explanation: https://en.wikipedia.org/wiki/Binary_number
+"""
+
+
+def bitwise_addition_recursive(number: int, other_number: int) -> int:
+    """
+    >>> bitwise_addition_recursive(4, 5)
+    9
+    >>> bitwise_addition_recursive(8, 9)
+    17
+    >>> bitwise_addition_recursive(0, 4)
+    4
+    >>> bitwise_addition_recursive(4.5, 9)
+    Traceback (most recent call last):
+        ...
+    TypeError: Both arguments MUST be integers!
+    >>> bitwise_addition_recursive('4', 9)
+    Traceback (most recent call last):
+        ...
+    TypeError: Both arguments MUST be integers!
+    >>> bitwise_addition_recursive('4.5', 9)
+    Traceback (most recent call last):
+        ...
+    TypeError: Both arguments MUST be integers!
+    >>> bitwise_addition_recursive(-1, 9)
+    Traceback (most recent call last):
+        ...
+    ValueError: Both arguments MUST be non-negative!
+    >>> bitwise_addition_recursive(1, -9)
+    Traceback (most recent call last):
+        ...
+    ValueError: Both arguments MUST be non-negative!
+    """
+
+    if not isinstance(number, int) or not isinstance(other_number, int):
+        raise TypeError("Both arguments MUST be integers!")
+
+    if number < 0 or other_number < 0:
+        raise ValueError("Both arguments MUST be non-negative!")
+
+    bitwise_sum = number ^ other_number
+    carry = number & other_number
+
+    if carry == 0:
+        return bitwise_sum
+
+    return bitwise_addition_recursive(bitwise_sum, carry << 1)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()

From 84ec9414e45380a5e946d4f73b921b274ecd4be7 Mon Sep 17 00:00:00 2001
From: thor-harsh <105957576+thor-harsh@users.noreply.github.com>
Date: Wed, 27 Sep 2023 12:01:42 +0530
Subject: [PATCH 4/9] Update k_means_clust.py (#8996)

* Update k_means_clust.py

* Apply suggestions from code review

---------

Co-authored-by: Tianyi Zheng <tianyizheng02@gmail.com>
---
 machine_learning/k_means_clust.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/machine_learning/k_means_clust.py b/machine_learning/k_means_clust.py
index 7c8142aab878..d93c5addf2ee 100644
--- a/machine_learning/k_means_clust.py
+++ b/machine_learning/k_means_clust.py
@@ -11,10 +11,10 @@
   - initial_centroids , initial centroid values generated by utility function(mentioned
     in usage).
   - maxiter , maximum number of iterations to process.
-  - heterogeneity , empty list that will be filled with hetrogeneity values if passed
+  - heterogeneity , empty list that will be filled with heterogeneity values if passed
     to kmeans func.
 Usage:
-  1. define 'k' value, 'X' features array and 'hetrogeneity' empty list
+  1. define 'k' value, 'X' features array and 'heterogeneity' empty list
   2. create initial_centroids,
         initial_centroids = get_initial_centroids(
             X,
@@ -31,8 +31,8 @@
             record_heterogeneity=heterogeneity,
             verbose=True # whether to print logs in console or not.(default=False)
             )
-  4. Plot the loss function, hetrogeneity values for every iteration saved in
-     hetrogeneity list.
+  4. Plot the loss function and heterogeneity values for every iteration saved in
+     heterogeneity list.
         plot_heterogeneity(
             heterogeneity,
             k
@@ -198,13 +198,10 @@ def report_generator(
     df: pd.DataFrame, clustering_variables: np.ndarray, fill_missing_report=None
 ) -> pd.DataFrame:
     """
-    Function generates easy-erading clustering report. It takes 2 arguments as an input:
-        DataFrame - dataframe with predicted cluester column;
-        FillMissingReport - dictionary of rules how we are going to fill missing
-        values of for final report generate (not included in modeling);
-    in order to run the function following libraries must be imported:
-        import pandas as pd
-        import numpy as np
+    Generates a clustering report. Apart from clustering_variables, the inputs are:
+        df - dataframe with predicted cluster column
+        fill_missing_report - dictionary of rules on how we are going to fill in missing
+        values for final generated report (not included in modelling);
     >>> data = pd.DataFrame()
     >>> data['numbers'] = [1, 2, 3]
     >>> data['col1'] = [0.5, 2.5, 4.5]
@@ -306,10 +303,10 @@ def report_generator(
     a.columns = report.columns  # rename columns to match report
     report = report.drop(
         report[report.Type == "count"].index
-    )  # drop count values except cluster size
+    )  # drop count values except for cluster size
     report = pd.concat(
         [report, a, clustersize, clusterproportion], axis=0
-    )  # concat report with clustert size and nan values
+    )  # concat report with cluster size and nan values
     report["Mark"] = report["Features"].isin(clustering_variables)
     cols = report.columns.tolist()
     cols = cols[0:2] + cols[-1:] + cols[2:-1]

From 5830b29e7ecf5437ce46bcdefda88eedea693043 Mon Sep 17 00:00:00 2001
From: Tianyi Zheng <tianyizheng02@gmail.com>
Date: Wed, 27 Sep 2023 08:00:34 -0400
Subject: [PATCH 5/9] Fix `mypy` errors in `erosion_operation.py` (#8603)

* updating DIRECTORY.md

* Fix mypy errors in erosion_operation.py

* Rename functions to use snake case

* updating DIRECTORY.md

* updating DIRECTORY.md

* Replace raw file string with pathlib Path

* Fix function name in erosion_operation.py doctest

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
---
 .../erosion_operation.py                      | 39 +++++++++++--------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/digital_image_processing/morphological_operations/erosion_operation.py b/digital_image_processing/morphological_operations/erosion_operation.py
index c0e1ef847237..53001da83468 100644
--- a/digital_image_processing/morphological_operations/erosion_operation.py
+++ b/digital_image_processing/morphological_operations/erosion_operation.py
@@ -1,34 +1,37 @@
+from pathlib import Path
+
 import numpy as np
 from PIL import Image
 
 
-def rgb2gray(rgb: np.array) -> np.array:
+def rgb_to_gray(rgb: np.ndarray) -> np.ndarray:
     """
     Return gray image from rgb image
-    >>> rgb2gray(np.array([[[127, 255, 0]]]))
+
+    >>> rgb_to_gray(np.array([[[127, 255, 0]]]))
     array([[187.6453]])
-    >>> rgb2gray(np.array([[[0, 0, 0]]]))
+    >>> rgb_to_gray(np.array([[[0, 0, 0]]]))
     array([[0.]])
-    >>> rgb2gray(np.array([[[2, 4, 1]]]))
+    >>> rgb_to_gray(np.array([[[2, 4, 1]]]))
     array([[3.0598]])
-    >>> rgb2gray(np.array([[[26, 255, 14], [5, 147, 20], [1, 200, 0]]]))
+    >>> rgb_to_gray(np.array([[[26, 255, 14], [5, 147, 20], [1, 200, 0]]]))
     array([[159.0524,  90.0635, 117.6989]])
     """
     r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
     return 0.2989 * r + 0.5870 * g + 0.1140 * b
 
 
-def gray2binary(gray: np.array) -> np.array:
+def gray_to_binary(gray: np.ndarray) -> np.ndarray:
     """
     Return binary image from gray image
 
-    >>> gray2binary(np.array([[127, 255, 0]]))
+    >>> gray_to_binary(np.array([[127, 255, 0]]))
     array([[False,  True, False]])
-    >>> gray2binary(np.array([[0]]))
+    >>> gray_to_binary(np.array([[0]]))
     array([[False]])
-    >>> gray2binary(np.array([[26.2409, 4.9315, 1.4729]]))
+    >>> gray_to_binary(np.array([[26.2409, 4.9315, 1.4729]]))
     array([[False, False, False]])
-    >>> gray2binary(np.array([[26, 255, 14], [5, 147, 20], [1, 200, 0]]))
+    >>> gray_to_binary(np.array([[26, 255, 14], [5, 147, 20], [1, 200, 0]]))
     array([[False,  True, False],
            [False,  True, False],
            [False,  True, False]])
@@ -36,9 +39,10 @@ def gray2binary(gray: np.array) -> np.array:
     return (gray > 127) & (gray <= 255)
 
 
-def erosion(image: np.array, kernel: np.array) -> np.array:
+def erosion(image: np.ndarray, kernel: np.ndarray) -> np.ndarray:
     """
     Return eroded image
+
     >>> erosion(np.array([[True, True, False]]), np.array([[0, 1, 0]]))
     array([[False, False, False]])
     >>> erosion(np.array([[True, False, False]]), np.array([[1, 1, 0]]))
@@ -62,14 +66,17 @@ def erosion(image: np.array, kernel: np.array) -> np.array:
     return output
 
 
-# kernel to be applied
-structuring_element = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
-
 if __name__ == "__main__":
     # read original image
-    image = np.array(Image.open(r"..\image_data\lena.jpg"))
+    lena_path = Path(__file__).resolve().parent.parent / "image_data" / "lena.jpg"
+    lena = np.array(Image.open(lena_path))
+
+    # kernel to be applied
+    structuring_element = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
+
     # Apply erosion operation to a binary image
-    output = erosion(gray2binary(rgb2gray(image)), structuring_element)
+    output = erosion(gray_to_binary(rgb_to_gray(lena)), structuring_element)
+
     # Save the output image
     pil_img = Image.fromarray(output).convert("RGB")
     pil_img.save("result_erosion.png")

From 76767d2f09d15aeff0a54cfc44652207eda2314e Mon Sep 17 00:00:00 2001
From: Tianyi Zheng <tianyizheng02@gmail.com>
Date: Wed, 27 Sep 2023 08:01:18 -0400
Subject: [PATCH 6/9] Consolidate the two existing kNN implementations (#8903)

* Add type hints to k_nearest_neighbours.py

* Refactor k_nearest_neighbours.py into class

* Add documentation to k_nearest_neighbours.py

* Use heap-based priority queue for k_nearest_neighbours.py

* Delete knn_sklearn.py

* updating DIRECTORY.md

* Use optional args in k_nearest_neighbours.py for demo purposes

* Fix wrong function arg in k_nearest_neighbours.py

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
---
 DIRECTORY.md                             |   1 -
 machine_learning/k_nearest_neighbours.py | 128 ++++++++++++++---------
 machine_learning/knn_sklearn.py          |  31 ------
 3 files changed, 79 insertions(+), 81 deletions(-)
 delete mode 100644 machine_learning/knn_sklearn.py

diff --git a/DIRECTORY.md b/DIRECTORY.md
index d81e4ec1ee83..902999460fe5 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -507,7 +507,6 @@
   * [Gradient Descent](machine_learning/gradient_descent.py)
   * [K Means Clust](machine_learning/k_means_clust.py)
   * [K Nearest Neighbours](machine_learning/k_nearest_neighbours.py)
-  * [Knn Sklearn](machine_learning/knn_sklearn.py)
   * [Linear Discriminant Analysis](machine_learning/linear_discriminant_analysis.py)
   * [Linear Regression](machine_learning/linear_regression.py)
   * Local Weighted Learning
diff --git a/machine_learning/k_nearest_neighbours.py b/machine_learning/k_nearest_neighbours.py
index 2a90cfe5987a..a43757c5c20e 100644
--- a/machine_learning/k_nearest_neighbours.py
+++ b/machine_learning/k_nearest_neighbours.py
@@ -1,58 +1,88 @@
+"""
+k-Nearest Neighbours (kNN) is a simple non-parametric supervised learning
+algorithm used for classification. Given some labelled training data, a given
+point is classified using its k nearest neighbours according to some distance
+metric. The most commonly occurring label among the neighbours becomes the label
+of the given point. In effect, the label of the given point is decided by a
+majority vote.
+
+This implementation uses the commonly used Euclidean distance metric, but other
+distance metrics can also be used.
+
+Reference: https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm
+"""
+
 from collections import Counter
+from heapq import nsmallest
 
 import numpy as np
 from sklearn import datasets
 from sklearn.model_selection import train_test_split
 
-data = datasets.load_iris()
-
-X = np.array(data["data"])
-y = np.array(data["target"])
-classes = data["target_names"]
-
-X_train, X_test, y_train, y_test = train_test_split(X, y)
-
-
-def euclidean_distance(a, b):
-    """
-    Gives the euclidean distance between two points
-    >>> euclidean_distance([0, 0], [3, 4])
-    5.0
-    >>> euclidean_distance([1, 2, 3], [1, 8, 11])
-    10.0
-    """
-    return np.linalg.norm(np.array(a) - np.array(b))
-
-
-def classifier(train_data, train_target, classes, point, k=5):
-    """
-    Classifies the point using the KNN algorithm
-    k closest points are found (ranked in ascending order of euclidean distance)
-    Params:
-    :train_data: Set of points that are classified into two or more classes
-    :train_target: List of classes in the order of train_data points
-    :classes: Labels of the classes
-    :point: The data point that needs to be classified
-
-    >>> X_train = [[0, 0], [1, 0], [0, 1], [0.5, 0.5], [3, 3], [2, 3], [3, 2]]
-    >>> y_train = [0, 0, 0, 0, 1, 1, 1]
-    >>> classes = ['A','B']; point = [1.2,1.2]
-    >>> classifier(X_train, y_train, classes,point)
-    'A'
-    """
-    data = zip(train_data, train_target)
-    # List of distances of all points from the point to be classified
-    distances = []
-    for data_point in data:
-        distance = euclidean_distance(data_point[0], point)
-        distances.append((distance, data_point[1]))
-    # Choosing 'k' points with the least distances.
-    votes = [i[1] for i in sorted(distances)[:k]]
-    # Most commonly occurring class among them
-    # is the class into which the point is classified
-    result = Counter(votes).most_common(1)[0][0]
-    return classes[result]
+
+class KNN:
+    def __init__(
+        self,
+        train_data: np.ndarray[float],
+        train_target: np.ndarray[int],
+        class_labels: list[str],
+    ) -> None:
+        """
+        Create a kNN classifier using the given training data and class labels
+        """
+        self.data = list(zip(train_data, train_target))  # zip() alone is one-shot
+        self.labels = class_labels
+
+    @staticmethod
+    def _euclidean_distance(a: np.ndarray[float], b: np.ndarray[float]) -> float:
+        """
+        Calculate the Euclidean distance between two points as a built-in float
+        >>> KNN._euclidean_distance(np.array([0, 0]), np.array([3, 4]))
+        5.0
+        >>> KNN._euclidean_distance(np.array([1, 2, 3]), np.array([1, 8, 11]))
+        10.0
+        """
+        return float(np.linalg.norm(a - b))
+
+    def classify(self, pred_point: np.ndarray[float], k: int = 5) -> str:
+        """
+        Classify a given point by majority vote among its k nearest training points
+        >>> train_X = np.array(
+        ...     [[0, 0], [1, 0], [0, 1], [0.5, 0.5], [3, 3], [2, 3], [3, 2]]
+        ... )
+        >>> train_y = np.array([0, 0, 0, 0, 1, 1, 1])
+        >>> classes = ['A', 'B']
+        >>> knn = KNN(train_X, train_y, classes)
+        >>> point = np.array([1.2, 1.2])
+        >>> knn.classify(point)
+        'A'
+        """
+        # Lazily pair each training point's distance to pred_point with its target
+        distances = (
+            (self._euclidean_distance(data_point[0], pred_point), data_point[1])
+            for data_point in self.data
+        )
+
+        # Keep only the targets of the k points with the shortest distances
+        votes = (i[1] for i in nsmallest(k, distances))
+
+        # Most commonly occurring class is the one into which the point is classified
+        result = Counter(votes).most_common(1)[0][0]
+        return self.labels[result]
 
 
 if __name__ == "__main__":
-    print(classifier(X_train, y_train, classes, [4.4, 3.1, 1.3, 1.4]))
+    import doctest
+
+    doctest.testmod()
+
+    iris = datasets.load_iris()
+
+    X = np.array(iris["data"])
+    y = np.array(iris["target"])
+    iris_classes = iris["target_names"]
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+    iris_point = np.array([4.4, 3.1, 1.3, 1.4])
+    classifier = KNN(X_train, y_train, iris_classes)
+    print(classifier.classify(iris_point, k=3))
diff --git a/machine_learning/knn_sklearn.py b/machine_learning/knn_sklearn.py
deleted file mode 100644
index 4a621a4244b6..000000000000
--- a/machine_learning/knn_sklearn.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from sklearn.datasets import load_iris
-from sklearn.model_selection import train_test_split
-from sklearn.neighbors import KNeighborsClassifier
-
-# Load iris file
-iris = load_iris()
-iris.keys()
-
-
-print(f"Target names: \n {iris.target_names} ")
-print(f"\n Features: \n {iris.feature_names}")
-
-# Train set e Test set
-X_train, X_test, y_train, y_test = train_test_split(
-    iris["data"], iris["target"], random_state=4
-)
-
-# KNN
-
-knn = KNeighborsClassifier(n_neighbors=1)
-knn.fit(X_train, y_train)
-
-# new array to test
-X_new = [[1, 2, 1, 4], [2, 3, 4, 5]]
-
-prediction = knn.predict(X_new)
-
-print(
-    f"\nNew array: \n {X_new}\n\nTarget Names Prediction: \n"
-    f" {iris['target_names'][prediction]}"
-)

From f9b8759ba82cd7ca4e4a99b9bc9b661ace5a93cc Mon Sep 17 00:00:00 2001
From: Tianyi Zheng <tianyizheng02@gmail.com>
Date: Wed, 27 Sep 2023 09:54:40 -0400
Subject: [PATCH 7/9] Move bitwise add (#9097)

* updating DIRECTORY.md

* updating DIRECTORY.md

* updating DIRECTORY.md

* Move and rename maths/sum_of_two_positive_numbers_bitwise.py

* updating DIRECTORY.md

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
---
 DIRECTORY.md                                                   | 3 +++
 .../bitwise_addition_recursive.py                              | 0
 2 files changed, 3 insertions(+)
 rename maths/sum_of_two_positive_numbers_bitwise.py => bit_manipulation/bitwise_addition_recursive.py (100%)

diff --git a/DIRECTORY.md b/DIRECTORY.md
index 902999460fe5..e596d96e5e83 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -43,6 +43,7 @@
   * [Binary Shifts](bit_manipulation/binary_shifts.py)
   * [Binary Twos Complement](bit_manipulation/binary_twos_complement.py)
   * [Binary Xor Operator](bit_manipulation/binary_xor_operator.py)
+  * [Bitwise Addition Recursive](bit_manipulation/bitwise_addition_recursive.py)
   * [Count 1S Brian Kernighan Method](bit_manipulation/count_1s_brian_kernighan_method.py)
   * [Count Number Of One Bits](bit_manipulation/count_number_of_one_bits.py)
   * [Gray Code Sequence](bit_manipulation/gray_code_sequence.py)
@@ -514,6 +515,7 @@
   * [Logistic Regression](machine_learning/logistic_regression.py)
   * Lstm
     * [Lstm Prediction](machine_learning/lstm/lstm_prediction.py)
+  * [Mfcc](machine_learning/mfcc.py)
   * [Multilayer Perceptron Classifier](machine_learning/multilayer_perceptron_classifier.py)
   * [Polynomial Regression](machine_learning/polynomial_regression.py)
   * [Scoring Functions](machine_learning/scoring_functions.py)
@@ -752,6 +754,7 @@
   * [Basic Orbital Capture](physics/basic_orbital_capture.py)
   * [Casimir Effect](physics/casimir_effect.py)
   * [Centripetal Force](physics/centripetal_force.py)
+  * [Coulombs Law](physics/coulombs_law.py)
   * [Grahams Law](physics/grahams_law.py)
   * [Horizontal Projectile Motion](physics/horizontal_projectile_motion.py)
   * [Hubble Parameter](physics/hubble_parameter.py)
diff --git a/maths/sum_of_two_positive_numbers_bitwise.py b/bit_manipulation/bitwise_addition_recursive.py
similarity index 100%
rename from maths/sum_of_two_positive_numbers_bitwise.py
rename to bit_manipulation/bitwise_addition_recursive.py

From 38c2b839819549d1ab8566675fab09db449875cc Mon Sep 17 00:00:00 2001
From: aryan1165 <111041731+aryan1165@users.noreply.github.com>
Date: Wed, 27 Sep 2023 19:26:01 +0530
Subject: [PATCH 8/9] Deleted euclidean_gcd.py. Fixes #8063 (#9108)

---
 maths/euclidean_gcd.py | 47 ------------------------------------------
 1 file changed, 47 deletions(-)
 delete mode 100644 maths/euclidean_gcd.py

diff --git a/maths/euclidean_gcd.py b/maths/euclidean_gcd.py
deleted file mode 100644
index de4b250243db..000000000000
--- a/maths/euclidean_gcd.py
+++ /dev/null
@@ -1,47 +0,0 @@
-""" https://en.wikipedia.org/wiki/Euclidean_algorithm """
-
-
-def euclidean_gcd(a: int, b: int) -> int:
-    """
-    Examples:
-    >>> euclidean_gcd(3, 5)
-    1
-
-    >>> euclidean_gcd(6, 3)
-    3
-    """
-    while b:
-        a, b = b, a % b
-    return a
-
-
-def euclidean_gcd_recursive(a: int, b: int) -> int:
-    """
-    Recursive method for euclicedan gcd algorithm
-
-    Examples:
-    >>> euclidean_gcd_recursive(3, 5)
-    1
-
-    >>> euclidean_gcd_recursive(6, 3)
-    3
-    """
-    return a if b == 0 else euclidean_gcd_recursive(b, a % b)
-
-
-def main():
-    print(f"euclidean_gcd(3, 5) = {euclidean_gcd(3, 5)}")
-    print(f"euclidean_gcd(5, 3) = {euclidean_gcd(5, 3)}")
-    print(f"euclidean_gcd(1, 3) = {euclidean_gcd(1, 3)}")
-    print(f"euclidean_gcd(3, 6) = {euclidean_gcd(3, 6)}")
-    print(f"euclidean_gcd(6, 3) = {euclidean_gcd(6, 3)}")
-
-    print(f"euclidean_gcd_recursive(3, 5) = {euclidean_gcd_recursive(3, 5)}")
-    print(f"euclidean_gcd_recursive(5, 3) = {euclidean_gcd_recursive(5, 3)}")
-    print(f"euclidean_gcd_recursive(1, 3) = {euclidean_gcd_recursive(1, 3)}")
-    print(f"euclidean_gcd_recursive(3, 6) = {euclidean_gcd_recursive(3, 6)}")
-    print(f"euclidean_gcd_recursive(6, 3) = {euclidean_gcd_recursive(6, 3)}")
-
-
-if __name__ == "__main__":
-    main()

From 35dd529c85fc433e0780cdaff586c684208aa1b7 Mon Sep 17 00:00:00 2001
From: Hetarth Jain <hetarth.jain@gmail.com>
Date: Thu, 28 Sep 2023 23:54:46 +0530
Subject: [PATCH 9/9] Returning Index instead of boolean in knuth_morris_pratt
 (kmp) function, making it compatible with str.find(). (#9083)

* Update knuth_morris_pratt.py - changed Boolean to Index

* Update knuth_morris_pratt.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update knuth_morris_pratt.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update knuth_morris_pratt.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update back_propagation_neural_network.py

* Update back_propagation_neural_network.py

* Update strings/knuth_morris_pratt.py

* Update knuth_morris_pratt.py

* Update knuth_morris_pratt.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Christian Clauss <cclauss@me.com>
---
 strings/knuth_morris_pratt.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/strings/knuth_morris_pratt.py b/strings/knuth_morris_pratt.py
index a488c171a93b..8a04eb2532c0 100644
--- a/strings/knuth_morris_pratt.py
+++ b/strings/knuth_morris_pratt.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 
-def kmp(pattern: str, text: str) -> bool:
+def knuth_morris_pratt(text: str, pattern: str) -> int:
     """
     The Knuth-Morris-Pratt Algorithm for finding a pattern within a piece of text
     with complexity O(n + m)
@@ -14,6 +14,12 @@ def kmp(pattern: str, text: str) -> bool:
     2) Step through the text one character at a time and compare it to a character in
         the pattern updating our location within the pattern if necessary
 
+    >>> kmp = "knuth_morris_pratt"
+    >>> all(
+    ...    knuth_morris_pratt(kmp, s) == kmp.find(s)
+    ...    for s in ("kn", "h_m", "rr", "tt", "not there")
+    ... )
+    True
     """
 
     # 1) Construct the failure array
@@ -24,7 +30,7 @@ def kmp(pattern: str, text: str) -> bool:
     while i < len(text):
         if pattern[j] == text[i]:
             if j == (len(pattern) - 1):
-                return True
+                return i - j
             j += 1
 
         # if this is a prefix in our pattern
@@ -33,7 +39,7 @@ def kmp(pattern: str, text: str) -> bool:
             j = failure[j - 1]
             continue
         i += 1
-    return False
+    return -1
 
 
 def get_failure_array(pattern: str) -> list[int]:
@@ -57,27 +63,38 @@ def get_failure_array(pattern: str) -> list[int]:
 
 
 if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
+
     # Test 1)
     pattern = "abc1abc12"
     text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
     text2 = "alskfjaldsk23adsfabcabc"
-    assert kmp(pattern, text1) and not kmp(pattern, text2)
+    assert knuth_morris_pratt(text1, pattern) >= 0 > knuth_morris_pratt(text2, pattern)
 
     # Test 2)
     pattern = "ABABX"
     text = "ABABZABABYABABX"
-    assert kmp(pattern, text)
+    assert knuth_morris_pratt(text, pattern) == text.find(pattern)
 
     # Test 3)
     pattern = "AAAB"
     text = "ABAAAAAB"
-    assert kmp(pattern, text)
+    assert knuth_morris_pratt(text, pattern) == text.find(pattern)
 
     # Test 4)
     pattern = "abcdabcy"
     text = "abcxabcdabxabcdabcdabcy"
-    assert kmp(pattern, text)
+    assert knuth_morris_pratt(text, pattern) == text.find(pattern)
+
+    # Test 5) -> Doctests
+    kmp = "knuth_morris_pratt"
+    assert all(
+        knuth_morris_pratt(kmp, s) == kmp.find(s)
+        for s in ("kn", "h_m", "rr", "tt", "not there")
+    )
 
-    # Test 5)
+    # Test 6)
     pattern = "aabaabaaa"
     assert get_failure_array(pattern) == [0, 1, 0, 1, 2, 3, 4, 5, 2]