Merge branch 'master' of https://github.com/sowndappan5/Python

sowndappan5 · sowndappan5 · commit 196e658a1841 · 2024-06-04T22:09:16.000+05:30
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
       - id: auto-walrus
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.5
+    rev: v0.4.7
     hooks:
       - id: ruff
       - id: ruff-format
diff --git a/DIRECTORY.md b/DIRECTORY.md
@@ -661,7 +661,6 @@
   * [Manhattan Distance](maths/manhattan_distance.py)
   * [Matrix Exponentiation](maths/matrix_exponentiation.py)
   * [Max Sum Sliding Window](maths/max_sum_sliding_window.py)
-  * [Median Of Two Arrays](maths/median_of_two_arrays.py)
   * [Minkowski Distance](maths/minkowski_distance.py)
   * [Mobius Function](maths/mobius_function.py)
   * [Modular Division](maths/modular_division.py)
diff --git a/backtracking/all_permutations.py b/backtracking/all_permutations.py
@@ -23,6 +23,42 @@ def create_state_space_tree(
     Creates a state space tree to iterate through each branch using DFS.
     We know that each state has exactly len(sequence) - index children.
     It terminates when it reaches the end of the given sequence.
+
+    :param sequence: The input sequence for which permutations are generated.
+    :param current_sequence: The current permutation being built.
+    :param index: The current index in the sequence.
+    :param index_used: list to track which elements are used in permutation.
+
+    Example 1:
+    >>> sequence = [1, 2, 3]
+    >>> current_sequence = []
+    >>> index_used = [False, False, False]
+    >>> create_state_space_tree(sequence, current_sequence, 0, index_used)
+    [1, 2, 3]
+    [1, 3, 2]
+    [2, 1, 3]
+    [2, 3, 1]
+    [3, 1, 2]
+    [3, 2, 1]
+
+    Example 2:
+    >>> sequence = ["A", "B", "C"]
+    >>> current_sequence = []
+    >>> index_used = [False, False, False]
+    >>> create_state_space_tree(sequence, current_sequence, 0, index_used)
+    ['A', 'B', 'C']
+    ['A', 'C', 'B']
+    ['B', 'A', 'C']
+    ['B', 'C', 'A']
+    ['C', 'A', 'B']
+    ['C', 'B', 'A']
+
+    Example 3:
+    >>> sequence = [1]
+    >>> current_sequence = []
+    >>> index_used = [False]
+    >>> create_state_space_tree(sequence, current_sequence, 0, index_used)
+    [1]
     """
 
     if index == len(sequence):
diff --git a/backtracking/all_subsequences.py b/backtracking/all_subsequences.py
@@ -22,6 +22,56 @@ def create_state_space_tree(
     Creates a state space tree to iterate through each branch using DFS.
     We know that each state has exactly two children.
     It terminates when it reaches the end of the given sequence.
+
+    :param sequence: The input sequence for which subsequences are generated.
+    :param current_subsequence: The current subsequence being built.
+    :param index: The current index in the sequence.
+
+    Example:
+    >>> sequence = [3, 2, 1]
+    >>> current_subsequence = []
+    >>> create_state_space_tree(sequence, current_subsequence, 0)
+    []
+    [1]
+    [2]
+    [2, 1]
+    [3]
+    [3, 1]
+    [3, 2]
+    [3, 2, 1]
+
+    >>> sequence = ["A", "B"]
+    >>> current_subsequence = []
+    >>> create_state_space_tree(sequence, current_subsequence, 0)
+    []
+    ['B']
+    ['A']
+    ['A', 'B']
+
+    >>> sequence = []
+    >>> current_subsequence = []
+    >>> create_state_space_tree(sequence, current_subsequence, 0)
+    []
+
+    >>> sequence = [1, 2, 3, 4]
+    >>> current_subsequence = []
+    >>> create_state_space_tree(sequence, current_subsequence, 0)
+    []
+    [4]
+    [3]
+    [3, 4]
+    [2]
+    [2, 4]
+    [2, 3]
+    [2, 3, 4]
+    [1]
+    [1, 4]
+    [1, 3]
+    [1, 3, 4]
+    [1, 2]
+    [1, 2, 4]
+    [1, 2, 3]
+    [1, 2, 3, 4]
     """
 
     if index == len(sequence):
@@ -35,7 +85,7 @@ def create_state_space_tree(
 
 
 if __name__ == "__main__":
-    seq: list[Any] = [3, 1, 2, 4]
+    seq: list[Any] = [1, 2, 3]
     generate_all_subsequences(seq)
 
     seq.clear()
diff --git a/bit_manipulation/binary_and_operator.py b/bit_manipulation/binary_and_operator.py
@@ -26,7 +26,7 @@ def binary_and(a: int, b: int) -> str:
     >>> binary_and(0, 1.1)
     Traceback (most recent call last):
         ...
-    TypeError: 'float' object cannot be interpreted as an integer
+    ValueError: Unknown format code 'b' for object of type 'float'
     >>> binary_and("0", "1")
     Traceback (most recent call last):
         ...
@@ -35,8 +35,8 @@ def binary_and(a: int, b: int) -> str:
     if a < 0 or b < 0:
         raise ValueError("the value of both inputs must be positive")
 
-    a_binary = str(bin(a))[2:]  # remove the leading "0b"
-    b_binary = str(bin(b))[2:]  # remove the leading "0b"
+    a_binary = format(a, "b")
+    b_binary = format(b, "b")
 
     max_len = max(len(a_binary), len(b_binary))
 
diff --git a/divide_and_conquer/power.py b/divide_and_conquer/power.py
@@ -2,6 +2,20 @@ def actual_power(a: int, b: int):
     """
     Function using divide and conquer to calculate a^b.
     It only works for integer a,b.
+
+    :param a: The base of the power operation, an integer.
+    :param b: The exponent of the power operation, a non-negative integer.
+    :return: The result of a^b.
+
+    Examples:
+    >>> actual_power(3, 2)
+    9
+    >>> actual_power(5, 3)
+    125
+    >>> actual_power(2, 5)
+    32
+    >>> actual_power(7, 0)
+    1
     """
     if b == 0:
         return 1
@@ -13,6 +27,10 @@ def actual_power(a: int, b: int):
 
 def power(a: int, b: int) -> float:
     """
+    :param a: The base (integer).
+    :param b: The exponent (integer).
+    :return: The result of a^b, as a float for negative exponents.
+
     >>> power(4,6)
     4096
     >>> power(2,3)
diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py
@@ -629,6 +629,40 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) ->
     return np.mean(loss)
 
 
+def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+    """
+    Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
+    and predicted probabilities.
+
+    KL divergence loss quantifies dissimilarity between true labels and predicted
+    probabilities. It's often used in training generative models.
+
+    KL = Σ(y_true * ln(y_true / y_pred)), where terms with y_true == 0 count as 0
+
+    Reference: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
+
+    Parameters:
+    - y_true: True class probabilities
+    - y_pred: Predicted class probabilities (same length as y_true)
+
+    >>> true_labels = np.array([0.2, 0.3, 0.5])
+    >>> kullback_leibler_divergence(true_labels, np.array([0.3, 0.3, 0.4]))
+    0.030478754035472025
+    >>> kullback_leibler_divergence(np.array([0.0, 1.0]), np.array([0.5, 0.5]))
+    0.6931471805599453
+    >>> kullback_leibler_divergence(true_labels, np.array([0.3, 0.3, 0.4, 0.5]))
+    Traceback (most recent call last):
+        ...
+    ValueError: Input arrays must have the same length.
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError("Input arrays must have the same length.")
+
+    support = np.asarray(y_true) > 0  # 0 * ln(0) is taken as 0; evaluating it gives nan
+    y_true, y_pred = np.asarray(y_true)[support], np.asarray(y_pred)[support]
+    return float(np.sum(y_true * np.log(y_true / y_pred)))  # plain float, not np.float64
+
+
 if __name__ == "__main__":
     import doctest
 
diff --git a/machine_learning/sequential_minimum_optimization.py b/machine_learning/sequential_minimum_optimization.py
@@ -463,7 +463,11 @@ def test_cancel_data():
         with open(r"cancel_data.csv", "w") as f:
             f.write(content)
 
-    data = pd.read_csv(r"cancel_data.csv", header=None)
+    data = pd.read_csv(
+        "cancel_data.csv",
+        header=None,
+        dtype={0: str},  # Assuming the first column contains string data
+    )
 
     # 1: pre-processing data
     del data[data.columns.tolist()[0]]
diff --git a/maths/median_of_two_arrays.py b/maths/median_of_two_arrays.py
diff --git a/searches/binary_search.py b/searches/binary_search.py
@@ -2,6 +2,7 @@
 
 from bisect import bisect_left, bisect_right
 
+
 def bisect_left_custom(sorted_collection, item, lo=0, hi=None):
     """
     Custom implementation of bisect_left.
@@ -17,6 +18,7 @@ def bisect_left_custom(sorted_collection, item, lo=0, hi=None):
             hi = mid
     return lo
 
+
 def bisect_right_custom(sorted_collection, item, lo=0, hi=None):
     """
     Custom implementation of bisect_right.
@@ -32,18 +34,21 @@ def bisect_right_custom(sorted_collection, item, lo=0, hi=None):
             hi = mid
     return lo
 
+
 def insort_left_custom(sorted_collection, item, lo=0, hi=None):
     """
     Inserts item into sorted_collection in sorted order (using bisect_left_custom).
     """
     sorted_collection.insert(bisect_left_custom(sorted_collection, item, lo, hi), item)
 
+
 def insort_right_custom(sorted_collection, item, lo=0, hi=None):
     """
     Inserts item into sorted_collection in sorted order (using bisect_right_custom).
     """
     sorted_collection.insert(bisect_right_custom(sorted_collection, item, lo, hi), item)
 
+
 def binary_search(sorted_collection, item):
     """
     Standard binary search implementation.
@@ -60,6 +65,7 @@ def binary_search(sorted_collection, item):
             hi = mid - 1
     return -1
 
+
 def binary_search_std_lib(sorted_collection, item):
     """
     Binary search using Python's standard library bisect module.
@@ -69,6 +75,7 @@ def binary_search_std_lib(sorted_collection, item):
         return index
     return -1
 
+
 def binary_search_by_recursion(sorted_collection, item, lo=0, hi=None):
     """
     Binary search using recursion.
@@ -85,6 +92,7 @@ def binary_search_by_recursion(sorted_collection, item, lo=0, hi=None):
     else:
         return binary_search_by_recursion(sorted_collection, item, mid + 1, hi)
 
+
 def exponential_search(sorted_collection, item):
     """
     Exponential search implementation.
@@ -95,7 +103,10 @@ def exponential_search(sorted_collection, item):
     bound = 1
     while bound < len(sorted_collection) and sorted_collection[bound] < item:
         bound *= 2
-    return binary_search_by_recursion(sorted_collection, item, bound // 2, min(bound, len(sorted_collection) - 1))
+    return binary_search_by_recursion(
+        sorted_collection, item, bound // 2, min(bound, len(sorted_collection) - 1)
+    )
+
 
 if __name__ == "__main__":
     import doctest
@@ -105,7 +116,12 @@ def exponential_search(sorted_collection, item):
     doctest.testmod()
 
     # List of search functions to benchmark
-    searches = [binary_search_std_lib, binary_search, exponential_search, binary_search_by_recursion]
+    searches = [
+        binary_search_std_lib,
+        binary_search,
+        exponential_search,
+        binary_search_by_recursion,
+    ]
 
     # Test and print results of searching for 10 in a sample list
     for search in searches:
@@ -115,7 +131,12 @@ def exponential_search(sorted_collection, item):
     setup = "collection = list(range(1000))"
     # Benchmark each search function
     for search in searches:
-        time = timeit.timeit(f"{search.__name__}(collection, 500)", setup=setup, number=5000, globals=globals())
+        time = timeit.timeit(
+            f"{search.__name__}(collection, 500)",
+            setup=setup,
+            number=5000,
+            globals=globals(),
+        )
         print(f"{search.__name__:>26}: {time:.6f}")
 
     # Interactive part: user inputs a list and a target number