Add Pollard's Rho algorithm for integer factorization (#5598)

llamasoft · web-flow · commit 5c8a6c824723 · 2021-10-29T01:53:39.000+02:00
diff --git a/maths/pollard_rho.py b/maths/pollard_rho.py
@@ -0,0 +1,148 @@
+from math import gcd
+from typing import Union
+
+
+def pollard_rho(
+    num: int,
+    seed: int = 2,
+    step: int = 1,
+    attempts: int = 3,
+) -> Union[int, None]:
+    """
+    Use Pollard's Rho algorithm to return a nontrivial factor of ``num``.
+    The returned factor may be composite and require further factorization.
+    If the algorithm will return None if it fails to find a factor within
+    the specified number of attempts or within the specified number of steps.
+    If ``num`` is prime, this algorithm is guaranteed to return None.
+    https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm
+
+    >>> pollard_rho(18446744073709551617)
+    274177
+    >>> pollard_rho(97546105601219326301)
+    9876543191
+    >>> pollard_rho(100)
+    2
+    >>> pollard_rho(17)
+    >>> pollard_rho(17**3)
+    17
+    >>> pollard_rho(17**3, attempts=1)
+    >>> pollard_rho(3*5*7)
+    21
+    >>> pollard_rho(1)
+    Traceback (most recent call last):
+        ...
+    ValueError: The input value cannot be less than 2
+    """
+    # A value less than 2 can cause an infinite loop in the algorithm.
+    if num < 2:
+        raise ValueError("The input value cannot be less than 2")
+
+    # Because of the relationship between ``f(f(x))`` and ``f(x)``, this
+    # algorithm struggles to find factors that are divisible by two.
+    # As a workaround, we specifically check for two and even inputs.
+    #   See: https://math.stackexchange.com/a/2856214/165820
+    if num > 2 and num % 2 == 0:
+        return 2
+
+    # Pollard's Rho algorithm requires a function that returns pseudorandom
+    # values between 0 <= X < ``num``.  It doesn't need to be random in the
+    # sense that the output value is cryptographically secure or difficult
+    # to calculate, it only needs to be random in the sense that all output
+    # values should be equally likely to appear.
+    # For this reason, Pollard suggested using ``f(x) = (x**2 - 1) % num``
+    # However, the success of Pollard's algorithm isn't guaranteed and is
+    # determined in part by the initial seed and the chosen random function.
+    # To make retries easier, we will instead use ``f(x) = (x**2 + C) % num``
+    # where ``C`` is a value that we can modify between each attempt.
+    def rand_fn(value: int, step: int, modulus: int) -> int:
+        """
+        Returns a pseudorandom value modulo ``modulus`` based on the
+        input ``value`` and attempt-specific ``step`` size.
+
+        >>> rand_fn(0, 0, 0)
+        Traceback (most recent call last):
+            ...
+        ZeroDivisionError: integer division or modulo by zero
+        >>> rand_fn(1, 2, 3)
+        0
+        >>> rand_fn(0, 10, 7)
+        3
+        >>> rand_fn(1234, 1, 17)
+        16
+        """
+        return (pow(value, 2) + step) % modulus
+
+    for attempt in range(attempts):
+        # These track the position within the cycle detection logic.
+        tortoise = seed
+        hare = seed
+
+        while True:
+            # At each iteration, the tortoise moves one step and the hare moves two.
+            tortoise = rand_fn(tortoise, step, num)
+            hare = rand_fn(hare, step, num)
+            hare = rand_fn(hare, step, num)
+
+            # At some point both the tortoise and the hare will enter a cycle whose
+            # length ``p`` is a divisor of ``num``.  Once in that cycle, at some point
+            # the tortoise and hare will end up on the same value modulo ``p``.
+            # We can detect when this happens because the position difference between
+            # the tortoise and the hare will share a common divisor with ``num``.
+            divisor = gcd(hare - tortoise, num)
+
+            if divisor == 1:
+                # No common divisor yet, just keep searching.
+                continue
+            else:
+                # We found a common divisor!
+                if divisor == num:
+                    # Unfortunately, the divisor is ``num`` itself and is useless.
+                    break
+                else:
+                    # The divisor is a nontrivial factor of ``num``!
+                    return divisor
+
+        # If we made it here, then this attempt failed.
+        # We need to pick a new starting seed for the tortoise and hare
+        # in addition to a new step value for the random function.
+        # To keep this example implementation deterministic, the
+        # new values will be generated based on currently available
+        # values instead of using something like ``random.randint``.
+
+        # We can use the hare's position as the new seed.
+        # This is actually what Richard Brent's the "optimized" variant does.
+        seed = hare
+
+        # The new step value for the random function can just be incremented.
+        # At first the results will be similar to what the old function would
+        # have produced, but the value will quickly diverge after a bit.
+        step += 1
+
+    # We haven't found a divisor within the requested number of attempts.
+    # We were unlucky or ``num`` itself is actually prime.
+    return None
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "num",
+        type=int,
+        help="The value to find a divisor of",
+    )
+    parser.add_argument(
+        "--attempts",
+        type=int,
+        default=3,
+        help="The number of attempts before giving up",
+    )
+    args = parser.parse_args()
+
+    divisor = pollard_rho(args.num, attempts=args.attempts)
+    if divisor is None:
+        print(f"{args.num} is probably prime")
+    else:
+        quotient = args.num // divisor
+        print(f"{args.num} = {divisor} * {quotient}")