Skip to content

Commit 5c8a6c8

Browse files
authored
Add Pollard's Rho algorithm for integer factorization (#5598)
1 parent 0590d73 commit 5c8a6c8

File tree

1 file changed

+148
-0
lines changed

1 file changed

+148
-0
lines changed

Diff for: maths/pollard_rho.py

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
from math import gcd
2+
from typing import Union
3+
4+
5+
def pollard_rho(
6+
num: int,
7+
seed: int = 2,
8+
step: int = 1,
9+
attempts: int = 3,
10+
) -> Union[int, None]:
11+
"""
12+
Use Pollard's Rho algorithm to return a nontrivial factor of ``num``.
13+
The returned factor may be composite and require further factorization.
14+
If the algorithm will return None if it fails to find a factor within
15+
the specified number of attempts or within the specified number of steps.
16+
If ``num`` is prime, this algorithm is guaranteed to return None.
17+
https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm
18+
19+
>>> pollard_rho(18446744073709551617)
20+
274177
21+
>>> pollard_rho(97546105601219326301)
22+
9876543191
23+
>>> pollard_rho(100)
24+
2
25+
>>> pollard_rho(17)
26+
>>> pollard_rho(17**3)
27+
17
28+
>>> pollard_rho(17**3, attempts=1)
29+
>>> pollard_rho(3*5*7)
30+
21
31+
>>> pollard_rho(1)
32+
Traceback (most recent call last):
33+
...
34+
ValueError: The input value cannot be less than 2
35+
"""
36+
# A value less than 2 can cause an infinite loop in the algorithm.
37+
if num < 2:
38+
raise ValueError("The input value cannot be less than 2")
39+
40+
# Because of the relationship between ``f(f(x))`` and ``f(x)``, this
41+
# algorithm struggles to find factors that are divisible by two.
42+
# As a workaround, we specifically check for two and even inputs.
43+
# See: https://math.stackexchange.com/a/2856214/165820
44+
if num > 2 and num % 2 == 0:
45+
return 2
46+
47+
# Pollard's Rho algorithm requires a function that returns pseudorandom
48+
# values between 0 <= X < ``num``. It doesn't need to be random in the
49+
# sense that the output value is cryptographically secure or difficult
50+
# to calculate, it only needs to be random in the sense that all output
51+
# values should be equally likely to appear.
52+
# For this reason, Pollard suggested using ``f(x) = (x**2 - 1) % num``
53+
# However, the success of Pollard's algorithm isn't guaranteed and is
54+
# determined in part by the initial seed and the chosen random function.
55+
# To make retries easier, we will instead use ``f(x) = (x**2 + C) % num``
56+
# where ``C`` is a value that we can modify between each attempt.
57+
def rand_fn(value: int, step: int, modulus: int) -> int:
58+
"""
59+
Returns a pseudorandom value modulo ``modulus`` based on the
60+
input ``value`` and attempt-specific ``step`` size.
61+
62+
>>> rand_fn(0, 0, 0)
63+
Traceback (most recent call last):
64+
...
65+
ZeroDivisionError: integer division or modulo by zero
66+
>>> rand_fn(1, 2, 3)
67+
0
68+
>>> rand_fn(0, 10, 7)
69+
3
70+
>>> rand_fn(1234, 1, 17)
71+
16
72+
"""
73+
return (pow(value, 2) + step) % modulus
74+
75+
for attempt in range(attempts):
76+
# These track the position within the cycle detection logic.
77+
tortoise = seed
78+
hare = seed
79+
80+
while True:
81+
# At each iteration, the tortoise moves one step and the hare moves two.
82+
tortoise = rand_fn(tortoise, step, num)
83+
hare = rand_fn(hare, step, num)
84+
hare = rand_fn(hare, step, num)
85+
86+
# At some point both the tortoise and the hare will enter a cycle whose
87+
# length ``p`` is a divisor of ``num``. Once in that cycle, at some point
88+
# the tortoise and hare will end up on the same value modulo ``p``.
89+
# We can detect when this happens because the position difference between
90+
# the tortoise and the hare will share a common divisor with ``num``.
91+
divisor = gcd(hare - tortoise, num)
92+
93+
if divisor == 1:
94+
# No common divisor yet, just keep searching.
95+
continue
96+
else:
97+
# We found a common divisor!
98+
if divisor == num:
99+
# Unfortunately, the divisor is ``num`` itself and is useless.
100+
break
101+
else:
102+
# The divisor is a nontrivial factor of ``num``!
103+
return divisor
104+
105+
# If we made it here, then this attempt failed.
106+
# We need to pick a new starting seed for the tortoise and hare
107+
# in addition to a new step value for the random function.
108+
# To keep this example implementation deterministic, the
109+
# new values will be generated based on currently available
110+
# values instead of using something like ``random.randint``.
111+
112+
# We can use the hare's position as the new seed.
113+
# This is actually what Richard Brent's the "optimized" variant does.
114+
seed = hare
115+
116+
# The new step value for the random function can just be incremented.
117+
# At first the results will be similar to what the old function would
118+
# have produced, but the value will quickly diverge after a bit.
119+
step += 1
120+
121+
# We haven't found a divisor within the requested number of attempts.
122+
# We were unlucky or ``num`` itself is actually prime.
123+
return None
124+
125+
126+
if __name__ == "__main__":
127+
import argparse
128+
129+
parser = argparse.ArgumentParser()
130+
parser.add_argument(
131+
"num",
132+
type=int,
133+
help="The value to find a divisor of",
134+
)
135+
parser.add_argument(
136+
"--attempts",
137+
type=int,
138+
default=3,
139+
help="The number of attempts before giving up",
140+
)
141+
args = parser.parse_args()
142+
143+
divisor = pollard_rho(args.num, attempts=args.attempts)
144+
if divisor is None:
145+
print(f"{args.num} is probably prime")
146+
else:
147+
quotient = args.num // divisor
148+
print(f"{args.num} = {divisor} * {quotient}")

0 commit comments

Comments
 (0)