Skip to content

Commit 5bc3849

Browse files
committed
extract alphabet size logic
1 parent d2fda29 commit 5bc3849

File tree

1 file changed

+32
-37
lines changed
  • hypothesis-python/src/hypothesis/internal/conjecture

1 file changed

+32
-37
lines changed

hypothesis-python/src/hypothesis/internal/conjecture/datatree.py

Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,30 @@ def _repr_pretty_(self, p, cycle):
146146
MAX_CHILDREN_EFFECTIVELY_INFINITE = 100_000
147147

148148

149+
def _count_distinct_strings(*, alphabet_size, min_size, max_size):
150+
# We want to estimate if we're going to have more children than
151+
# MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
152+
# extremely expensive pow. We'll check if the number of strings in
153+
# the largest string size alone is enough to put us over this limit.
154+
# We'll also employ a trick of estimating against log, which is cheaper
155+
# than computing a pow.
156+
#
157+
# x = alphabet_size
158+
# y = max_size
159+
# n = MAX_CHILDREN_EFFECTIVELY_INFINITE
160+
#
161+
# x**y > n
162+
# <=> log(x**y) > log(n)
163+
# <=> y * log(x) > log(n)
164+
definitely_too_large = max_size * math.log(alphabet_size) > math.log(
165+
MAX_CHILDREN_EFFECTIVELY_INFINITE
166+
)
167+
if definitely_too_large:
168+
return MAX_CHILDREN_EFFECTIVELY_INFINITE
169+
170+
return sum(alphabet_size**k for k in range(min_size, max_size + 1))
171+
172+
149173
def compute_max_children(ir_type, kwargs):
150174
if ir_type == "integer":
151175
min_value = kwargs["min_value"]
@@ -176,16 +200,9 @@ def compute_max_children(ir_type, kwargs):
176200
return 1
177201
return 2
178202
elif ir_type == "bytes":
179-
min_size = kwargs["min_size"]
180-
max_size = kwargs["max_size"]
181-
182-
definitely_too_large = max_size * math.log(2**8) > math.log(
183-
MAX_CHILDREN_EFFECTIVELY_INFINITE
203+
return _count_distinct_strings(
204+
alphabet_size=2**8, min_size=kwargs["min_size"], max_size=kwargs["max_size"]
184205
)
185-
if definitely_too_large:
186-
return MAX_CHILDREN_EFFECTIVELY_INFINITE
187-
188-
return sum(2 ** (8 * k) for k in range(min_size, max_size + 1))
189206
elif ir_type == "string":
190207
min_size = kwargs["min_size"]
191208
max_size = kwargs["max_size"]
@@ -196,36 +213,14 @@ def compute_max_children(ir_type, kwargs):
196213
# Only possibility is the empty string.
197214
return 1
198215

199-
# We want to estimate if we're going to have more children than
200-
# MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
201-
# extremely expensive pow. We'll check if the number of strings in
202-
# the largest string size alone is enough to put us over this limit.
203-
# We'll also employ a trick of estimating against log, which is cheaper
204-
# than computing a pow.
205-
#
206-
# x = max_size
207-
# y = len(intervals)
208-
# n = MAX_CHILDREN_EFFECTIVELY_INFINITE
209-
#
210-
# x**y > n
211-
# <=> log(x**y) > log(n)
212-
# <=> y * log(x) > log(n)
213-
214-
# avoid math.log(1) == 0 and incorrectly failing the below estimate,
215-
# even when we definitely are too large.
216-
if len(intervals) == 1:
217-
definitely_too_large = max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE
218-
else:
219-
definitely_too_large = max_size * math.log(len(intervals)) > math.log(
220-
MAX_CHILDREN_EFFECTIVELY_INFINITE
221-
)
222-
223-
if definitely_too_large:
216+
# avoid math.log(1) == 0 and incorrectly failing our effectively_infinite
217+
# estimate, even when we definitely are too large.
218+
if len(intervals) == 1 and max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE:
224219
return MAX_CHILDREN_EFFECTIVELY_INFINITE
225220

226-
# number of strings of length k, for each k in [min_size, max_size].
227-
return sum(len(intervals) ** k for k in range(min_size, max_size + 1))
228-
221+
return _count_distinct_strings(
222+
alphabet_size=len(intervals), min_size=min_size, max_size=max_size
223+
)
229224
elif ir_type == "float":
230225
min_value = kwargs["min_value"]
231226
max_value = kwargs["max_value"]

0 commit comments

Comments
 (0)