@@ -146,6 +146,30 @@ def _repr_pretty_(self, p, cycle):
146
146
MAX_CHILDREN_EFFECTIVELY_INFINITE = 100_000
147
147
148
148
149
def _count_distinct_strings(*, alphabet_size, min_size, max_size):
    """Count the distinct strings with a length in [min_size, max_size].

    Each position of a string can hold any of ``alphabet_size`` symbols, so
    there are ``alphabet_size ** k`` strings of length ``k``.

    Returns MAX_CHILDREN_EFFECTIVELY_INFINITE as soon as the strings of
    length ``max_size`` alone are estimated to exceed that limit; otherwise
    returns the exact count summed over every permitted length.
    """
    # math.log is undefined at 0 and equals 0 at 1, so the logarithmic
    # estimate below is meaningless for these alphabets. Their counts have a
    # closed form, which also avoids walking a potentially huge size range.
    if alphabet_size == 0:
        # Only the empty string can be built from an empty alphabet.
        return 1 if min_size <= 0 <= max_size else 0
    if alphabet_size == 1:
        # Exactly one string per permitted length; this is the same value
        # the summation at the end of this function would produce.
        return max(max_size - min_size + 1, 0)

    # We want to estimate if we're going to have more children than
    # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
    # extremely expensive pow. We'll check if the number of strings in
    # the largest string size alone is enough to put us over this limit.
    # We'll also employ a trick of estimating against log, which is cheaper
    # than computing a pow.
    #
    # x = alphabet_size
    # y = max_size
    # n = MAX_CHILDREN_EFFECTIVELY_INFINITE
    #
    #     x**y > n
    # <=> log(x**y)  > log(n)
    # <=> y * log(x) > log(n)
    definitely_too_large = max_size * math.log(alphabet_size) > math.log(
        MAX_CHILDREN_EFFECTIVELY_INFINITE
    )
    if definitely_too_large:
        return MAX_CHILDREN_EFFECTIVELY_INFINITE

    # number of strings of length k, for each k in [min_size, max_size].
    return sum(alphabet_size**k for k in range(min_size, max_size + 1))
171
+
172
+
149
173
def compute_max_children (ir_type , kwargs ):
150
174
if ir_type == "integer" :
151
175
min_value = kwargs ["min_value" ]
@@ -176,16 +200,9 @@ def compute_max_children(ir_type, kwargs):
176
200
return 1
177
201
return 2
178
202
elif ir_type == "bytes" :
179
- min_size = kwargs ["min_size" ]
180
- max_size = kwargs ["max_size" ]
181
-
182
- definitely_too_large = max_size * math .log (2 ** 8 ) > math .log (
183
- MAX_CHILDREN_EFFECTIVELY_INFINITE
203
+ return _count_distinct_strings (
204
+ alphabet_size = 2 ** 8 , min_size = kwargs ["min_size" ], max_size = kwargs ["max_size" ]
184
205
)
185
- if definitely_too_large :
186
- return MAX_CHILDREN_EFFECTIVELY_INFINITE
187
-
188
- return sum (2 ** (8 * k ) for k in range (min_size , max_size + 1 ))
189
206
elif ir_type == "string" :
190
207
min_size = kwargs ["min_size" ]
191
208
max_size = kwargs ["max_size" ]
@@ -196,36 +213,14 @@ def compute_max_children(ir_type, kwargs):
196
213
# Only possibility is the empty string.
197
214
return 1
198
215
199
- # We want to estimate if we're going to have more children than
200
- # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
201
- # extremely expensive pow. We'll check if the number of strings in
202
- # the largest string size alone is enough to put us over this limit.
203
- # We'll also employ a trick of estimating against log, which is cheaper
204
- # than computing a pow.
205
- #
206
- # x = max_size
207
- # y = len(intervals)
208
- # n = MAX_CHILDREN_EFFECTIVELY_INFINITE
209
- #
210
- # x**y > n
211
- # <=> log(x**y) > log(n)
212
- # <=> y * log(x) > log(n)
213
-
214
- # avoid math.log(1) == 0 and incorrectly failing the below estimate,
215
- # even when we definitely are too large.
216
- if len (intervals ) == 1 :
217
- definitely_too_large = max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE
218
- else :
219
- definitely_too_large = max_size * math .log (len (intervals )) > math .log (
220
- MAX_CHILDREN_EFFECTIVELY_INFINITE
221
- )
222
-
223
- if definitely_too_large :
216
+ # avoid math.log(1) == 0 and incorrectly failing our effectively_infinite
217
+ # estimate, even when we definitely are too large.
218
+ if len (intervals ) == 1 and max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE :
224
219
return MAX_CHILDREN_EFFECTIVELY_INFINITE
225
220
226
- # number of strings of length k, for each k in [min_size, max_size].
227
- return sum ( len (intervals ) ** k for k in range ( min_size , max_size + 1 ))
228
-
221
+ return _count_distinct_strings (
222
+ alphabet_size = len (intervals ), min_size = min_size , max_size = max_size
223
+ )
229
224
elif ir_type == "float" :
230
225
min_value = kwargs ["min_value" ]
231
226
max_value = kwargs ["max_value" ]
0 commit comments