Skip to content

Commit d863cb1

Browse files
authored
Merge pull request #4163 from tybug/optimiser-ir
Migrate `Optimiser` to the typed choice sequence
2 parents a0f11e7 + 71ec431 commit d863cb1

File tree

3 files changed

+143
-61
lines changed

3 files changed

+143
-61
lines changed

hypothesis-python/RELEASE.rst

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
RELEASE_TYPE: patch
2+
3+
This patch migrates the optimisation algorithm for :ref:`targeted property-based testing <targeted-search>` to our IR layer, the typed choice sequence (:issue:`3921`). This should result in moderately different (and hopefully improved) exploration behavior in tests which use :func:`hypothesis.target`.

hypothesis-python/src/hypothesis/internal/conjecture/optimiser.py

+99-58
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,16 @@
1111
from typing import Union
1212

1313
from hypothesis.internal.compat import int_from_bytes, int_to_bytes
14-
from hypothesis.internal.conjecture.data import ConjectureResult, Status, _Overrun
15-
from hypothesis.internal.conjecture.engine import BUFFER_SIZE, ConjectureRunner
14+
from hypothesis.internal.conjecture.data import (
15+
ConjectureResult,
16+
IRType,
17+
Status,
18+
_Overrun,
19+
bits_to_bytes,
20+
ir_size_nodes,
21+
ir_value_permitted,
22+
)
23+
from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR, ConjectureRunner
1624
from hypothesis.internal.conjecture.junkdrawer import find_integer
1725
from hypothesis.internal.conjecture.pareto import NO_SCORE
1826

@@ -75,9 +83,11 @@ def consider_new_data(self, data: Union[ConjectureResult, _Overrun]) -> bool:
7583
return True
7684
assert score == self.current_score
7785
# We allow transitions that leave the score unchanged as long as they
78-
# don't increase the buffer size. This gives us a certain amount of
86+
# don't increase the number of nodes. This gives us a certain amount of
7987
# freedom for lateral moves that will take us out of local maxima.
80-
if len(data.buffer) <= len(self.current_data.buffer):
88+
if len(data.examples.ir_tree_nodes) <= len(
89+
self.current_data.examples.ir_tree_nodes
90+
):
8191
self.current_data = data
8292
return True
8393
return False
@@ -88,92 +98,123 @@ def hill_climb(self) -> None:
8898
a data object and returns an index to an example where we should focus
8999
our efforts."""
90100

91-
blocks_examined = set()
101+
nodes_examined = set()
92102

93103
prev = None
94-
i = len(self.current_data.blocks) - 1
104+
i = len(self.current_data.examples.ir_tree_nodes) - 1
95105
while i >= 0 and self.improvements <= self.max_improvements:
96106
if prev is not self.current_data:
97-
i = len(self.current_data.blocks) - 1
107+
i = len(self.current_data.examples.ir_tree_nodes) - 1
98108
prev = self.current_data
99109

100-
if i in blocks_examined:
110+
if i in nodes_examined:
101111
i -= 1
102112
continue
103113

104-
blocks_examined.add(i)
105-
data = self.current_data
106-
block = data.blocks[i]
107-
prefix = data.buffer[: block.start]
114+
nodes_examined.add(i)
115+
node = self.current_data.examples.ir_tree_nodes[i]
116+
assert node.index is not None
117+
# we can only (sensibly & easily) define hill climbing for
118+
# numeric-style nodes. It's not clear hill-climbing a string is
119+
# useful, for instance.
120+
if node.ir_type not in {"integer", "float", "bytes", "boolean"}:
121+
continue
108122

109-
existing = data.buffer[block.start : block.end]
110-
existing_as_int = int_from_bytes(existing)
111-
max_int_value = (256 ** len(existing)) - 1
123+
def attempt_replace(k: int) -> bool:
124+
"""
125+
Try replacing the current node in the current best test case
126+
with a value which is "k times larger", where the exact notion
127+
of "larger" depends on the ir_type.
128+
129+
Note that we use the *current* best and not the one we started with.
130+
This helps ensure that if we luck into a good draw when making
131+
random choices we get to keep the good bits.
132+
"""
133+
# we don't want to infinitely drive up an unbounded score.
134+
if abs(k) > 2**20:
135+
return False
112136

113-
if existing_as_int == max_int_value:
114-
continue
137+
node = self.current_data.examples.ir_tree_nodes[i]
138+
assert node.index is not None
139+
if node.was_forced:
140+
return False # pragma: no cover
141+
142+
new_value: IRType
143+
if node.ir_type in {"integer", "float"}:
144+
assert isinstance(node.value, (int, float))
145+
new_value = node.value + k
146+
elif node.ir_type == "boolean":
147+
assert isinstance(node.value, bool)
148+
if abs(k) > 1:
149+
return False
150+
if k == -1:
151+
new_value = False
152+
if k == 1:
153+
new_value = True
154+
if k == 0: # pragma: no cover
155+
new_value = node.value
156+
else:
157+
assert node.ir_type == "bytes"
158+
assert isinstance(node.value, bytes)
159+
v = int_from_bytes(node.value)
160+
# can't go below zero for bytes
161+
if v + k < 0:
162+
return False
163+
v += k
164+
# allow adding k to increase the number of bytes. we don't want
165+
# to decrease so that b"01" doesn't turn into b"1".
166+
size = max(len(node.value), bits_to_bytes(v.bit_length()))
167+
new_value = int_to_bytes(v, size)
115168

116-
def attempt_replace(v: int) -> bool:
117-
"""Try replacing the current block in the current best test case
118-
with an integer of value i. Note that we use the *current*
119-
best and not the one we started with. This helps ensure that
120-
if we luck into a good draw when making random choices we get
121-
to keep the good bits."""
122-
if v < 0 or v > max_int_value:
169+
if not ir_value_permitted(new_value, node.ir_type, node.kwargs):
123170
return False
124-
v_as_bytes = int_to_bytes(v, len(existing))
125171

126-
# We make a couple attempts at replacement. This only matters
127-
# if we end up growing the buffer - otherwise we exit the loop
128-
# early - but in the event that there *is* some randomized
129-
# component we want to give it a couple of tries to succeed.
130172
for _ in range(3):
131-
attempt = self.engine.cached_test_function(
132-
prefix
133-
+ v_as_bytes
134-
+ self.current_data.buffer[block.end :]
135-
+ bytes(BUFFER_SIZE),
173+
nodes = self.current_data.examples.ir_tree_nodes
174+
attempt_nodes = (
175+
nodes[: node.index]
176+
+ (node.copy(with_value=new_value),)
177+
+ nodes[node.index + 1 :]
178+
)
179+
attempt = self.engine.cached_test_function_ir(
180+
attempt_nodes,
181+
extend=BUFFER_SIZE_IR - ir_size_nodes(attempt_nodes),
136182
)
137183

138184
if self.consider_new_data(attempt):
139185
return True
140186

141-
if attempt.status == Status.OVERRUN:
187+
if attempt.status is Status.OVERRUN:
142188
return False
143189

144190
assert isinstance(attempt, ConjectureResult)
145-
if len(attempt.buffer) == len(self.current_data.buffer):
191+
if len(attempt.examples.ir_tree_nodes) == len(
192+
self.current_data.examples.ir_tree_nodes
193+
):
146194
return False
147195

148-
for i, ex in enumerate(self.current_data.examples):
149-
if ex.start >= block.end:
196+
for j, ex in enumerate(self.current_data.examples):
197+
if ex.ir_start >= node.index + 1:
150198
break # pragma: no cover
151-
if ex.end <= block.start:
199+
if ex.ir_end <= node.index:
152200
continue
153-
ex_attempt = attempt.examples[i]
154-
if ex.length == ex_attempt.length:
201+
ex_attempt = attempt.examples[j]
202+
if ex.ir_length == ex_attempt.ir_length:
155203
continue # pragma: no cover
156-
replacement = attempt.buffer[ex_attempt.start : ex_attempt.end]
204+
replacement = attempt.examples.ir_tree_nodes[
205+
ex_attempt.ir_start : ex_attempt.ir_end
206+
]
157207
if self.consider_new_data(
158-
self.engine.cached_test_function(
159-
prefix
208+
self.engine.cached_test_function_ir(
209+
nodes[: node.index]
160210
+ replacement
161-
+ self.current_data.buffer[ex.end :]
211+
+ self.current_data.examples.ir_tree_nodes[ex.ir_end :]
162212
)
163213
):
164214
return True
165215
return False
166216

167-
# We unconditionally scan both upwards and downwards. The reason
168-
# for this is that we allow "lateral" moves that don't increase the
169-
# score but instead leave it constant. All else being equal we'd
170-
# like to leave the test case closer to shrunk, so afterwards we
171-
# try lowering the value towards zero even if we've just raised it.
172-
173-
if not attempt_replace(max_int_value):
174-
find_integer(lambda k: attempt_replace(k + existing_as_int))
175-
176-
existing = self.current_data.buffer[block.start : block.end]
177-
existing_as_int = int_from_bytes(existing)
178-
if not attempt_replace(0):
179-
find_integer(lambda k: attempt_replace(existing_as_int - k))
217+
# we don't know whether a target score increases or decreases with
218+
# respect to the value of some node, so try both directions.
219+
find_integer(lambda k: attempt_replace(k))
220+
find_integer(lambda k: attempt_replace(-k))

hypothesis-python/tests/conjecture/test_optimiser.py

+41-3
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,18 @@
88
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
99
# obtain one at https://mozilla.org/MPL/2.0/.
1010

11+
import math
12+
1113
import pytest
1214

13-
from hypothesis import settings
15+
from hypothesis import assume, example, given, settings
1416
from hypothesis.internal.compat import int_to_bytes
15-
from hypothesis.internal.conjecture.data import Status
17+
from hypothesis.internal.conjecture.data import IRNode, Status
18+
from hypothesis.internal.conjecture.datatree import compute_max_children
1619
from hypothesis.internal.conjecture.engine import ConjectureRunner, RunIsComplete
1720
from hypothesis.internal.entropy import deterministic_PRNG
1821

19-
from tests.conjecture.common import TEST_SETTINGS, buffer_size_limit
22+
from tests.conjecture.common import TEST_SETTINGS, buffer_size_limit, ir_nodes
2023

2124

2225
def test_optimises_to_maximum():
@@ -219,3 +222,38 @@ def test(data):
219222
pass
220223

221224
assert runner.best_observed_targets["m"] == 100
225+
226+
227+
@given(ir_nodes())
228+
@example(
229+
IRNode(
230+
ir_type="bytes",
231+
value=b"\xb1",
232+
kwargs={"min_size": 1, "max_size": 1},
233+
was_forced=False,
234+
)
235+
)
236+
def test_optimising_all_nodes(node):
237+
assume(compute_max_children(node.ir_type, node.kwargs) > 100)
238+
size_function = {
239+
"integer": lambda n: n,
240+
"float": lambda f: f if math.isfinite(f) else 0,
241+
"string": lambda s: len(s),
242+
"bytes": lambda b: len(b),
243+
"boolean": lambda b: int(b),
244+
}
245+
with deterministic_PRNG():
246+
247+
def test(data):
248+
v = getattr(data, f"draw_{node.ir_type}")(**node.kwargs)
249+
data.target_observations["v"] = size_function[node.ir_type](v)
250+
251+
runner = ConjectureRunner(
252+
test, settings=settings(TEST_SETTINGS, max_examples=50)
253+
)
254+
runner.cached_test_function_ir([node])
255+
256+
try:
257+
runner.optimise_targets()
258+
except RunIsComplete:
259+
pass

0 commit comments

Comments
 (0)