HypothesisWorks
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
Lines changed: 3 additions & 0 deletions
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
Lines changed: 18 additions & 45 deletions
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
Lines changed: 40 additions & 27 deletions
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/optimiser.py b/hypothesis-python/src/hypothesis/internal/conjecture/optimiser.py
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,3 @@
+RELEASE_TYPE: patch
+
+This patch improves our deduplication tracking across all strategies (:pull:`4007`). Hypothesis is now less likely to generate the same input twice.
@@ -85,20 +85,7 @@ def wrapper(tp):
         return wrapper
 
 
-ONE_BOUND_INTEGERS_LABEL = calc_label_from_name("trying a one-bound int allowing 0")
-INTEGER_RANGE_DRAW_LABEL = calc_label_from_name("another draw in integer_range()")
-BIASED_COIN_LABEL = calc_label_from_name("biased_coin()")
-
 TOP_LABEL = calc_label_from_name("top")
-DRAW_BYTES_LABEL = calc_label_from_name("draw_bytes() in ConjectureData")
-DRAW_FLOAT_LABEL = calc_label_from_name("drawing a float")
-FLOAT_STRATEGY_DO_DRAW_LABEL = calc_label_from_name(
-    "getting another float in FloatStrategy"
-)
-INTEGER_WEIGHTED_DISTRIBUTION = calc_label_from_name(
-    "drawing from a weighted distribution in integers"
-)
-
 InterestingOrigin = Tuple[
     Type[BaseException], str, int, Tuple[Any, ...], Tuple[Tuple[Any, ...], ...]
 ]
@@ -370,11 +357,9 @@ def run(self) -> Any:
         blocks = self.examples.blocks
         for record in self.examples.trail:
             if record == DRAW_BITS_RECORD:
-                self.__push(0)
                 self.bytes_read = blocks.endpoints[self.block_count]
                 self.block(self.block_count)
                 self.block_count += 1
-                self.__pop(discarded=False)
             elif record == IR_NODE_RECORD:
                 data = self.examples.ir_nodes[self.ir_node_count]
                 self.ir_node(data)
@@ -469,8 +454,8 @@ class ExampleRecord:
     """
 
     def __init__(self) -> None:
-        self.labels = [DRAW_BYTES_LABEL]
-        self.__index_of_labels: "Optional[Dict[int, int]]" = {DRAW_BYTES_LABEL: 0}
+        self.labels: List[int] = []
+        self.__index_of_labels: "Optional[Dict[int, int]]" = {}
         self.trail = IntList()
         self.ir_nodes: List[IRNode] = []
 
@@ -522,11 +507,9 @@ def __init__(self, record: ExampleRecord, blocks: "Blocks") -> None:
         self.trail = record.trail
         self.ir_nodes = record.ir_nodes
         self.labels = record.labels
-        self.__length = (
-            self.trail.count(STOP_EXAMPLE_DISCARD_RECORD)
-            + record.trail.count(STOP_EXAMPLE_NO_DISCARD_RECORD)
-            + record.trail.count(DRAW_BITS_RECORD)
-        )
+        self.__length = self.trail.count(
+            STOP_EXAMPLE_DISCARD_RECORD
+        ) + record.trail.count(STOP_EXAMPLE_NO_DISCARD_RECORD)
         self.blocks = blocks
         self.__children: "Optional[List[Sequence[int]]]" = None
 
@@ -649,18 +632,23 @@ def start_example(self, i: int, label_index: int) -> None:
 
     class _mutator_groups(ExampleProperty):
         def begin(self) -> None:
-            self.groups: "Dict[Tuple[int, int], List[int]]" = defaultdict(list)
+            self.groups: "Dict[int, Set[Tuple[int, int]]]" = defaultdict(set)
 
         def start_example(self, i: int, label_index: int) -> None:
-            depth = len(self.example_stack)
-            self.groups[label_index, depth].append(i)
+            # TODO should we discard start == end cases? occurs for eg st.data()
+            # which is conditionally or never drawn from. arguably swapping
+            # nodes with the empty list is a useful mutation enabled by start == end?
+            key = (self.examples[i].ir_start, self.examples[i].ir_end)
+            self.groups[label_index].add(key)
 
-        def finish(self) -> Iterable[Iterable[int]]:
+        def finish(self) -> Iterable[Set[Tuple[int, int]]]:
             # Discard groups with only one example, since the mutator can't
             # do anything useful with them.
             return [g for g in self.groups.values() if len(g) >= 2]
 
-    mutator_groups: List[List[int]] = calculated_example_property(_mutator_groups)
+    mutator_groups: List[Set[Tuple[int, int]]] = calculated_example_property(
+        _mutator_groups
+    )
 
     @property
     def children(self) -> List[Sequence[int]]:
@@ -1338,7 +1326,6 @@ def draw_boolean(
 
         size = 2**bits
 
-        self._cd.start_example(BIASED_COIN_LABEL)
         while True:
             # The logic here is a bit complicated and special cased to make it
             # play better with the shrinker.
@@ -1409,7 +1396,6 @@ def draw_boolean(
                     result = i > falsey
 
             break
-        self._cd.stop_example()
         return result
 
     def draw_integer(
@@ -1460,24 +1446,20 @@ def draw_integer(
             assert max_value is not None  # make mypy happy
             probe = max_value + 1
             while max_value < probe:
-                self._cd.start_example(ONE_BOUND_INTEGERS_LABEL)
                 probe = shrink_towards + self._draw_unbounded_integer(
                     forced=None if forced is None else forced - shrink_towards,
                     fake_forced=fake_forced,
                 )
-                self._cd.stop_example()
             return probe
 
         if max_value is None:
             assert min_value is not None
             probe = min_value - 1
             while probe < min_value:
-                self._cd.start_example(ONE_BOUND_INTEGERS_LABEL)
                 probe = shrink_towards + self._draw_unbounded_integer(
                     forced=None if forced is None else forced - shrink_towards,
                     fake_forced=fake_forced,
                 )
-                self._cd.stop_example()
             return probe
 
         return self._draw_bounded_integer(
@@ -1518,7 +1500,6 @@ def draw_float(
         assert self._cd is not None
 
         while True:
-            self._cd.start_example(FLOAT_STRATEGY_DO_DRAW_LABEL)
             # If `forced in nasty_floats`, then `forced` was *probably*
             # generated by drawing a nonzero index from the sampler. However, we
             # have no obligation to generate it that way when forcing. In particular,
@@ -1530,7 +1511,6 @@ def draw_float(
                 if sampler
                 else 0
             )
-            self._cd.start_example(DRAW_FLOAT_LABEL)
             if i == 0:
                 result = self._draw_float(
                     forced_sign_bit=forced_sign_bit,
@@ -1546,8 +1526,6 @@ def draw_float(
                     assert pos_clamper is not None
                     clamped = pos_clamper(result)
                 if clamped != result and not (math.isnan(result) and allow_nan):
-                    self._cd.stop_example()
-                    self._cd.start_example(DRAW_FLOAT_LABEL)
                     self._draw_float(forced=clamped, fake_forced=fake_forced)
                     result = clamped
             else:
@@ -1576,8 +1554,6 @@ def draw_float(
 
                 self._draw_float(forced=result, fake_forced=fake_forced)
 
-            self._cd.stop_example()  # (DRAW_FLOAT_LABEL)
-            self._cd.stop_example()  # (FLOAT_STRATEGY_DO_DRAW_LABEL)
             return result
 
     def draw_string(
@@ -1771,7 +1747,6 @@ def _draw_bounded_integer(
                 7 / 8, forced=None if forced is None else False, fake_forced=fake_forced
             )
         ):
-            self._cd.start_example(INTEGER_WEIGHTED_DISTRIBUTION)
             # For large ranges, we combine the uniform random distribution from draw_bits
             # with a weighting scheme with moderate chance.  Cutoff at 2 ** 24 so that our
             # choice of unicode characters is uniform but the 32bit distribution is not.
@@ -1782,18 +1757,15 @@ def _draw_bounded_integer(
                 upper=center if not above else min(upper, center + 2**force_bits - 1),
                 _vary_effective_size=False,
             )
-            self._cd.stop_example()
 
             assert lower <= forced <= upper
 
         while probe > gap:
-            self._cd.start_example(INTEGER_RANGE_DRAW_LABEL)
             probe = self._cd.draw_bits(
                 bits,
                 forced=None if forced is None else abs(forced - center),
                 fake_forced=fake_forced,
             )
-            self._cd.stop_example()
 
         if above:
             result = center + probe
@@ -1938,12 +1910,13 @@ def for_ir_tree(
         *,
         observer: Optional[DataObserver] = None,
         provider: Union[type, PrimitiveProvider] = HypothesisProvider,
+        max_length: Optional[int] = None,
     ) -> "ConjectureData":
         from hypothesis.internal.conjecture.engine import BUFFER_SIZE
 
         return cls(
-            BUFFER_SIZE,
-            b"",
+            max_length=BUFFER_SIZE if max_length is None else max_length,
+            prefix=b"",
             random=None,
             ir_tree_prefix=ir_tree_prefix,
             observer=observer,
 
@@ -381,16 +381,30 @@ def _cache(self, data: ConjectureData) -> None:
             self.__data_cache_ir[key] = result
 
     def cached_test_function_ir(
-        self, nodes: List[IRNode]
+        self, nodes: List[IRNode], *, error_on_discard: bool = False
     ) -> Union[ConjectureResult, _Overrun]:
         key = self._cache_key_ir(nodes=nodes)
         try:
             return self.__data_cache_ir[key]
         except KeyError:
             pass
 
+        # explicitly use a no-op DataObserver here instead of a TreeRecordingObserver.
+        # The reason is we don't expect simulate_test_function to explore new choices
+        # and write back to the tree, so we don't want the overhead of the
+        # TreeRecordingObserver tracking those calls.
+        trial_observer: Optional[DataObserver] = DataObserver()
+        if error_on_discard:
+
+            class DiscardObserver(DataObserver):
+                @override
+                def kill_branch(self) -> NoReturn:
+                    raise ContainsDiscard
+
+            trial_observer = DiscardObserver()
+
         try:
-            trial_data = self.new_conjecture_data_ir(nodes)
+            trial_data = self.new_conjecture_data_ir(nodes, observer=trial_observer)
             self.tree.simulate_test_function(trial_data)
         except PreviouslyUnseenBehaviour:
             pass
@@ -1063,13 +1077,24 @@ def generate_mutations_from(
 
                 group = self.random.choice(groups)
 
-                ex1, ex2 = (
-                    data.examples[i] for i in sorted(self.random.sample(group, 2))
-                )
-                assert ex1.end <= ex2.start
+                (start1, end1), (start2, end2) = self.random.sample(sorted(group), 2)
+                if (start1 <= start2 <= end2 <= end1) or (
+                    start2 <= start1 <= end1 <= end2
+                ):
+                    # one example entirely contains the other. give up.
+                    # TODO use more intelligent mutation for containment, like
+                    # replacing child with parent or vice versa. Would allow for
+                    # recursive / subtree mutation
+                    failed_mutations += 1
+                    continue
 
-                e = self.random.choice([ex1, ex2])
-                replacement = data.buffer[e.start : e.end]
+                if start1 > start2:
+                    (start1, end1), (start2, end2) = (start2, end2), (start1, end1)
+                assert end1 <= start2
+
+                nodes = data.examples.ir_tree_nodes
+                (start, end) = self.random.choice([(start1, end1), (start2, end2)])
+                replacement = nodes[start:end]
 
                 try:
                     # We attempt to replace both the examples with
@@ -1080,17 +1105,16 @@ def generate_mutations_from(
                     # really matter. It may not achieve the desired result,
                     # but it's still a perfectly acceptable choice sequence
                     # to try.
-                    new_data = self.cached_test_function(
-                        data.buffer[: ex1.start]
+                    new_data = self.cached_test_function_ir(
+                        nodes[:start1]
                         + replacement
-                        + data.buffer[ex1.end : ex2.start]
+                        + nodes[end1:start2]
                         + replacement
-                        + data.buffer[ex2.end :],
+                        + nodes[end2:],
                         # We set error_on_discard so that we don't end up
                         # entering parts of the tree we consider redundant
                         # and not worth exploring.
                         error_on_discard=True,
-                        extend=BUFFER_SIZE,
                     )
                 except ContainsDiscard:
                     failed_mutations += 1
@@ -1184,6 +1208,7 @@ def new_conjecture_data_ir(
         ir_tree_prefix: List[IRNode],
         *,
         observer: Optional[DataObserver] = None,
+        max_length: Optional[int] = None,
     ) -> ConjectureData:
         provider = (
             HypothesisProvider if self._switch_to_hypothesis_provider else self.provider
@@ -1193,7 +1218,7 @@ def new_conjecture_data_ir(
             observer = DataObserver()
 
         return ConjectureData.for_ir_tree(
-            ir_tree_prefix, observer=observer, provider=provider
+            ir_tree_prefix, observer=observer, provider=provider, max_length=max_length
         )
 
     def new_conjecture_data(
@@ -1331,7 +1356,6 @@ def cached_test_function(
         self,
         buffer: Union[bytes, bytearray],
         *,
-        error_on_discard: bool = False,
         extend: int = 0,
     ) -> Union[ConjectureResult, _Overrun]:
         """Checks the tree to see if we've tested this buffer, and returns the
@@ -1370,18 +1394,7 @@ def check_result(
         except KeyError:
             pass
 
-        observer: DataObserver
-        if error_on_discard:
-
-            class DiscardObserver(DataObserver):
-                @override
-                def kill_branch(self) -> NoReturn:
-                    raise ContainsDiscard
-
-            observer = DiscardObserver()
-        else:
-            observer = DataObserver()
-
+        observer = DataObserver()
         dummy_data = self.new_conjecture_data(
             prefix=buffer, max_length=max_length, observer=observer
         )
 
@@ -136,12 +136,12 @@ def attempt_replace(v):
 
                     for i, ex in enumerate(self.current_data.examples):
                         if ex.start >= block.end:
-                            break
+                            break  # pragma: no cover
                         if ex.end <= block.start:
                             continue
                         ex_attempt = attempt.examples[i]
                         if ex.length == ex_attempt.length:
-                            continue
+                            continue  # pragma: no cover
                         replacement = attempt.buffer[ex_attempt.start : ex_attempt.end]
                         if self.consider_new_test_data(
                             self.engine.cached_test_function(
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+RELEASE_TYPE: patch`
	`2`	`+`
	`3`	+This patch improves our deduplication tracking across all strategies (:pull:`4007`). Hypothesis is now less likely to generate the same input twice.