Merge pull request #4065 from Zac-HD/numeric-pprinter

Zac-HD · web-flow · commit 5221dbdfda22 · 2024-08-04T01:47:20.000-07:00
Improve numeric pprinting
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,12 @@
+RELEASE_TYPE: patch
+
+This patch improves our pretty-printer for unusual numbers.
+
+- Signalling NaNs are now represented using the :mod:`struct` module,
+  which shows the exact value by converting from a hexadecimal integer.
+
+- CPython `limits integer-to-string conversions
+  <https://docs.python.org/3/library/stdtypes.html#integer-string-conversion-length-limitation>`__
+  to mitigate DoS attacks.  We now use hexadecimal for very large
+  integers, and include underscore separators for integers with ten
+  or more digits.
diff --git a/hypothesis-python/setup.py b/hypothesis-python/setup.py
@@ -60,7 +60,7 @@ def local_file(name):
     "pytest": ["pytest>=4.6"],
     "dpcontracts": ["dpcontracts>=0.4"],
     "redis": ["redis>=3.0.0"],
-    "crosshair": ["hypothesis-crosshair>=0.0.9", "crosshair-tool>=0.0.63"],
+    "crosshair": ["hypothesis-crosshair>=0.0.11", "crosshair-tool>=0.0.65"],
     # zoneinfo is an odd one: every dependency is conditional, because they're
     # only necessary on old versions of Python or Windows systems or emscripten.
     "zoneinfo": [
diff --git a/hypothesis-python/src/hypothesis/vendor/pretty.py b/hypothesis-python/src/hypothesis/vendor/pretty.py
@@ -143,8 +143,6 @@ def __init__(self, output=None, *, context=None):
         self.group_queue = GroupQueue(root_group)
         self.indentation = 0
 
-        self.snans = 0
-
         self.stack = []
         self.singleton_pprinters = {}
         self.type_pprinters = {}
@@ -358,12 +356,6 @@ def _enumerate(self, seq):
 
     def flush(self):
         """Flush data that is left in the buffer."""
-        if self.snans:
-            # Reset self.snans *before* calling breakable(), which might flush()
-            snans = self.snans
-            self.snans = 0
-            self.breakable("  ")
-            self.text(f"# Saw {snans} signaling NaN" + "s" * (snans > 1))
         for data in self.buffer:
             self.output_width += data.output(self.output, self.output_width)
         self.buffer.clear()
@@ -747,19 +739,31 @@ def _exception_pprint(obj, p, cycle):
             p.pretty(arg)
 
 
+def _repr_integer(obj, p, cycle):
+    if abs(obj) < 1_000_000_000:
+        p.text(repr(obj))
+    elif abs(obj) < 10**640:
+        # add underscores for integers with ten or more decimal digits
+        p.text(f"{obj:#_d}")
+    else:
+        # for very very large integers, use hex because power-of-two bases are cheaper
+        # https://docs.python.org/3/library/stdtypes.html#integer-string-conversion-length-limitation
+        p.text(f"{obj:#_x}")
+
+
 def _repr_float_counting_nans(obj, p, cycle):
-    if isnan(obj) and hasattr(p, "snans"):
+    if isnan(obj):
         if struct.pack("!d", abs(obj)) != struct.pack("!d", float("nan")):
-            p.snans += 1
-        if copysign(1.0, obj) == -1.0:
-            p.text("-nan")
-            return
+            show = hex(*struct.unpack("Q", struct.pack("d", obj)))
+            return p.text(f"struct.unpack('d', struct.pack('Q', {show}))[0]")
+        elif copysign(1.0, obj) == -1.0:
+            return p.text("-nan")
     p.text(repr(obj))
 
 
 #: printers for builtin types
 _type_pprinters = {
-    int: _repr_pprint,
+    int: _repr_integer,
     float: _repr_float_counting_nans,
     str: _repr_pprint,
     tuple: _seq_pprinter_factory("(", ")", tuple),
diff --git a/hypothesis-python/tests/cover/test_cache_implementation.py b/hypothesis-python/tests/cover/test_cache_implementation.py
@@ -56,14 +56,18 @@ def on_access(self, key, value, score):
 
 
 @st.composite
-def write_pattern(draw, min_size=0):
-    keys = draw(st.lists(st.integers(0, 1000), unique=True, min_size=1))
+def write_pattern(draw, min_distinct_keys=0):
+    keys = draw(
+        st.lists(st.integers(0, 1000), unique=True, min_size=max(min_distinct_keys, 1))
+    )
     values = draw(st.lists(st.integers(), unique=True, min_size=1))
-    return draw(
-        st.lists(
-            st.tuples(st.sampled_from(keys), st.sampled_from(values)), min_size=min_size
-        )
+    s = st.lists(
+        st.tuples(st.sampled_from(keys), st.sampled_from(values)),
+        min_size=min_distinct_keys,
     )
+    if min_distinct_keys > 0:
+        s = s.filter(lambda ls: len({k for k, _ in ls}) >= min_distinct_keys)
+    return draw(s)
 
 
 class ValueScored(GenericCache):
@@ -111,8 +115,12 @@ def test_behaves_like_a_dict_with_losses(implementation, writes, size):
         assert len(target) <= min(len(model), size)
 
 
-@settings(suppress_health_check=[HealthCheck.too_slow], deadline=None)
-@given(write_pattern(min_size=2), st.data())
+@settings(
+    suppress_health_check={HealthCheck.too_slow}
+    | set(settings.get_profile(settings._current_profile).suppress_health_check),
+    deadline=None,
+)
+@given(write_pattern(min_distinct_keys=2), st.data())
 def test_always_evicts_the_lowest_scoring_value(writes, data):
     scores = {}
 
diff --git a/hypothesis-python/tests/cover/test_explicit_examples.py b/hypothesis-python/tests/cover/test_explicit_examples.py
@@ -224,7 +224,7 @@ def test(a):
 
 @fails_with(DeadlineExceeded)
 @example(10)
-@settings(phases=[Phase.explicit])
+@settings(phases=[Phase.explicit], deadline=1)
 @given(integers())
 def test(x):
     time.sleep(10)
diff --git a/hypothesis-python/tests/cover/test_float_nastiness.py b/hypothesis-python/tests/cover/test_float_nastiness.py
@@ -41,12 +41,10 @@
         (-sys.float_info.max, sys.float_info.max),
     ],
 )
-def test_floats_are_in_range(lower, upper):
-    @given(st.floats(lower, upper))
-    def test_is_in_range(t):
-        assert lower <= t <= upper
-
-    test_is_in_range()
+@given(data=st.data())
+def test_floats_are_in_range(data, lower, upper):
+    t = data.draw(st.floats(lower, upper))
+    assert lower <= t <= upper
 
 
 @pytest.mark.parametrize("sign", [-1, 1])
diff --git a/hypothesis-python/tests/cover/test_health_checks.py b/hypothesis-python/tests/cover/test_health_checks.py
@@ -91,7 +91,7 @@ def test2(x):
 
 def test_filtering_everything_fails_a_health_check():
     @given(st.integers().filter(lambda x: False))
-    @settings(database=None)
+    @settings(database=None, suppress_health_check=())
     def test(x):
         pass
 
diff --git a/hypothesis-python/tests/cover/test_pretty.py b/hypothesis-python/tests/cover/test_pretty.py
@@ -48,6 +48,7 @@
 """
 
 import re
+import struct
 import warnings
 from collections import Counter, OrderedDict, defaultdict, deque
 from enum import Enum, Flag
@@ -58,6 +59,7 @@
 from hypothesis import given, strategies as st
 from hypothesis.control import current_build_context
 from hypothesis.internal.compat import PYPY
+from hypothesis.internal.conjecture.floats import float_to_lex
 from hypothesis.internal.floats import SIGNALING_NAN
 from hypothesis.vendor import pretty
 
@@ -603,13 +605,15 @@ def test_breakable_at_group_boundary():
     [
         (float("nan"), "nan"),
         (-float("nan"), "-nan"),
-        (SIGNALING_NAN, "nan  # Saw 1 signaling NaN"),
-        (-SIGNALING_NAN, "-nan  # Saw 1 signaling NaN"),
-        ((SIGNALING_NAN, SIGNALING_NAN), "(nan, nan)  # Saw 2 signaling NaNs"),
+        (SIGNALING_NAN, "struct.unpack('d', struct.pack('Q', 0x7ff8000000000001))[0]"),
+        (-SIGNALING_NAN, "struct.unpack('d', struct.pack('Q', 0xfff8000000000001))[0]"),
     ],
 )
 def test_nan_reprs(obj, rep):
     assert pretty.pretty(obj) == rep
+    assert float_to_lex(obj) == float_to_lex(
+        eval(rep, {"struct": struct, "nan": float("nan")})
+    )
 
 
 def _repr_call(*args, **kwargs):
@@ -739,3 +743,18 @@ def test_pprint_map_with_cycle(data):
     p = pretty.RepresentationPrinter(context=current_build_context())
     p.pretty(x)
     assert p.getvalue() == "ValidSyntaxRepr(...)"
+
+
+def test_pprint_large_integers():
+    p = pretty.RepresentationPrinter()
+    p.pretty(1234567890)
+    assert p.getvalue() == "1_234_567_890"
+
+
+def test_pprint_extremely_large_integers():
+    x = 10**5000  # repr fails with ddos error
+    p = pretty.RepresentationPrinter()
+    p.pretty(x)
+    got = p.getvalue()
+    assert got == f"{x:#_x}"  # hexadecimal with underscores
+    assert eval(got) == x
diff --git a/hypothesis-python/tests/nocover/test_recursive.py b/hypothesis-python/tests/nocover/test_recursive.py
@@ -73,22 +73,14 @@ def breadth(x):
 
 
 def test_drawing_many_near_boundary():
-    target = 4
-
-    ls = minimal(
-        st.lists(
-            st.recursive(
-                st.booleans(),
-                lambda x: st.lists(
-                    x, min_size=2 * (target - 1), max_size=2 * target
-                ).map(tuple),
-                max_leaves=2 * target - 1,
-            )
-        ),
-        lambda x: len(set(x)) >= target,
-        timeout_after=None,
+    size = 4
+    elems = st.recursive(
+        st.booleans(),
+        lambda x: st.lists(x, min_size=2 * (size - 1), max_size=2 * size).map(tuple),
+        max_leaves=2 * size - 1,
     )
-    assert len(ls) == target
+    ls = minimal(st.lists(elems), lambda x: len(set(x)) >= size, timeout_after=None)
+    assert len(ls) == size
 
 
 def test_can_use_recursive_data_in_sets():