Skip to content

Commit c50ffd4

Browse files
committed
Improve numeric pprinting
1 parent b597542 commit c50ffd4

File tree

3 files changed

+52
-17
lines changed

3 files changed

+52
-17
lines changed

hypothesis-python/RELEASE.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
RELEASE_TYPE: patch
2+
3+
This patch improves our pretty-printer for unusual numbers.
4+
5+
- Signalling NaNs are now represented by using the :mod:`struct` module
6+
to show the exact value by converting from a hexadecimal integer.
7+
8+
- CPython `limits integer-to-string conversions
9+
<https://docs.python.org/3/library/stdtypes.html#integer-string-conversion-length-limitation>`__
10+
to mitigate denial-of-service attacks. We now use hexadecimal for very large
11+
integers, and include underscore separators for integers with ten
12+
or more digits.

hypothesis-python/src/hypothesis/vendor/pretty.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,6 @@ def __init__(self, output=None, *, context=None):
143143
self.group_queue = GroupQueue(root_group)
144144
self.indentation = 0
145145

146-
self.snans = 0
147-
148146
self.stack = []
149147
self.singleton_pprinters = {}
150148
self.type_pprinters = {}
@@ -358,12 +356,6 @@ def _enumerate(self, seq):
358356

359357
def flush(self):
360358
"""Flush data that is left in the buffer."""
361-
if self.snans:
362-
# Reset self.snans *before* calling breakable(), which might flush()
363-
snans = self.snans
364-
self.snans = 0
365-
self.breakable(" ")
366-
self.text(f"# Saw {snans} signaling NaN" + "s" * (snans > 1))
367359
for data in self.buffer:
368360
self.output_width += data.output(self.output, self.output_width)
369361
self.buffer.clear()
@@ -747,19 +739,31 @@ def _exception_pprint(obj, p, cycle):
747739
p.pretty(arg)
748740

749741

742+
def _repr_integer(obj, p, cycle):
    """Emit a textual form of the integer ``obj`` through ``p.text``.

    The representation is chosen by magnitude:

    * ``abs(obj)`` below ``1_000_000_000``: plain ``repr(obj)``.
    * ``abs(obj)`` below ``10**640``: decimal with underscore separators,
      i.e. integers with ten or more decimal digits.
    * anything larger: hexadecimal with underscore separators.

    ``cycle`` is accepted to match the pprinter call signature and is unused.
    """
    magnitude = abs(obj)
    if magnitude >= 10**640:
        # for very very large integers, use hex because power-of-two bases are cheaper
        # https://docs.python.org/3/library/stdtypes.html#integer-string-conversion-length-limitation
        rendered = f"{obj:#_x}"
    elif magnitude >= 1_000_000_000:
        # ten or more decimal digits: group them with underscores
        rendered = f"{obj:#_d}"
    else:
        rendered = repr(obj)
    p.text(rendered)
752+
753+
750754
def _repr_float_counting_nans(obj, p, cycle):
751-
if isnan(obj) and hasattr(p, "snans"):
755+
if isnan(obj):
752756
if struct.pack("!d", abs(obj)) != struct.pack("!d", float("nan")):
753-
p.snans += 1
754-
if copysign(1.0, obj) == -1.0:
755-
p.text("-nan")
756-
return
757+
show = hex(*struct.unpack("Q", struct.pack("d", obj)))
758+
return p.text(f"struct.unpack('d', struct.pack('Q', {show}))[0]")
759+
elif copysign(1.0, obj) == -1.0:
760+
return p.text("-nan")
757761
p.text(repr(obj))
758762

759763

760764
#: printers for builtin types
761765
_type_pprinters = {
762-
int: _repr_pprint,
766+
int: _repr_integer,
763767
float: _repr_float_counting_nans,
764768
str: _repr_pprint,
765769
tuple: _seq_pprinter_factory("(", ")", tuple),

hypothesis-python/tests/cover/test_pretty.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
"""
4949

5050
import re
51+
import struct
5152
import warnings
5253
from collections import Counter, OrderedDict, defaultdict, deque
5354
from enum import Enum, Flag
@@ -58,6 +59,7 @@
5859
from hypothesis import given, strategies as st
5960
from hypothesis.control import current_build_context
6061
from hypothesis.internal.compat import PYPY
62+
from hypothesis.internal.conjecture.floats import float_to_lex
6163
from hypothesis.internal.floats import SIGNALING_NAN
6264
from hypothesis.vendor import pretty
6365

@@ -603,13 +605,15 @@ def test_breakable_at_group_boundary():
603605
[
604606
(float("nan"), "nan"),
605607
(-float("nan"), "-nan"),
606-
(SIGNALING_NAN, "nan # Saw 1 signaling NaN"),
607-
(-SIGNALING_NAN, "-nan # Saw 1 signaling NaN"),
608-
((SIGNALING_NAN, SIGNALING_NAN), "(nan, nan) # Saw 2 signaling NaNs"),
608+
(SIGNALING_NAN, "struct.unpack('d', struct.pack('Q', 0x7ff8000000000001))[0]"),
609+
(-SIGNALING_NAN, "struct.unpack('d', struct.pack('Q', 0xfff8000000000001))[0]"),
609610
],
610611
)
611612
def test_nan_reprs(obj, rep):
612613
assert pretty.pretty(obj) == rep
614+
assert float_to_lex(obj) == float_to_lex(
615+
eval(rep, {"struct": struct, "nan": float("nan")})
616+
)
613617

614618

615619
def _repr_call(*args, **kwargs):
@@ -739,3 +743,18 @@ def test_pprint_map_with_cycle(data):
739743
p = pretty.RepresentationPrinter(context=current_build_context())
740744
p.pretty(x)
741745
assert p.getvalue() == "ValidSyntaxRepr(...)"
746+
747+
748+
def test_pprint_large_integers():
    """A ten-digit integer is printed with underscore digit separators."""
    printer = pretty.RepresentationPrinter()
    printer.pretty(1234567890)
    rendered = printer.getvalue()
    assert rendered == "1_234_567_890"
752+
753+
754+
def test_pprint_extremely_large_integers():
    """A huge integer is printed as underscore-separated hex that evals back."""
    huge = 10**5000  # repr fails with ddos error
    printer = pretty.RepresentationPrinter()
    printer.pretty(huge)
    rendered = printer.getvalue()
    expected = f"{huge:#_x}"  # hexadecimal with underscores
    assert rendered == expected
    # the printed text must be a valid literal for the same value
    assert eval(rendered) == huge

0 commit comments

Comments
 (0)