Skip to content

Commit fbe0d47

Browse files
authored
Merge pull request #4348 from tybug/collect-ast-constants
Initial work on collecting ast constants
2 parents 7c49f2d + 22ff77f commit fbe0d47

File tree

6 files changed

+322
-11
lines changed

6 files changed

+322
-11
lines changed

.github/workflows/main.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,12 +252,14 @@ jobs:
252252
version: ${{ env.EMSCRIPTEN_VERSION }}
253253
- name: Build
254254
run: |
255+
# TODO: remove this wheel pin; see https://github.com/pyodide/pyodide/issues/5585
256+
pip install -U wheel==0.45.1
255257
pip install pyodide-build==$PYODIDE_VERSION
256258
cd hypothesis-python/
257259
CFLAGS=-g2 LDFLAGS=-g2 pyodide build
258260
- name: Set up Pyodide venv and install dependencies
259261
run: |
260-
pip install --upgrade setuptools pip wheel build
262+
pip install --upgrade setuptools pip wheel==0.45.1 build
261263
python -m build --wheel hypothesis-python --outdir dist/
262264
pip download --dest=dist/ hypothesis-python/ pytest tzdata # fetch all the wheels
263265

hypothesis-python/RELEASE.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
RELEASE_TYPE: patch
2+
3+
Lays some groundwork for future work on collecting interesting literals from the code being tested, for increased bug-finding power (:issue:`3127`). There is no user-visible change (yet!).
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# This file is part of Hypothesis, which may be found at
2+
# https://github.com/HypothesisWorks/hypothesis/
3+
#
4+
# Copyright the Hypothesis Authors.
5+
# Individual contributors are listed in AUTHORS.rst and the git log.
6+
#
7+
# This Source Code Form is subject to the terms of the Mozilla Public License,
8+
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9+
# obtain one at https://mozilla.org/MPL/2.0/.
10+
11+
import ast
12+
import inspect
13+
import math
14+
import sys
15+
from ast import AST, Constant, Expr, NodeVisitor, UnaryOp, USub
16+
from functools import lru_cache
17+
from types import ModuleType
18+
from typing import TYPE_CHECKING, Optional, Union
19+
20+
from hypothesis.internal.escalation import is_hypothesis_file
21+
from hypothesis.internal.scrutineer import ModuleLocation
22+
23+
if TYPE_CHECKING:
24+
from typing import TypeAlias
25+
26+
ConstantT: "TypeAlias" = Union[int, float, bool, bytes, str]
27+
28+
29+
class ConstantVisitor(NodeVisitor):
    """Walk an AST and gather the literal constants worth remembering.

    Collected values accumulate in ``self.constants``. Booleans, infinite
    floats, strings that are empty, whitespace-only or longer than 20
    characters, the contents of f-strings, and bare string expression
    statements (which covers docstrings) are all left out.
    """

    def __init__(self):
        super().__init__()
        self.constants: set[ConstantT] = set()

    @classmethod
    def _unfold_constant(cls, value: object) -> set[ConstantT]:
        """Return ``{value}`` if ``value`` should be collected, else ``set()``."""
        if isinstance(value, str):
            # discard empty, whitespace-only, and long strings, which are
            # unlikely to be useful.
            if value == "" or value.isspace() or len(value) > 20:
                return set()
            return {value}
        if isinstance(value, bool):
            # bool is a subclass of int, so reject it before the int check.
            return set()
        if isinstance(value, float) and math.isinf(value):
            # we already upweight inf.
            return set()
        if isinstance(value, (int, float, bytes)):
            return {value}
        # Anything else a Constant node can hold (for example None, Ellipsis
        # or a complex number) is not collected.
        return set()  # pragma: no cover

    def _add_constant(self, constant: object) -> None:
        """Record ``constant`` if it passes the ``_unfold_constant`` filter."""
        self.constants.update(self._unfold_constant(constant))

    def visit_Constant(self, node):
        self._add_constant(node.value)
        self.generic_visit(node)

    def visit_UnaryOp(self, node: UnaryOp) -> None:
        # `a = -1` is actually a combination of a USub and the constant 1, so
        # fold the sign back into the value here.
        operand = node.operand
        is_negated_number = (
            isinstance(node.op, USub)
            and isinstance(operand, Constant)
            and isinstance(operand.value, (int, float))
            and not isinstance(operand.value, bool)
        )
        if not is_negated_number:
            self.generic_visit(node)
            return

        # don't recurse on this node, to avoid adding the positive variant.
        self._add_constant(-operand.value)

    def visit_Expr(self, node: Expr) -> None:
        inner = node.value
        if isinstance(inner, Constant) and isinstance(inner.value, str):
            # a standalone string expression, e.g. a docstring: skip it.
            return

        self.generic_visit(node)

    def visit_JoinedStr(self, node):
        # don't recurse on JoinedStr, i.e. f-strings. Constants that appear
        # *only* in f-strings are unlikely to be helpful.
        return None
83+
84+
85+
@lru_cache(1024)
def constants_from_ast(tree: AST) -> set[ConstantT]:
    """Collect the constants that ``ConstantVisitor`` finds in ``tree``.

    The result is memoised by ``lru_cache`` keyed on ``tree``, so repeated
    calls with the same tree hand back the same set object; treat it as
    read-only.
    """
    collector = ConstantVisitor()
    collector.visit(tree)
    return collector.constants
90+
91+
92+
@lru_cache(1024)
def _module_ast(module: ModuleType) -> Optional[AST]:
    """Parse the source of ``module`` into an AST.

    Returns None if the source cannot be retrieved or parsed for any reason.
    """
    try:
        return ast.parse(inspect.getsource(module))
    except Exception:
        # best effort: any failure to fetch or parse the source means there
        # is simply no tree for this module.
        return None
101+
102+
103+
def local_modules() -> tuple[ModuleType, ...]:
    """Return the currently imported modules whose file is local code.

    A module is included when ``ModuleLocation.from_path`` classifies its
    ``__file__`` as ``ModuleLocation.LOCAL``. Modules that have no
    ``__file__`` attribute, or whose ``__file__`` is None, are skipped.
    """
    modules = []
    # Iterate over a snapshot rather than the live dict: sys.modules can
    # change size while we loop (imports in other threads, or module objects
    # that import lazily on attribute access), which would otherwise raise
    # "RuntimeError: dictionary changed size during iteration".
    for module in sys.modules.copy().values():
        path = getattr(module, "__file__", None)
        if path is None:
            continue

        if ModuleLocation.from_path(path) is not ModuleLocation.LOCAL:
            continue

        modules.append(module)
    return tuple(modules)
116+
117+
118+
def local_constants():
    """Return the union of the constants found in every local module.

    Modules come from ``local_modules()``; Hypothesis' own files and modules
    whose source cannot be parsed are skipped.
    """
    found = set()
    for module in local_modules():
        # normally, hypothesis is a third-party library and is not returned
        # by local_modules. However, if it is installed as an editable package
        # with pip install -e, then we will pick up on it. Just hardcode an
        # ignore here.

        # this is actually covered by test_constants_from_running_file, but
        # not in the same process.
        if is_hypothesis_file(module.__file__):  # pragma: no cover
            continue

        module_tree = _module_ast(module)
        if module_tree is None:  # pragma: no cover
            continue
        found.update(constants_from_ast(module_tree))

    return found

hypothesis-python/src/hypothesis/internal/scrutineer.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import types
1818
from collections import defaultdict
1919
from collections.abc import Iterable
20+
from enum import IntEnum
2021
from functools import lru_cache, reduce
2122
from os import sep
2223
from pathlib import Path
@@ -231,16 +232,27 @@ def get_explaining_locations(traces):
231232
)
232233

233234

235+
class ModuleLocation(IntEnum):
    """Where a file lives: local code, site-packages, or the stdlib.

    The integer values order local files first, then site-packages, then
    the standard library.
    """

    LOCAL = 0
    SITE_PACKAGES = 1
    STDLIB = 2

    @classmethod
    @lru_cache(1024)
    def from_path(cls, path: str) -> "ModuleLocation":
        """Classify ``path`` by which known directory tree contains it."""
        resolved = Path(path).resolve()

        def is_under(roots) -> bool:
            return any(resolved.is_relative_to(root) for root in roots)

        # site-packages may be a subdir of stdlib or platlib, so it's important
        # to check for it before the stdlib.
        if is_under(SITE_PACKAGES_DIRS):
            return cls.SITE_PACKAGES
        if is_under(STDLIB_DIRS):
            return cls.STDLIB
        return cls.LOCAL
251+
252+
234253
# show local files first, then site-packages, then stdlib
235-
def _sort_key(path, lineno):
236-
path = Path(path).resolve()
237-
# site-packages may be a subdir of stdlib or platlib, so it's important to
238-
# check is_relative_to for this before the stdlib.
239-
if any(path.is_relative_to(p) for p in SITE_PACKAGES_DIRS):
240-
return (1, path, lineno)
241-
if any(path.is_relative_to(p) for p in STDLIB_DIRS):
242-
return (2, path, lineno)
243-
return (0, path, lineno)
254+
def _sort_key(path: str, lineno: int) -> tuple[int, str, int]:
    """Build a sort key: location category first, then path, then line."""
    location = ModuleLocation.from_path(path)
    return (location, path, lineno)
244256

245257

246258
def make_report(explanations, *, cap_lines_at=5):
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
# This file is part of Hypothesis, which may be found at
2+
# https://github.com/HypothesisWorks/hypothesis/
3+
#
4+
# Copyright the Hypothesis Authors.
5+
# Individual contributors are listed in AUTHORS.rst and the git log.
6+
#
7+
# This Source Code Form is subject to the terms of the Mozilla Public License,
8+
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9+
# obtain one at https://mozilla.org/MPL/2.0/.
10+
11+
import ast
12+
import subprocess
13+
import sys
14+
import textwrap
15+
from types import ModuleType
16+
17+
import pytest
18+
19+
from hypothesis import given, strategies as st
20+
from hypothesis.internal.constants_ast import _module_ast, constants_from_ast
21+
22+
from tests.common.utils import skipif_emscripten
23+
24+
25+
@pytest.mark.parametrize(
    "source, expected",
    [
        (
            """
            a1 = 42
            a2 = 3.14
            a3 = 'test1'
            a4 = b'test2'
            a5 = (1, 2)
            a6 = frozenset([3])
            """,
            {42, 3.14, "test1", b"test2", 1, 2, 3},
        ),
        ("a = (1, (2, 3), frozenset([4, 5]))", {1, 2, 3, 4, 5}),
        ("a = {'b': 1}", {"b", 1}),
        ("a = [1]", {1}),
        ("a = +42", {42}),
        ("a = 1 + 2", {1, 2}),
        ("a = ~ 42", {42}),
        # Everything below must be ignored:
        # * booleans
        # * math.inf and math.nan (these are attribute lookups rather than
        #   constants, but we wouldn't want to collect them even if they were)
        # * f-strings
        # * long strings
        # * pure-whitespace strings
        # * standalone string expressions (strings not assigned to a
        #   variable). This covers docstrings of all kinds.
        ("a = True", set()),
        ("a = False", set()),
        ("a = not False", set()),
        ("a = 1e999", set()),
        ("a = math.inf", set()),
        ("a = math.nan", set()),
        ('a = f"test {x}"', set()),
        (f'a = "{"b" * 100}"', set()),
        ('a = ""', set()),
        ('a = " "', set()),
        ('a = "\\n \\n \\n"', set()),
        ("'test'", set()),
        ("'test with \\n newlines'", set()),
    ],
)
def test_constants_from_ast(source, expected):
    # dedent so the triple-quoted case parses as top-level code.
    parsed = ast.parse(textwrap.dedent(source))
    assert constants_from_ast(parsed) == expected
73+
74+
75+
@given(st.integers(max_value=-1))
def test_parses_negatives(n):
    # a negative literal must be collected with its sign, and without the
    # positive variant alongside it.
    tree = ast.parse(f"a = {n}")
    assert constants_from_ast(tree) == {n}
78+
79+
80+
# constants_from_ast ignores the following strings, so never generate them:
# * empty strings
# * long strings
# * strings which are entirely whitespace
_collectable_text = st.text(min_size=1, max_size=10).filter(
    lambda s: not s.isspace()
)

constants = st.one_of(
    st.integers(),
    st.floats(allow_nan=False, allow_infinity=False),
    st.binary(),
    _collectable_text,
)
90+
91+
92+
@given(st.tuples(constants))
def test_tuple_constants(value):
    # str() of the tuple is used as the source text to parse.
    found = constants_from_ast(ast.parse(str(value)))
    assert set(found) == set(value)
96+
97+
98+
@given(st.frozensets(constants))
def test_frozenset_constants(value):
    # str() of the frozenset is used as the source text to parse.
    found = constants_from_ast(ast.parse(str(value)))
    assert set(found) == set(value)
102+
103+
104+
@skipif_emscripten
def test_constants_from_running_file(tmp_path):
    # Write a standalone script that imports stdlib, third-party and
    # Hypothesis modules, defines some local constants, and asserts on what
    # local_constants() returns; then run it in a fresh interpreter.
    p = tmp_path / "test_constants.py"
    p.write_text(
        textwrap.dedent(
            """
            import sys
            # stdlib
            import json
            # third-party
            import pytest
            import hypothesis
            from hypothesis.internal.constants_ast import local_constants

            # these modules are in fact detected as local if they are installed
            # as editable (as is common for contributors). Prevent the ast constant
            # logic from picking up on them
            for module in sys.modules.copy():
                if module.startswith("hypofuzz"):
                    del sys.modules[module]

            # local
            a = 42
            b = "test1"
            c = True
            d = 3.14
            e = b"test2"
            f = (1, 2)
            g = frozenset([3, 4])
            actual = local_constants()
            # booleans are never collected, so the True assigned to c is
            # deliberately not listed here.
            assert actual == {
                "hypofuzz",
                42,
                "test1",
                3.14,
                b"test2",
                1,
                2,
                3,
                4
            }, actual
            """,
        ),
        encoding="utf-8",
    )
    # check_call raises if the script's assertion fails.
    subprocess.check_call([sys.executable, str(p)])
151+
152+
153+
def test_constants_from_bad_module():
    # covering test for the except branch of _module_ast, using a module
    # object created on the fly.
    assert _module_ast(ModuleType("nonexistent")) is None

tooling/src/hypothesistooling/projects/hypothesispython.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,9 @@ def upload_distribution():
221221
# Construct plain-text + markdown version of this changelog entry,
222222
# with link to canonical source.
223223
build_docs(builder="text", only=["docs/changelog.rst"])
224-
textfile = os.path.join(HYPOTHESIS_PYTHON, "docs", "_build", "text", "changes.txt")
224+
textfile = os.path.join(
225+
HYPOTHESIS_PYTHON, "docs", "_build", "text", "changelog.txt"
226+
)
225227
with open(textfile, encoding="utf-8") as f:
226228
lines = f.readlines()
227229
entries = [i for i, l in enumerate(lines) if CHANGELOG_HEADER.match(l)]

0 commit comments

Comments
 (0)