Skip to content

Commit f200032

Browse files
committed
Add tests for regex pattern serialisation
1 parent 0bdddea commit f200032

File tree

2 files changed

+70
-6
lines changed

2 files changed

+70
-6
lines changed

fastjsonschema/generator.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,12 @@ def global_state_code(self):
100100
'',
101101
'',
102102
])
103-
regex_patterns = (
104-
repr(k) + ": " + repr_regex(v)
105-
for k, v in self._compile_regexps.items()
106-
)
107103
return '\n'.join(self._extra_imports_lines + [
108104
'import re',
109105
'from fastjsonschema import JsonSchemaValueException',
110106
'',
111107
'',
112-
'REGEX_PATTERNS = {\n ' + ",\n ".join(regex_patterns) + "\n}",
108+
'REGEX_PATTERNS = ' + serialize_regexes(self._compile_regexps),
113109
'',
114110
])
115111

@@ -298,8 +294,17 @@ def create_variable_is_dict(self):
298294
self.l('{variable}_is_dict = isinstance({variable}, dict)')
299295

300296

301-
def repr_regex(regex):
297+
def serialize_regexes(patterns_dict):
302298
# Unfortunately, using `pprint.pformat` causes errors,
299+
# especially with big regexes
300+
regex_patterns = (
301+
repr(k) + ": " + repr_regex(v)
302+
for k, v in patterns_dict.items()
303+
)
304+
return '{\n ' + ",\n ".join(regex_patterns) + "\n}"
305+
306+
307+
def repr_regex(regex):
303308
all_flags = ("A", "I", "DEBUG", "L", "M", "S", "X")
304309
flags = " | ".join(f"re.{f}" for f in all_flags if regex.flags & getattr(re, f))
305310
flags = ", " + flags if flags else ""

tests/test_pattern_serialization.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import re
2+
3+
from fastjsonschema.generator import serialize_regexes
4+
5+
6+
# Examples
7+
ENTRYPOINT_PATTERN = r"[^\[\s=]([^=]*[^\s=])?"
8+
PEP508_IDENTIFIER_PATTERN = r"([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])"
9+
PEP440_VERSION = r"""
10+
v?
11+
(?:
12+
(?:(?P<epoch>[0-9]+)!)? # epoch
13+
(?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
14+
(?P<pre> # pre-release
15+
[-_\.]?
16+
(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
17+
[-_\.]?
18+
(?P<pre_n>[0-9]+)?
19+
)?
20+
(?P<post> # post release
21+
(?:-(?P<post_n1>[0-9]+))
22+
|
23+
(?:
24+
[-_\.]?
25+
(?P<post_l>post|rev|r)
26+
[-_\.]?
27+
(?P<post_n2>[0-9]+)?
28+
)
29+
)?
30+
(?P<dev> # dev release
31+
[-_\.]?
32+
(?P<dev_l>dev)
33+
[-_\.]?
34+
(?P<dev_n>[0-9]+)?
35+
)?
36+
)
37+
(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
38+
"""
39+
40+
EXAMPLES = {
41+
"unicode-identifier": re.compile(r"^(\w&[^0-9])\w*$", re.I),
42+
"entry-point": re.compile(f"^{ENTRYPOINT_PATTERN}$", re.I),
43+
"pep508-identifier": re.compile(f"^{PEP508_IDENTIFIER_PATTERN}$", re.I),
44+
# Regression tests:
45+
"issue-109": re.compile(r"^[ \r\n\t\S]+$"),
46+
# Some long regexes that would likely break with pprint:
47+
"pep-440": re.compile(r"^\s*" + PEP440_VERSION + r"\s*$", re.X | re.I)
48+
}
49+
50+
51+
def test_serialize_regexes():
52+
serialized = serialize_regexes(EXAMPLES)
53+
reconstructed = eval(serialized)
54+
for key, value in EXAMPLES.items():
55+
assert key in reconstructed
56+
evaluated = reconstructed[key]
57+
assert value.pattern == evaluated.pattern
58+
assert value.flags == evaluated.flags
59+
assert value == evaluated

0 commit comments

Comments
 (0)