Commit 0d5c740

Implement linting for html5lib-tests
This checks that we have the right headers, in the right order, and checks for both duplicate headers and duplicate tests.
1 parent 4e82e3d · commit 0d5c740

19 files changed: +1891, −0 lines
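The diff below does not include lint_lib/lint.py itself, so the exact implementation of the checks described in the commit message is not shown. As a rough sketch only, assuming each test block is parsed into an ordered list of section headers (all names here are hypothetical, not the vendored code):

import sys
from collections import Counter

def duplicate_headers(headers):
    """Return the headers that occur more than once in one test block.

    `headers` is assumed to be the ordered list of section names parsed
    from a single test (e.g. ["#data", "#errors", "#document"]).
    """
    return [name for name, count in Counter(headers).items() if count > 1]

if __name__ == "__main__":
    # A block with a repeated "#errors" section should be flagged.
    dupes = duplicate_headers(["#data", "#errors", "#errors", "#document"])
    for name in dupes:
        print("duplicate header: %s" % name, file=sys.stderr)
    sys.exit(1 if dupes else 0)

A header-order check would follow the same shape: compare the parsed list against the expected sequence and report the first mismatch.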

.github/workflows/lint.yml

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
name: lint

concurrency:
  group: "${{github.workflow}}-${{github.ref}}"
  cancel-in-progress: true

on:
  workflow_dispatch:
  push:
    branches:
      - master
  pull_request:
    types: [opened, synchronize]
    branches:
      - '*'

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - run: ./lint

.gitignore

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
# Copyright (c) 2014 GitHub, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
doc/_build/

# PyBuilder
target/

lint

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
#!/usr/bin/env python3
import sys

import lint_lib.lint as lint

sys.exit(lint.main())
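The wrapper above delegates to lint_lib.lint.main(), which is not part of this excerpt; all the script itself requires is that main()'s return value be usable as a process exit status. A minimal stand-in illustrating that contract (hypothetical, not the real module):

# Hypothetical stand-in for lint_lib/lint.py, shown only to illustrate the
# entry-point contract used by the ./lint wrapper above.
def main():
    failed = False
    # ... run the header-order and duplicate checks over the test files ...
    # Return a value suitable for sys.exit(): 0 on success, 1 on failure.
    return 1 if failed else 0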

lint_lib/__init__.py

Whitespace-only changes.
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
diff --git a/lint_lib/_vendor/funcparserlib/parser.py b/lint_lib/_vendor/funcparserlib/parser.py
index eb2f53f..0f86e6c 100644
--- a/lint_lib/_vendor/funcparserlib/parser.py
+++ b/lint_lib/_vendor/funcparserlib/parser.py
@@ -137,19 +137,6 @@ class Parser(object):
         "('x', 'y')"

         ```
-
-        !!! Note
-
-            You can enable the parsing log this way:
-
-            ```python
-            import logging
-            logging.basicConfig(level=logging.DEBUG)
-            import funcparserlib.parser
-            funcparserlib.parser.debug = True
-            ```
-
-        The way to enable the parsing log may be changed in future versions.
         """
         self.name = name
         return self

lint_lib/_vendor/__init__.py

Whitespace-only changes.
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
Copyright © 2009/2021 Andrey Vlasovskikh

Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

lint_lib/_vendor/funcparserlib/__init__.py

Whitespace-only changes.
lint_lib/_vendor/funcparserlib/lexer.py

Lines changed: 211 additions & 0 deletions
@@ -0,0 +1,211 @@
# -*- coding: utf-8 -*-

# Copyright © 2009/2021 Andrey Vlasovskikh
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be included in all copies
# or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from __future__ import unicode_literals

__all__ = ["make_tokenizer", "TokenSpec", "Token", "LexerError"]

import re


class LexerError(Exception):
    def __init__(self, place, msg):
        self.place = place
        self.msg = msg

    def __str__(self):
        s = "cannot tokenize data"
        line, pos = self.place
        return '%s: %d,%d: "%s"' % (s, line, pos, self.msg)


class TokenSpec(object):
    """A token specification for generating a lexer via `make_tokenizer()`."""

    def __init__(self, type, pattern, flags=0):
        """Initialize a `TokenSpec` object.

        Parameters:
            type (str): User-defined type of the token (e.g. `"name"`, `"number"`,
                `"operator"`)
            pattern (str): Regexp for matching this token type
            flags (int, optional): Regexp flags, the second argument of `re.compile()`
        """
        self.type = type
        self.pattern = pattern
        self.flags = flags

    def __repr__(self):
        return "TokenSpec(%r, %r, %r)" % (self.type, self.pattern, self.flags)


class Token(object):
    """A token object that represents a substring of certain type in your text.

    You can compare tokens for equality using the `==` operator. Tokens also define
    custom `repr()` and `str()`.

    Attributes:
        type (str): User-defined type of the token (e.g. `"name"`, `"number"`,
            `"operator"`)
        value (str): Text value of the token
        start (Optional[Tuple[int, int]]): Start position (_line_, _column_)
        end (Optional[Tuple[int, int]]): End position (_line_, _column_)
    """

    def __init__(self, type, value, start=None, end=None):
        """Initialize a `Token` object."""
        self.type = type
        self.value = value
        self.start = start
        self.end = end

    def __repr__(self):
        return "Token(%r, %r)" % (self.type, self.value)

    def __eq__(self, other):
        # FIXME: Case sensitivity is assumed here
        if other is None:
            return False
        else:
            return self.type == other.type and self.value == other.value

    def _pos_str(self):
        if self.start is None or self.end is None:
            return ""
        else:
            sl, sp = self.start
            el, ep = self.end
            return "%d,%d-%d,%d:" % (sl, sp, el, ep)

    def __str__(self):
        s = "%s %s '%s'" % (self._pos_str(), self.type, self.value)
        return s.strip()

    @property
    def name(self):
        return self.value

    def pformat(self):
        return "%s %s '%s'" % (
            self._pos_str().ljust(20),  # noqa
            self.type.ljust(14),
            self.value,
        )


def make_tokenizer(specs):
    # noinspection GrazieInspection
    """Make a function that tokenizes text based on the regexp specs.

    Type: `(Sequence[TokenSpec | Tuple]) -> Callable[[str], Iterable[Token]]`

    A token spec is `TokenSpec` instance.

    !!! Note

        For legacy reasons, a token spec may also be a tuple of (_type_, _args_), where
        _type_ sets the value of `Token.type` for the token, and _args_ are the
        positional arguments for `re.compile()`: either just (_pattern_,) or
        (_pattern_, _flags_).

    It returns a tokenizer function that takes a string and returns an iterable of
    `Token` objects, or raises `LexerError` if it cannot tokenize the string according
    to its token specs.

    Examples:

    ```pycon
    >>> tokenize = make_tokenizer([
    ...     TokenSpec("space", r"\\s+"),
    ...     TokenSpec("id", r"\\w+"),
    ...     TokenSpec("op", r"[,!]"),
    ... ])
    >>> text = "Hello, World!"
    >>> [t for t in tokenize(text) if t.type != "space"]  # noqa
    [Token('id', 'Hello'), Token('op', ','), Token('id', 'World'), Token('op', '!')]
    >>> text = "Bye?"
    >>> list(tokenize(text))
    Traceback (most recent call last):
        ...
    lexer.LexerError: cannot tokenize data: 1,4: "Bye?"

    ```
    """
    compiled = []
    for spec in specs:
        if isinstance(spec, TokenSpec):
            c = spec.type, re.compile(spec.pattern, spec.flags)
        else:
            name, args = spec
            c = name, re.compile(*args)
        compiled.append(c)

    def match_specs(s, i, position):
        line, pos = position
        for type, regexp in compiled:
            m = regexp.match(s, i)
            if m is not None:
                value = m.group()
                nls = value.count("\n")
                n_line = line + nls
                if nls == 0:
                    n_pos = pos + len(value)
                else:
                    n_pos = len(value) - value.rfind("\n") - 1
                return Token(type, value, (line, pos + 1), (n_line, n_pos))
        else:
            err_line = s.splitlines()[line - 1]
            raise LexerError((line, pos + 1), err_line)

    def f(s):
        length = len(s)
        line, pos = 1, 0
        i = 0
        while i < length:
            t = match_specs(s, i, (line, pos))
            yield t
            line, pos = t.end
            i += len(t.value)

    return f


# This is an example of token specs. See also [this article][1] for a
# discussion of searching for multiline comments using regexps (including `*?`).
#
# [1]: http://ostermiller.org/findcomment.html
_example_token_specs = [
    TokenSpec("COMMENT", r"\(\*(.|[\r\n])*?\*\)", re.MULTILINE),
    TokenSpec("COMMENT", r"\{(.|[\r\n])*?\}", re.MULTILINE),
    TokenSpec("COMMENT", r"//.*"),
    TokenSpec("NL", r"[\r\n]+"),
    TokenSpec("SPACE", r"[ \t\r\n]+"),
    TokenSpec("NAME", r"[A-Za-z_][A-Za-z_0-9]*"),
    TokenSpec("REAL", r"[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*"),
    TokenSpec("INT", r"[0-9]+"),
    TokenSpec("INT", r"\$[0-9A-Fa-f]+"),
    TokenSpec("OP", r"(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]"),
    TokenSpec("STRING", r"'([^']|(''))*'"),
    TokenSpec("CHAR", r"#[0-9]+"),
    TokenSpec("CHAR", r"#\$[0-9A-Fa-f]+"),
]
# tokenize = make_tokenizer(_example_token_specs)
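For orientation, here is a short usage sketch of the vendored lexer above; it assumes the linter runs from the repository root so that the lint_lib._vendor.funcparserlib.lexer import path resolves:

from lint_lib._vendor.funcparserlib.lexer import TokenSpec, make_tokenizer

# Build a tokenizer from regexp-based token specs, as in the module's docstring.
tokenize = make_tokenizer([
    TokenSpec("space", r"\s+"),
    TokenSpec("word", r"\w+"),
    TokenSpec("punct", r"[,.!?]"),
])

# Tokens carry their type, text, and (line, column) start/end positions.
for token in tokenize("Hello, world!"):
    if token.type != "space":
        print(token)  # e.g. 1,1-1,5: word 'Hello'

Input that matches none of the specs raises LexerError with the offending line and position, which is what the doctest above demonstrates with "Bye?".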
