Skip to content

Commit 950f66c

Browse files
authored
Merge pull request cylc#6078 from MetRonnie/cylc-lint
`cylc lint` S007: fix regex catastrophic backtracking
2 parents e621d9c + 588ca29 commit 950f66c

File tree

3 files changed

+102
-116
lines changed

3 files changed

+102
-116
lines changed

changes.d/6078.fix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed bug where `cylc lint` could hang when checking `inherit` settings in `flow.cylc`.

cylc/flow/scripts/lint.py

Lines changed: 33 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,38 @@ def check_indentation(line: str) -> bool:
265265
return bool(len(match[0]) % 4 != 0)
266266

267267

268+
INHERIT_REGEX = re.compile(r'\s*inherit\s*=\s*(.*)')
269+
FAM_NAME_IGNORE_REGEX = re.compile(
270+
# Stuff we want to ignore when checking for lowercase in family names
271+
r'''
272+
# comments
273+
(?<!{)\#.*
274+
# or Cylc parameters
275+
| <[^>]+>
276+
# or Jinja2
277+
| {{.*?}} | {%.*?%} | {\#.*?\#}
278+
# or EmPy
279+
| (@[\[{\(]).*([\]\}\)])
280+
''',
281+
re.X
282+
)
283+
LOWERCASE_REGEX = re.compile(r'[a-z]')
284+
285+
286+
def check_lowercase_family_names(line: str) -> bool:
287+
"""Check for lowercase in family names."""
288+
match = INHERIT_REGEX.match(line)
289+
if not match:
290+
return False
291+
# Replace stuff we want to ignore with a neutral char (tilde will do):
292+
content = FAM_NAME_IGNORE_REGEX.sub('~', match.group(1))
293+
return any(
294+
LOWERCASE_REGEX.search(i)
295+
for i in content.split(',')
296+
if i.strip(' \'"') not in {'None', 'none', 'root'}
297+
)
298+
299+
268300
FUNCTION = 'function'
269301

270302
STYLE_GUIDE = (
@@ -351,62 +383,10 @@ def check_indentation(line: str) -> bool:
351383
'url': STYLE_GUIDE + 'trailing-whitespace',
352384
FUNCTION: re.compile(r'[ \t]$').findall
353385
},
354-
# Look for families both from inherit=FAMILY and FAMILY:trigger-all/any.
355-
# Do not match inherit lines with `None` at the start.
356386
"S007": {
357387
'short': 'Family name contains lowercase characters.',
358388
'url': STYLE_GUIDE + 'task-naming-conventions',
359-
FUNCTION: re.compile(
360-
r'''
361-
# match all inherit statements
362-
^\s*inherit\s*=
363-
# filtering out those which match only valid family names
364-
(?!
365-
\s*
366-
# none, None and root are valid family names
367-
# and `inherit =` or `inherit = # x` are valid too
368-
(['"]?(none|None|root|\#.*|$)['"]?|
369-
(
370-
# as are families named with capital letters
371-
[A-Z0-9_-]+
372-
# and optional quotes
373-
| [\'\"]
374-
# which may include Cylc parameters
375-
| (<[^>]+>)
376-
# or Jinja2
377-
| ({[{%].*[%}]})
378-
# or EmPy
379-
| (@[\[{\(]).*([\]\}\)])
380-
)+
381-
)
382-
# this can be a comma separated list
383-
(
384-
\s*,\s*
385-
# none, None and root are valid family names
386-
(['"]?(none|None|root)['"]?|
387-
(
388-
# as are families named with capital letters
389-
[A-Z0-9_-]+
390-
# and optional quotes
391-
| [\'\"]
392-
# which may include Cylc parameters
393-
| (<[^>]+>)
394-
# or Jinja2
395-
| ({[{%].*[%}]})
396-
# or EmPy
397-
| (@[\[{\(]).*([\]\}\)])
398-
)+
399-
)
400-
)*
401-
# allow trailing commas and whitespace
402-
\s*,?\s*
403-
# allow trailing comments
404-
(\#.*)?
405-
$
406-
)
407-
''',
408-
re.X
409-
).findall,
389+
FUNCTION: check_lowercase_family_names,
410390
},
411391
"S008": {
412392
'short': JINJA2_FOUND_WITHOUT_SHEBANG,

tests/unit/scripts/test_lint.py

Lines changed: 68 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
from cylc.flow.scripts.lint import (
2828
MANUAL_DEPRECATIONS,
29+
check_lowercase_family_names,
2930
get_cylc_files,
3031
get_pyproject_toml,
3132
get_reference_rst,
@@ -240,77 +241,81 @@ def test_check_cylc_file_line_no():
240241

241242

242243
@pytest.mark.parametrize(
243-
'inherit_line',
244-
(
245-
param(item, id=str(ind))
246-
for ind, item in enumerate([
247-
# lowercase family names are not permitted
248-
'inherit = g',
249-
'inherit = foo, b, a, r',
250-
'inherit = FOO, bar',
251-
'inherit = None, bar',
252-
'inherit = A, b, C',
253-
'inherit = "A", "b"',
254-
"inherit = 'A', 'b'",
255-
# parameters, jinja2 and empy should be ignored
256-
# but any lowercase chars before or after should not
257-
'inherit = a<x>z',
258-
'inherit = a{{ x }}z',
259-
'inherit = a@( x )z',
260-
])
261-
)
244+
'line',
245+
[
246+
# lowercase family names are not permitted
247+
'inherit = g',
248+
'inherit = FOO, bar',
249+
'inherit = None, bar',
250+
'inherit = A, b, C',
251+
'inherit = "A", "b"',
252+
"inherit = 'A', 'b'",
253+
'inherit = FOO_BAr',
254+
# whitespace & trailing commas
255+
' inherit = a , ',
256+
# parameters, jinja2 and empy should be ignored
257+
# but any lowercase chars before or after should not
258+
'inherit = A<x>z',
259+
'inherit = A{{ x }}z',
260+
'inherit = N{# #}one',
261+
'inherit = A@( x )z',
262+
]
262263
)
263-
def test_inherit_lowercase_matches(inherit_line):
264-
lint = lint_text(inherit_line, ['style'])
265-
assert any('S007' in msg for msg in lint.messages)
264+
def test_check_lowercase_family_names__true(line):
265+
assert check_lowercase_family_names(line) is True
266266

267267

268268
@pytest.mark.parametrize(
269-
'inherit_line',
270-
(
271-
param(item, id=str(ind))
272-
for ind, item in enumerate([
273-
# undefined values are ok
274-
'inherit =',
275-
'inherit = ',
276-
# none, None and root are ok
277-
'inherit = none',
278-
'inherit = None',
279-
'inherit = root',
280-
# trailing commas and whitespace are ok
281-
'inherit = None,',
282-
'inherit = None, ',
283-
'inherit = None , ',
284-
# uppercase family names are ok
285-
'inherit = None, FOO, BAR',
286-
'inherit = FOO',
287-
'inherit = BAZ',
288-
'inherit = root',
289-
'inherit = FOO_BAR_0',
290-
# parameters should be ignored
291-
'inherit = A<a>Z',
292-
'inherit = <a=1, b-1, c+1>',
293-
# jinja2 should be ignored
269+
'line',
270+
[
271+
# undefined values are ok
272+
'inherit =',
273+
'inherit = ',
274+
# none, None and root are ok
275+
'inherit = none',
276+
'inherit = None',
277+
'inherit = root',
278+
# whitespace & trailing commas
279+
'inherit = None,',
280+
'inherit = None, ',
281+
' inherit = None , ',
282+
# uppercase family names are ok
283+
'inherit = None, FOO, BAR',
284+
'inherit = FOO',
285+
'inherit = FOO_BAR_0',
286+
# parameters should be ignored
287+
'inherit = A<a>Z',
288+
'inherit = <a=1, b-1, c+1>',
289+
# jinja2 should be ignored
290+
param(
294291
'inherit = A{{ a }}Z, {% for x in range(5) %}'
295292
'A{{ x }}, {% endfor %}',
296-
# empy should be ignored
297-
'inherit = A@( a )Z',
298-
# trailing comments should be ignored
299-
'inherit = A, B # no, comment',
300-
'inherit = # a',
301-
# quotes are ok
302-
'inherit = "A", "B"',
303-
"inherit = 'A', 'B'",
304-
'inherit = "None", B',
305-
'inherit = <a = 1, b - 1>',
306-
# one really awkward, but valid example
293+
id='jinja2-long'
294+
),
295+
# empy should be ignored
296+
'inherit = A@( a )Z',
297+
# trailing comments should be ignored
298+
'inherit = A, B # no, comment',
299+
'inherit = # a',
300+
# quotes are ok
301+
'inherit = "A", "B"',
302+
"inherit = 'A', 'B'",
303+
'inherit = "None", B',
304+
'inherit = <a = 1, b - 1>',
305+
# one really awkward, but valid example
306+
param(
307307
'inherit = none, FOO_BAR_0, "<a - 1>", A<a>Z, A{{a}}Z, A@(a)Z',
308-
])
309-
)
308+
id='awkward'
309+
),
310+
]
310311
)
311-
def test_inherit_lowercase_not_match_none(inherit_line):
312-
lint = lint_text(inherit_line, ['style'])
313-
assert not any('S007' in msg for msg in lint.messages)
312+
def test_check_lowercase_family_names__false(line):
313+
assert check_lowercase_family_names(line) is False
314+
315+
316+
def test_inherit_lowercase_matches():
317+
lint = lint_text('inherit = a', ['style'])
318+
assert any('S007' in msg for msg in lint.messages)
314319

315320

316321
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)