Skip to content

Commit b073917

Browse files
bagerardsobolevn
andauthored
fix performance issue due to search in tokens (#210)
* fix performance issue due to search in tokens * fix flake8 warnings and review comment * fix from review. * more flake8 fix * Use flake8 builtin file_tokens * Refactoring Co-authored-by: sobolevn <[email protected]>
1 parent dca1c19 commit b073917

File tree

9 files changed

+267
-151
lines changed

9 files changed

+267
-151
lines changed

Diff for: .github/workflows/test.yml

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
name: test
22

3-
on: [push, pull_request, workflow_dispatch]
3+
on:
4+
push:
5+
branches:
6+
- master
7+
pull_request:
8+
workflow_dispatch:
49

510
jobs:
611
build:
@@ -25,7 +30,7 @@ jobs:
2530
echo "$HOME/.poetry/bin" >> $GITHUB_PATH
2631
2732
- name: Set up cache
28-
uses: actions/cache@v1
33+
uses: actions/cache@v2
2934
with:
3035
path: .venv
3136
key: venv-${{ matrix.python-version }}-${{ hashFiles('poetry.lock') }}

Diff for: CHANGELOG.md

+7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@
33
We follow Semantic Versions since the `0.1.0` release.
44

55

6+
## 1.1.0
7+
8+
### Features
9+
10+
- Improves performance on long files #210
11+
12+
613
## 1.0.0
714

815
### Features

Diff for: flake8_eradicate.py

+38-28
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
# -*- coding: utf-8 -*-
2-
31
import tokenize
4-
from typing import Iterable, Tuple
2+
from typing import Iterable, Iterator, List, Sequence, Tuple, Type
53

64
import pkg_resources
75
from eradicate import Eradicator
@@ -13,6 +11,7 @@
1311
#: We store the version number inside the `pyproject.toml`:
1412
pkg_version = pkg_resources.get_distribution(pkg_name).version
1513

14+
#: Const for `stdin` mode of `flake8`:
1615
STDIN = 'stdin'
1716

1817

@@ -21,18 +20,27 @@ class Checker(object):
2120

2221
name = pkg_name
2322
version = pkg_version
23+
2424
_error_template = 'E800 Found commented out code'
2525

2626
options = None
2727

28-
def __init__(self, physical_line, tokens) -> None:
28+
def __init__(
29+
self,
30+
tree, # that's the hack we use to trigger this check
31+
file_tokens: List[tokenize.TokenInfo],
32+
lines: Sequence[str],
33+
) -> None:
2934
"""
30-
Creates new checker instance.
35+
``flake8`` plugin constructor.
36+
37+
Arguments:
38+
file_tokens: all tokens for this file.
39+
lines: all file lines.
3140
32-
When performance will be an issue - we can refactor it.
3341
"""
34-
self._physical_line = physical_line
35-
self._tokens = tokens
42+
self._file_tokens = file_tokens
43+
self._lines = lines
3644
self._options = {
3745
'aggressive': self.options.eradicate_aggressive, # type: ignore
3846
}
@@ -44,11 +52,13 @@ def __init__(self, physical_line, tokens) -> None:
4452

4553
if whitelist_ext:
4654
self._eradicator.update_whitelist(
47-
whitelist_ext.split('#'), True,
55+
whitelist_ext.split('#'),
56+
extend_default=True,
4857
)
4958
elif whitelist:
5059
self._eradicator.update_whitelist(
51-
whitelist.split('#'), False,
60+
whitelist.split('#'),
61+
extend_default=False,
5262
)
5363

5464
@classmethod
@@ -103,14 +113,14 @@ def parse_options(cls, options) -> None:
103113
"""Parses registered options for providing them to each visitor."""
104114
cls.options = options
105115

106-
def __iter__(self) -> Iterable[Tuple[int, str]]:
116+
def run(self) -> Iterator[Tuple[int, int, str, Type['Checker']]]:
107117
"""Runs on each step of flake8."""
108-
if self._contains_commented_out_code():
109-
yield (1, self._error_template)
118+
for line_no in self._lines_with_commented_out_code():
119+
yield line_no, 0, self._error_template, type(self)
110120

111-
def _contains_commented_out_code(self) -> bool:
121+
def _lines_with_commented_out_code(self) -> Iterable[int]:
112122
"""
113-
Check if the current physical line contains commented out code.
123+
Yield the physical line numbers that contain commented out code.
114124
115125
This test relies on eradicate function to remove commented out code
116126
from a physical line.
@@ -121,19 +131,19 @@ def _contains_commented_out_code(self) -> bool:
121131
To prevent this false-positive, the tokens of the physical line are
122132
checked for a comment. The eradicate function is only invokes,
123133
when the tokens indicate a comment in the physical line.
124-
125134
"""
126-
comment_in_line = any(
127-
token_type == tokenize.COMMENT
128-
for token_type, _, _, _, _ in self._tokens
135+
comment_in_file = any(
136+
token.type == tokenize.COMMENT
137+
for token in self._file_tokens
129138
)
130139

131-
if comment_in_line:
132-
filtered_source = ''.join(
133-
self._eradicator.filter_commented_out_code(
134-
self._physical_line,
135-
self._options['aggressive'],
136-
),
137-
)
138-
return self._physical_line != filtered_source
139-
return False
140+
if comment_in_file:
141+
for line_no, line in enumerate(self._lines):
142+
filtered_source = ''.join(
143+
self._eradicator.filter_commented_out_code(
144+
line,
145+
aggressive=self._options['aggressive'],
146+
),
147+
)
148+
if line != filtered_source:
149+
yield line_no + 1

0 commit comments

Comments
 (0)