forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvalidate_unwanted_patterns.py
executable file
·256 lines (205 loc) · 6.58 KB
/
validate_unwanted_patterns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
#!/usr/bin/env python
"""
Unwanted patterns test cases.
"""
import argparse
import os
import sys
import token
import tokenize
from typing import Callable, Generator, List, Tuple
# File-name suffixes that `main` accepts while walking a directory; files whose
# names end with none of these suffixes are skipped.
# NOTE(review): ".pyx.ini" may have been meant as the Cython template suffix
# ".pxi.in" -- confirm before changing.
FILE_EXTENSIONS_TO_CHECK = (".py", ".pyx", ".pyx.ini", ".pxd")
def main(
    function: Callable[[str], Generator[Tuple[str, int, str], None, None]],
    source_path: str,
    output_format: str,
) -> bool:
    """
    Main entry point of the script.

    Runs ``function`` on ``source_path`` (when it is a file) or on every file
    below it whose name ends with one of ``FILE_EXTENSIONS_TO_CHECK`` (when it
    is a directory), and prints one formatted line per reported problem.

    Parameters
    ----------
    function : Callable
        Function to execute for the test case. It receives a file path and
        yields ``(source_path, line_number, msg)`` tuples.
    source_path : str
        Source path representing path to a file/directory.
    output_format : str
        Output format of the error message. It is filled in with
        ``str.format`` using the keys ``source_path``, ``line_number``
        and ``msg``.

    Returns
    -------
    bool
        True if any unwanted pattern was reported by the given function.

    Raises
    ------
    ValueError
        If the `source_path` is not pointing to existing file/directory.
    """
    if not os.path.exists(source_path):
        raise ValueError(
            "Please enter a valid path, pointing to a valid file/directory."
        )

    if os.path.isfile(source_path):
        # A file passed explicitly is checked whatever its extension is.
        paths_to_check = [source_path]
    else:
        # Lazy on purpose: files are checked while the tree is being walked.
        paths_to_check = (
            os.path.join(subdir, file_name)
            for subdir, _, files in os.walk(source_path)
            for file_name in files
            if file_name.endswith(FILE_EXTENSIONS_TO_CHECK)
        )

    is_failed: bool = False
    for path in paths_to_check:
        # Use distinct names for the reported values so the `source_path`
        # argument is never rebound by what the checker yields.
        for reported_path, line_number, msg in function(path):
            is_failed = True
            print(
                output_format.format(
                    source_path=reported_path, line_number=line_number, msg=msg
                )
            )

    return is_failed
def STC(source_path: str) -> Generator[Tuple[str, int, str], None, None]:
    """
    Strings To Concatenate.

    This test case is necessary after 'Black' (https://github.com/psf/black),
    is formatting strings over multiple lines.

    For example, when this:

    >>> foo = (
    ...     "bar "
    ...     "baz"
    ... )

    Is becoming this:

    >>> foo = ("bar " "baz")

    'Black' is not considering this as an
    issue (see https://github.com/psf/black/issues/1051),
    so we are checking it here instead.

    Parameters
    ----------
    source_path : str
        File path pointing to a single file.

    Yields
    ------
    source_path : str
        Source file path.
    line_number : int
        Line number of the first of two adjacent string tokens.
    MSG : str
        Explanation of the error.

    Notes
    -----
    GH #30454
    """
    MSG: str = (
        "String unnecessarily split in two by black. Please merge them manually."
    )

    # tokenize.open decodes the file as Python source (encoding cookie / BOM,
    # UTF-8 otherwise) rather than with the locale's default encoding.
    with tokenize.open(source_path) as source_file:
        tokens: List = list(tokenize.generate_tokens(source_file.readline))

    # Two STRING tokens with nothing in between (not even an NL token) are
    # string literals sitting side by side on the same line.
    for current_token, next_token in zip(tokens, tokens[1:]):
        if current_token[0] == next_token[0] == token.STRING:
            yield source_path, current_token[2][0], MSG
def SWWPS(source_path: str) -> Generator[Tuple[str, int, str], None, None]:
"""
Strings With Wrong Placed Space.
Test case for leading spaces in concated strings.
For example:
>>> foo = (
... "bar "
... "baz"
... )
Instead of:
>>> foo = (
... "bar"
... " baz"
... )
Parameters
----------
source_path : str
File path pointing to a single file.
Yields
------
source_path : str
Source file path.
line_number : int
Line number of unconcatenated string.
MSG : str
Explenation of the error.
"""
MSG: str = (
"String has a space at the beginning "
"instead of the end of the previous string."
)
with open(source_path, "r") as file_name:
tokens: List = list(tokenize.generate_tokens(file_name.readline))
for first_token, second_token, third_token in zip(tokens, tokens[1:], tokens[2:]):
if (
first_token[0] == third_token[0] == token.STRING
and second_token[0] == token.NL
):
# Means we are in a block of concated string
# Striping the quotes
first_string = first_token[1][1:-1]
second_string = third_token[1][1:-1]
if (not first_string.endswith(" ")) and (second_string.startswith(" ")):
yield source_path, third_token[2][0], MSG
def BPR(source_path: str) -> Generator[Tuple[str, int, str], None, None]:
    """
    Test Case for bare pytest raises.

    For example:

    >>> with pytest.raises(ValueError):
    ...     # Some code that raises ValueError

    Instead of:

    >>> with pytest.raises(ValueError, match="foo"):
    ...     # Some code that raises ValueError

    Parameters
    ----------
    source_path : str
        File path pointing to a single file.

    Yields
    ------
    source_path : str
        Source file path.
    line_number : int
        Line number of the ``raises`` name that has no ``match`` after it.
    MSG : str
        Explanation of the error.

    Notes
    -----
    GH #23922

    The check is token based: every name token spelled ``raises`` is
    inspected, whether or not it is accessed through ``pytest``.
    """
    MSG: str = "Bare pytests raise have been found."

    # tokenize.open decodes the file as Python source (encoding cookie / BOM,
    # UTF-8 otherwise) rather than with the locale's default encoding.
    with tokenize.open(source_path) as source_file:
        tokens: List = list(tokenize.generate_tokens(source_file.readline))

    # `counter` starts at 1 so that tokens[counter:] begins right after the
    # current token.
    for counter, current_token in enumerate(tokens, start=1):
        if not (current_token[0] == token.NAME and current_token[1] == "raises"):
            continue

        # Scan the remainder of the logical line: a `match` name clears the
        # call, reaching NEWLINE first means no `match` was given.
        for next_token in tokens[counter:]:
            if next_token[0] == token.NAME and next_token[1] == "match":
                break
            if next_token[0] == token.NEWLINE:
                yield source_path, current_token[2][0], MSG
                break
if __name__ == "__main__":
    # Maps the value of the --id option to the checker it selects.
    FUNCTIONS_MAP = {"STC": STC, "SWWPS": SWWPS, "BPR": BPR}

    parser = argparse.ArgumentParser(description="Unwanted patterns checker.")

    parser.add_argument(
        "path", nargs="?", default=".", help="Source path of file/directory to check."
    )
    parser.add_argument(
        "--format",
        "-f",
        default="{source_path}:{line_number}:{msg}.",
        help="Output format of the error message.",
    )
    # Required: without it FUNCTIONS_MAP[args.id] would be looked up with None
    # and fail with a KeyError instead of a usage message.
    parser.add_argument(
        "--id",
        "-i",
        choices=list(FUNCTIONS_MAP),
        required=True,
        help="Test case to check.",
    )

    args = parser.parse_args()

    # main() returns True when something was reported; sys.exit turns that
    # into exit status 1, and False into exit status 0.
    sys.exit(
        main(
            function=FUNCTIONS_MAP[args.id],
            source_path=args.path,
            output_format=args.format,
        )
    )