Skip to content

Commit bfad571

Browse files
author
MomIsBestFriend
committed
CI: Add test case for unwanted patterns
1 parent ad2790c commit bfad571

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

ci/code_checks.sh

+4
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
216216
invgrep -R --include=*.{py,pyx} 'xrange' pandas
217217
RET=$(($RET + $?)) ; echo $MSG "DONE"
218218

219+
MSG='Check for use of not concatenated strings' ; echo $MSG
220+
python $BASE_DIR/scripts/validate_string_concatenation.py pandas
221+
RET=$(($RET + $?)) ; echo $MSG "DONE"
222+
219223
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
220224
INVGREP_APPEND=" <- trailing whitespaces found"
221225
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/usr/bin/env python
2+
"""
3+
GH #30454
4+
5+
Check where there is a string that needs to be concatenated.
6+
7+
This is necessary after black formatting,
8+
where for example black transforms this:
9+
10+
>>> foo = (
11+
... "bar "
12+
... "baz"
13+
... )
14+
15+
into this:
16+
17+
>>> foo = ("bar " "baz")
18+
19+
Black is not considering this as an
20+
issue (see https://github.com/psf/black/issues/1051), so we are checking
21+
it here.
22+
"""
23+
24+
import os
25+
import sys
26+
import token
27+
import tokenize
28+
29+
# Can be annotated as typing.FrozenSet[str]
30+
# Suffixes a file name must end with to be checked during the directory walk.
FILE_EXTENSIONS_TO_CHECK = frozenset((".pxd", ".py", ".pyx", ".pyx.ini"))
31+
32+
33+
def is_concatenated(file_path):
    """
    Check whether a file contains strings that need to be concatenated.

    The file is tokenized and every pair of consecutive ``STRING`` tokens
    (with no other token in between, e.g. ``"bar " "baz"``) is reported on
    standard output as ``<file_path>:<line_number>:\\t<first> and <second>``.

    Parameters
    ----------
    file_path : str
        File path pointing to a single file.

    Returns
    -------
    int
        Status code representing if the file needs a fix.
        0 - All good.
        1 - Needs to be fixed.
    """
    # tokenize.open decodes the source using its BOM / PEP 263 coding cookie
    # (UTF-8 when absent) rather than the platform's locale encoding, so a
    # valid source file is not rejected with UnicodeDecodeError.
    with tokenize.open(file_path) as source:
        tokens = list(tokenize.generate_tokens(source.readline))

    need_fix = False
    # Walk the token stream pairwise: (t0, t1), (t1, t2), ...
    for current_token, next_token in zip(tokens, tokens[1:]):
        if current_token[0] == next_token[0] == token.STRING:
            need_fix = True
            print(
                "{file_path}:{line_number}:\t{start} and {end}".format(
                    file_path=file_path,
                    # Token index 2 is the (row, column) start position.
                    line_number=current_token[2][0],
                    start=current_token[1],
                    end=next_token[1],
                )
            )

    return int(need_fix)
65+
66+
67+
if __name__ == "__main__":
    # Command-line entry point: takes one argument, a file or a directory,
    # and exits with 0 when nothing needs fixing and 1 otherwise.
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: {script} PATH".format(script=sys.argv[0]))

    path = sys.argv[1]

    if not os.path.exists(path):
        raise ValueError("Please enter a valid path, to a file/directory.")

    if os.path.isfile(path):
        # Means that the given path is of a single file.
        sys.exit(is_concatenated(path))

    # Means that the given path is of a directory.
    # str.endswith accepts a tuple, so one call covers every extension.
    extensions = tuple(FILE_EXTENSIONS_TO_CHECK)
    failures = 0
    for subdir, _, files in os.walk(path):
        for file_name in files:
            if file_name.endswith(extensions):
                failures += is_concatenated(os.path.join(subdir, file_name))

    # Collapse the count of offending files into a 0/1 exit status.
    sys.exit(1 if failures else 0)

0 commit comments

Comments
 (0)