Skip to content

Commit 27f406f

Browse files
ShaharNavehjreback
authored andcommitted
CI: Add test case for unwanted patterns (#30467)
1 parent a895ac7 commit 27f406f

File tree

2 files changed

+137
-0
lines changed

2 files changed

+137
-0
lines changed

ci/code_checks.sh

+8
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
100100
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
101101
RET=$(($RET + $?)) ; echo $MSG "DONE"
102102

103+
MSG='Check for use of not concatenated strings' ; echo $MSG
104+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
105+
$BASE_DIR/scripts/validate_string_concatenation.py --format="[error]{source_path}:{line_number}:{msg}" .
106+
else
107+
$BASE_DIR/scripts/validate_string_concatenation.py .
108+
fi
109+
RET=$(($RET + $?)) ; echo $MSG "DONE"
110+
103111
echo "isort --version-number"
104112
isort --version-number
105113

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#!/usr/bin/env python
2+
"""
3+
GH #30454
4+
5+
Check where there is a string that needs to be concatenated.
6+
7+
This is necessary after black formatting,
8+
where for example black transforms this:
9+
10+
>>> foo = (
11+
... "bar "
12+
... "baz"
13+
... )
14+
15+
into this:
16+
17+
>>> foo = ("bar " "baz")
18+
19+
Black is not considering this as an
20+
issue (see issue https://github.com/psf/black/issues/1051),
21+
so we are checking it here.
22+
"""
23+
24+
import argparse
25+
import os
26+
import sys
27+
import token
28+
import tokenize
29+
from typing import Generator, List, Tuple
30+
31+
FILE_EXTENSIONS_TO_CHECK = (".py", ".pyx", ".pyx.ini", ".pxd")
32+
33+
34+
def _files_to_check(source_path: str) -> Generator[str, None, None]:
    """
    Yield the path of every file that has to be checked for `source_path`.

    Parameters
    ----------
    source_path : str
        Path to a single file or to a directory.

    Yields
    ------
    str
        `source_path` itself when it is a file; otherwise every file found
        while walking `source_path` whose name ends with one of
        ``FILE_EXTENSIONS_TO_CHECK``.
    """
    if os.path.isfile(source_path):
        # A file that is passed explicitly is checked whatever its extension.
        yield source_path
        return

    for subdir, _, files in os.walk(source_path):
        for file_name in files:
            # ``str.endswith`` accepts a tuple of suffixes.
            if file_name.endswith(FILE_EXTENSIONS_TO_CHECK):
                yield os.path.join(subdir, file_name)


def main(source_path: str, output_format: str) -> bool:
    """
    Main entry point of the script.

    Every unconcatenated string that is found is printed using
    `output_format`.

    Parameters
    ----------
    source_path : str
        Source path representing path to a file/directory.
    output_format : str
        Output format of the script. It is used with ``str.format`` and
        receives the ``source_path``, ``line_number`` and ``msg`` fields.

    Returns
    -------
    bool
        True if any string that needs to be concatenated was found.

    Raises
    ------
    ValueError
        If the `source_path` is not pointing to existing file/directory.
    """
    if not os.path.exists(source_path):
        raise ValueError(
            "Please enter a valid path, pointing to a valid file/directory."
        )

    msg = "String unnecessarily split in two by black. Please merge them manually."
    is_failed: bool = False

    # Distinct loop names keep the `source_path` parameter from being rebound.
    for file_path in _files_to_check(source_path):
        for reported_path, line_number in strings_to_concatenate(file_path):
            is_failed = True
            print(
                output_format.format(
                    source_path=reported_path, line_number=line_number, msg=msg
                )
            )
    return is_failed
88+
89+
90+
def strings_to_concatenate(source_path: str) -> Generator[Tuple[str, int], None, None]:
    """
    Yield the location of every string that needs to be concatenated in a file.

    A location is reported whenever two ``STRING`` tokens directly follow one
    another in the token stream, e.g. ``("bar " "baz")``.

    Parameters
    ----------
    source_path : str
        File path pointing to a single file.

    Yields
    ------
    source_path : str
        Source file path.
    line_number : int
        Line number of unconcatenated string (the line on which the first of
        the two adjacent strings starts).
    """
    # ``tokenize.open`` decodes the file using its BOM / PEP 263 coding cookie
    # (UTF-8 when there is none) instead of the platform's default encoding.
    with tokenize.open(source_path) as file_obj:
        tokens: List = list(tokenize.generate_tokens(file_obj.readline))

    # Compare every token with its direct successor.
    for current_token, next_token in zip(tokens, tokens[1:]):
        if current_token[0] == next_token[0] == token.STRING:
            # Index 2 is the (row, column) start position of the token.
            yield source_path, current_token[2][0]
112+
113+
114+
if __name__ == "__main__":
    # Command-line interface: an optional positional path plus an optional
    # output format for the reported locations.
    cli = argparse.ArgumentParser(description="Validate concatenated strings")
    cli.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Source path of file/directory to check.",
    )
    cli.add_argument(
        "--format",
        "-f",
        default="{source_path}:{line_number}:{msg}",
        help="Output format of the unconcatenated strings.",
    )
    options = cli.parse_args()

    # The boolean result is used as the exit status of the process:
    # True (something was found) exits with 1, False exits with 0.
    found_any = main(source_path=options.path, output_format=options.format)
    sys.exit(found_any)

0 commit comments

Comments
 (0)