Skip to content

Commit a5e0d1f

Browse files
committed
loosen inconsistent namespace check
1 parent 38640d1 commit a5e0d1f

File tree

4 files changed

+91 -48 lines changed

pandas/tests/arrays/test_datetimelike.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -1372,9 +1372,9 @@ def array_likes(request):
13721372
data = memoryview(arr)
13731373
elif name == "array":
13741374
# stdlib array
1375-
from array import array as array_stdlib
1375+
import array
13761376

1377-
data = array_stdlib("i", arr)
1377+
data = array.array("i", arr)
13781378
elif name == "dask":
13791379
import dask.array
13801380

pandas/tests/frame/test_constructors.py

+3 -3
Original file line numberDiff line numberDiff line change
@@ -1231,14 +1231,14 @@ def __len__(self, n):
12311231
def test_constructor_stdlib_array(self):
12321232
# GH 4297
12331233
# support Array
1234-
from array import array as stdlib_array
1234+
import array
12351235

1236-
result = DataFrame({"A": stdlib_array("i", range(10))})
1236+
result = DataFrame({"A": array.array("i", range(10))})
12371237
expected = DataFrame({"A": list(range(10))})
12381238
tm.assert_frame_equal(result, expected, check_dtype=False)
12391239

12401240
expected = DataFrame([list(range(10)), list(range(10))])
1241-
result = DataFrame([stdlib_array("i", range(10)), stdlib_array("i", range(10))])
1241+
result = DataFrame([array.array("i", range(10)), array.array("i", range(10))])
12421242
tm.assert_frame_equal(result, expected, check_dtype=False)
12431243

12441244
def test_constructor_range(self):

scripts/check_for_inconsistent_pandas_namespace.py

+46 -25
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Check that test suite file doesn't use the pandas namespace inconsistently.
33
44
We check for cases of ``Series`` and ``pd.Series`` appearing in the same file
5-
(likewise for some other common classes).
5+
(likewise for other pandas objects).
66
77
This is meant to be run as a pre-commit hook - to run it manually, you can do:
88
@@ -15,43 +15,50 @@
1515
though note that you may need to manually fixup some imports and that you will also
1616
need the additional dependency `tokenize-rt` (which is left out from the pre-commit
1717
hook so that it uses the same virtualenv as the other local ones).
18+
19+
The general structure is similar to that of some plugins from
20+
https://github.com/asottile/pyupgrade .
1821
"""
1922

2023
import argparse
2124
import ast
25+
import sys
2226
from typing import (
2327
MutableMapping,
28+
NamedTuple,
2429
Optional,
2530
Sequence,
2631
Set,
27-
Tuple,
2832
)
2933

30-
ERROR_MESSAGE = "Found both `pd.{name}` and `{name}` in {path}"
31-
EXCLUDE = {
32-
"eval", # built-in, different from `pd.eval`
33-
"np", # pd.np is deprecated but still tested
34-
}
35-
Offset = Tuple[int, int]
34+
ERROR_MESSAGE = (
35+
"{path}:{lineno}:{col_offset}: "
36+
"Found both '{prefix}.{name}' and '{name}' in {path}"
37+
)
38+
39+
40+
class OffsetWithNamespace(NamedTuple):
41+
lineno: int
42+
col_offset: int
43+
namespace: str
3644

3745

3846
class Visitor(ast.NodeVisitor):
3947
def __init__(self) -> None:
40-
self.pandas_namespace: MutableMapping[Offset, str] = {}
41-
self.no_namespace: Set[str] = set()
48+
self.pandas_namespace: MutableMapping[OffsetWithNamespace, str] = {}
49+
self.imported_from_pandas: Set[str] = set()
4250

4351
def visit_Attribute(self, node: ast.Attribute) -> None:
44-
if (
45-
isinstance(node.value, ast.Name)
46-
and node.value.id == "pd"
47-
and node.attr not in EXCLUDE
48-
):
49-
self.pandas_namespace[(node.lineno, node.col_offset)] = node.attr
52+
if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}:
53+
offset_with_namespace = OffsetWithNamespace(
54+
node.lineno, node.col_offset, node.value.id
55+
)
56+
self.pandas_namespace[offset_with_namespace] = node.attr
5057
self.generic_visit(node)
5158

52-
def visit_Name(self, node: ast.Name) -> None:
53-
if node.id not in EXCLUDE:
54-
self.no_namespace.add(node.id)
59+
def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
60+
if node.module is not None and "pandas" in node.module:
61+
self.imported_from_pandas.update(name.name for name in node.names)
5562
self.generic_visit(node)
5663

5764

@@ -64,9 +71,11 @@ def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str
6471

6572
tokens = src_to_tokens(content)
6673
for n, i in reversed_enumerate(tokens):
74+
offset_with_namespace = OffsetWithNamespace(i.offset[0], i.offset[1], i.src)
6775
if (
68-
i.offset in visitor.pandas_namespace
69-
and visitor.pandas_namespace[i.offset] in visitor.no_namespace
76+
offset_with_namespace in visitor.pandas_namespace
77+
and visitor.pandas_namespace[offset_with_namespace]
78+
in visitor.imported_from_pandas
7079
):
7180
# Replace `pd`
7281
tokens[n] = i._replace(src="")
@@ -85,16 +94,28 @@ def check_for_inconsistent_pandas_namespace(
8594
visitor = Visitor()
8695
visitor.visit(tree)
8796

88-
inconsistencies = visitor.no_namespace.intersection(
97+
inconsistencies = visitor.imported_from_pandas.intersection(
8998
visitor.pandas_namespace.values()
9099
)
100+
91101
if not inconsistencies:
92102
# No inconsistent namespace usage, nothing to replace.
93-
return content
103+
return None
94104

95105
if not replace:
96-
msg = ERROR_MESSAGE.format(name=inconsistencies.pop(), path=path)
97-
raise RuntimeError(msg)
106+
inconsistency = inconsistencies.pop()
107+
lineno, col_offset, prefix = next(
108+
key for key, val in visitor.pandas_namespace.items() if val == inconsistency
109+
)
110+
msg = ERROR_MESSAGE.format(
111+
lineno=lineno,
112+
col_offset=col_offset,
113+
prefix=prefix,
114+
name=inconsistency,
115+
path=path,
116+
)
117+
sys.stdout.write(msg)
118+
sys.exit(1)
98119

99120
return replace_inconsistent_pandas_namespace(visitor, content)
100121

scripts/tests/test_inconsistent_namespace_check.py

+40 -18
Original file line numberDiff line numberDiff line change
@@ -4,35 +4,57 @@
44
check_for_inconsistent_pandas_namespace,
55
)
66

7-
BAD_FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()"
8-
BAD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()"
9-
GOOD_FILE_0 = "cat_0 = Categorical()\ncat_1 = Categorical()"
7+
BAD_FILE_0 = (
8+
"from pandas import Categorical\n"
9+
"cat_0 = Categorical()\n"
10+
"cat_1 = pd.Categorical()"
11+
)
12+
BAD_FILE_1 = (
13+
"from pandas import Categorical\n"
14+
"cat_0 = pd.Categorical()\n"
15+
"cat_1 = Categorical()"
16+
)
17+
BAD_FILE_2 = (
18+
"from pandas import Categorical\n"
19+
"cat_0 = pandas.Categorical()\n"
20+
"cat_1 = Categorical()"
21+
)
22+
GOOD_FILE_0 = (
23+
"from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()"
24+
)
1025
GOOD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()"
1126
PATH = "t.py"
1227

1328

14-
@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1])
15-
def test_inconsistent_usage(content):
16-
msg = r"Found both `pd\.Categorical` and `Categorical` in t\.py"
17-
with pytest.raises(RuntimeError, match=msg):
29+
@pytest.mark.parametrize(
30+
"content, expected",
31+
[
32+
(BAD_FILE_0, "t.py:3:8: Found both 'pd.Categorical' and 'Categorical' in t.py"),
33+
(BAD_FILE_1, "t.py:2:8: Found both 'pd.Categorical' and 'Categorical' in t.py"),
34+
(
35+
BAD_FILE_2,
36+
"t.py:2:8: Found both 'pandas.Categorical' and 'Categorical' in t.py",
37+
),
38+
],
39+
)
40+
def test_inconsistent_usage(content, expected, capsys):
41+
with pytest.raises(SystemExit):
1842
check_for_inconsistent_pandas_namespace(content, PATH, replace=False)
43+
result, _ = capsys.readouterr()
44+
assert result == expected
1945

2046

2147
@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1])
22-
def test_consistent_usage(content):
48+
@pytest.mark.parametrize("replace", [True, False])
49+
def test_consistent_usage(content, replace):
2350
# should not raise
24-
check_for_inconsistent_pandas_namespace(content, PATH, replace=False)
51+
check_for_inconsistent_pandas_namespace(content, PATH, replace=replace)
2552

2653

27-
@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1])
54+
@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1, BAD_FILE_2])
2855
def test_inconsistent_usage_with_replace(content):
2956
result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True)
30-
expected = "cat_0 = Categorical()\ncat_1 = Categorical()"
31-
assert result == expected
32-
33-
34-
@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1])
35-
def test_consistent_usage_with_replace(content):
36-
result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True)
37-
expected = content
57+
expected = (
58+
"from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()"
59+
)
3860
assert result == expected

0 commit comments

Comments
 (0)