Skip to content

STYLE loosen inconsistent namespace check #40532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
4 changes: 2 additions & 2 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,9 +1372,9 @@ def array_likes(request):
data = memoryview(arr)
elif name == "array":
# stdlib array
from array import array as array_stdlib
import array

data = array_stdlib("i", arr)
data = array.array("i", arr)
elif name == "dask":
import dask.array

Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1231,14 +1231,14 @@ def __len__(self, n):
def test_constructor_stdlib_array(self):
# GH 4297
# support Array
from array import array as stdlib_array
import array

result = DataFrame({"A": stdlib_array("i", range(10))})
result = DataFrame({"A": array.array("i", range(10))})
expected = DataFrame({"A": list(range(10))})
tm.assert_frame_equal(result, expected, check_dtype=False)

expected = DataFrame([list(range(10)), list(range(10))])
result = DataFrame([stdlib_array("i", range(10)), stdlib_array("i", range(10))])
result = DataFrame([array.array("i", range(10)), array.array("i", range(10))])
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_constructor_range(self):
Expand Down
71 changes: 46 additions & 25 deletions scripts/check_for_inconsistent_pandas_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Check that test suite file doesn't use the pandas namespace inconsistently.

We check for cases of ``Series`` and ``pd.Series`` appearing in the same file
(likewise for some other common classes).
(likewise for other pandas objects).

This is meant to be run as a pre-commit hook - to run it manually, you can do:

Expand All @@ -15,43 +15,50 @@
though note that you may need to manually fixup some imports and that you will also
need the additional dependency `tokenize-rt` (which is left out from the pre-commit
hook so that it uses the same virtualenv as the other local ones).

The general structure is similar to that of some plugins from
https://github.com/asottile/pyupgrade .
"""

import argparse
import ast
import sys
from typing import (
MutableMapping,
NamedTuple,
Optional,
Sequence,
Set,
Tuple,
)

ERROR_MESSAGE = "Found both `pd.{name}` and `{name}` in {path}"
EXCLUDE = {
"eval", # built-in, different from `pd.eval`
"np", # pd.np is deprecated but still tested
}
Offset = Tuple[int, int]
ERROR_MESSAGE = (
"{path}:{lineno}:{col_offset}: "
"Found both '{prefix}.{name}' and '{name}' in {path}"
)


class OffsetWithNamespace(NamedTuple):
lineno: int
col_offset: int
namespace: str


class Visitor(ast.NodeVisitor):
def __init__(self) -> None:
self.pandas_namespace: MutableMapping[Offset, str] = {}
self.no_namespace: Set[str] = set()
self.pandas_namespace: MutableMapping[OffsetWithNamespace, str] = {}
self.imported_from_pandas: Set[str] = set()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Outside scope of the PR of course, but curious how hard it would be to generalize this to any namespace? Then you could see it being a helpful linting check even outside of pandas.

Copy link
Member Author

@MarcoGorelli MarcoGorelli Mar 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a great idea, thanks! Added to my backlog


def visit_Attribute(self, node: ast.Attribute) -> None:
if (
isinstance(node.value, ast.Name)
and node.value.id == "pd"
and node.attr not in EXCLUDE
):
self.pandas_namespace[(node.lineno, node.col_offset)] = node.attr
if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}:
offset_with_namespace = OffsetWithNamespace(
node.lineno, node.col_offset, node.value.id
)
self.pandas_namespace[offset_with_namespace] = node.attr
self.generic_visit(node)

def visit_Name(self, node: ast.Name) -> None:
if node.id not in EXCLUDE:
self.no_namespace.add(node.id)
def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
if node.module is not None and "pandas" in node.module:
self.imported_from_pandas.update(name.name for name in node.names)
self.generic_visit(node)


Expand All @@ -64,9 +71,11 @@ def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str

tokens = src_to_tokens(content)
for n, i in reversed_enumerate(tokens):
offset_with_namespace = OffsetWithNamespace(i.offset[0], i.offset[1], i.src)
if (
i.offset in visitor.pandas_namespace
and visitor.pandas_namespace[i.offset] in visitor.no_namespace
offset_with_namespace in visitor.pandas_namespace
and visitor.pandas_namespace[offset_with_namespace]
in visitor.imported_from_pandas
):
# Replace `pd`
tokens[n] = i._replace(src="")
Expand All @@ -85,16 +94,28 @@ def check_for_inconsistent_pandas_namespace(
visitor = Visitor()
visitor.visit(tree)

inconsistencies = visitor.no_namespace.intersection(
inconsistencies = visitor.imported_from_pandas.intersection(
visitor.pandas_namespace.values()
)

if not inconsistencies:
# No inconsistent namespace usage, nothing to replace.
return content
return None

if not replace:
msg = ERROR_MESSAGE.format(name=inconsistencies.pop(), path=path)
raise RuntimeError(msg)
inconsistency = inconsistencies.pop()
lineno, col_offset, prefix = next(
key for key, val in visitor.pandas_namespace.items() if val == inconsistency
)
msg = ERROR_MESSAGE.format(
lineno=lineno,
col_offset=col_offset,
prefix=prefix,
name=inconsistency,
path=path,
)
sys.stdout.write(msg)
sys.exit(1)

return replace_inconsistent_pandas_namespace(visitor, content)

Expand Down
58 changes: 40 additions & 18 deletions scripts/tests/test_inconsistent_namespace_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,57 @@
check_for_inconsistent_pandas_namespace,
)

BAD_FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()"
BAD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()"
GOOD_FILE_0 = "cat_0 = Categorical()\ncat_1 = Categorical()"
BAD_FILE_0 = (
"from pandas import Categorical\n"
"cat_0 = Categorical()\n"
"cat_1 = pd.Categorical()"
)
BAD_FILE_1 = (
"from pandas import Categorical\n"
"cat_0 = pd.Categorical()\n"
"cat_1 = Categorical()"
)
BAD_FILE_2 = (
"from pandas import Categorical\n"
"cat_0 = pandas.Categorical()\n"
"cat_1 = Categorical()"
)
GOOD_FILE_0 = (
"from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()"
)
GOOD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()"
PATH = "t.py"


@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1])
def test_inconsistent_usage(content):
msg = r"Found both `pd\.Categorical` and `Categorical` in t\.py"
with pytest.raises(RuntimeError, match=msg):
@pytest.mark.parametrize(
"content, expected",
[
(BAD_FILE_0, "t.py:3:8: Found both 'pd.Categorical' and 'Categorical' in t.py"),
(BAD_FILE_1, "t.py:2:8: Found both 'pd.Categorical' and 'Categorical' in t.py"),
(
BAD_FILE_2,
"t.py:2:8: Found both 'pandas.Categorical' and 'Categorical' in t.py",
),
],
)
def test_inconsistent_usage(content, expected, capsys):
with pytest.raises(SystemExit):
check_for_inconsistent_pandas_namespace(content, PATH, replace=False)
result, _ = capsys.readouterr()
assert result == expected


@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1])
def test_consistent_usage(content):
@pytest.mark.parametrize("replace", [True, False])
def test_consistent_usage(content, replace):
# should not raise
check_for_inconsistent_pandas_namespace(content, PATH, replace=False)
check_for_inconsistent_pandas_namespace(content, PATH, replace=replace)


@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1])
@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1, BAD_FILE_2])
def test_inconsistent_usage_with_replace(content):
result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True)
expected = "cat_0 = Categorical()\ncat_1 = Categorical()"
assert result == expected


@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1])
def test_consistent_usage_with_replace(content):
result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True)
expected = content
expected = (
"from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()"
)
assert result == expected