Skip to content

Commit 6bd2640

Browse files
authored
STYLE loosen inconsistent namespace check (#40532)
1 parent 1367cac commit 6bd2640

File tree

10 files changed, with 125 additions and 86 deletions.

.pre-commit-config.yaml

+1-2
Original file line number | Diff line number | Diff line change
@@ -86,11 +86,10 @@ repos:
8686
types: [python]
8787
exclude: ^pandas/_typing\.py$
8888
- id: inconsistent-namespace-usage
89-
name: 'Check for inconsistent use of pandas namespace in tests'
89+
name: 'Check for inconsistent use of pandas namespace'
9090
entry: python scripts/check_for_inconsistent_pandas_namespace.py
9191
language: python
9292
types: [python]
93-
files: ^pandas/tests/
9493
- id: incorrect-code-directives
9594
name: Check for incorrect code block or IPython directives
9695
language: pygrep

asv_bench/benchmarks/arithmetic.py

+9-11
Original file line number | Diff line number | Diff line change
@@ -140,9 +140,7 @@ def setup(self, op, shape):
140140
# construct dataframe with 2 blocks
141141
arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
142142
arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4")
143-
df = pd.concat(
144-
[pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
145-
)
143+
df = pd.concat([DataFrame(arr1), DataFrame(arr2)], axis=1, ignore_index=True)
146144
# should already be the case, but just to be sure
147145
df._consolidate_inplace()
148146

@@ -151,7 +149,7 @@ def setup(self, op, shape):
151149
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
152150
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
153151
df2 = pd.concat(
154-
[pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
152+
[DataFrame(arr1), DataFrame(arr2), DataFrame(arr3)],
155153
axis=1,
156154
ignore_index=True,
157155
)
@@ -459,9 +457,9 @@ class OffsetArrayArithmetic:
459457

460458
def setup(self, offset):
461459
N = 10000
462-
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
460+
rng = date_range(start="1/1/2000", periods=N, freq="T")
463461
self.rng = rng
464-
self.ser = pd.Series(rng)
462+
self.ser = Series(rng)
465463

466464
def time_add_series_offset(self, offset):
467465
with warnings.catch_warnings(record=True):
@@ -478,7 +476,7 @@ class ApplyIndex:
478476

479477
def setup(self, offset):
480478
N = 10000
481-
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
479+
rng = date_range(start="1/1/2000", periods=N, freq="T")
482480
self.rng = rng
483481

484482
def time_apply_index(self, offset):
@@ -490,17 +488,17 @@ class BinaryOpsMultiIndex:
490488
param_names = ["func"]
491489

492490
def setup(self, func):
493-
date_range = pd.date_range("20200101 00:00", "20200102 0:00", freq="S")
491+
array = date_range("20200101 00:00", "20200102 0:00", freq="S")
494492
level_0_names = [str(i) for i in range(30)]
495493

496-
index = pd.MultiIndex.from_product([level_0_names, date_range])
494+
index = pd.MultiIndex.from_product([level_0_names, array])
497495
column_names = ["col_1", "col_2"]
498496

499-
self.df = pd.DataFrame(
497+
self.df = DataFrame(
500498
np.random.rand(len(index), 2), index=index, columns=column_names
501499
)
502500

503-
self.arg_df = pd.DataFrame(
501+
self.arg_df = DataFrame(
504502
np.random.randint(1, 10, (len(level_0_names), 2)),
505503
index=level_0_names,
506504
columns=column_names,

asv_bench/benchmarks/sparse.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def setup(self):
2828
data = np.random.randn(N)[:-i]
2929
idx = rng[:-i]
3030
data[100:] = np.nan
31-
self.series[i] = pd.Series(pd.SparseArray(data), index=idx)
31+
self.series[i] = Series(SparseArray(data), index=idx)
3232

3333
def time_series_to_frame(self):
3434
pd.DataFrame(self.series)
@@ -63,7 +63,7 @@ def setup(self):
6363
)
6464

6565
def time_sparse_series_from_coo(self):
66-
pd.Series.sparse.from_coo(self.matrix)
66+
Series.sparse.from_coo(self.matrix)
6767

6868

6969
class ToCoo:

pandas/_testing/__init__.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -207,12 +207,12 @@ def box_expected(expected, box_cls, transpose=True):
207207
"""
208208
if box_cls is pd.array:
209209
expected = pd.array(expected)
210-
elif box_cls is pd.Index:
211-
expected = pd.Index(expected)
212-
elif box_cls is pd.Series:
213-
expected = pd.Series(expected)
214-
elif box_cls is pd.DataFrame:
215-
expected = pd.Series(expected).to_frame()
210+
elif box_cls is Index:
211+
expected = Index(expected)
212+
elif box_cls is Series:
213+
expected = Series(expected)
214+
elif box_cls is DataFrame:
215+
expected = Series(expected).to_frame()
216216
if transpose:
217217
# for vector operations, we need a DataFrame to be a single-row,
218218
# not a single-column, in order to operate against non-DataFrame
@@ -400,7 +400,7 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None)
400400
"x": state.rand(n) * 2 - 1,
401401
"y": state.rand(n) * 2 - 1,
402402
}
403-
df = pd.DataFrame(columns, index=index, columns=sorted(columns))
403+
df = DataFrame(columns, index=index, columns=sorted(columns))
404404
if df.index[-1] == end:
405405
df = df.iloc[:-1]
406406
return df

pandas/conftest.py

+8-10
Original file line number | Diff line number | Diff line change
@@ -328,7 +328,7 @@ def unique_nulls_fixture(request):
328328
# ----------------------------------------------------------------
329329

330330

331-
@pytest.fixture(params=[pd.DataFrame, pd.Series])
331+
@pytest.fixture(params=[DataFrame, Series])
332332
def frame_or_series(request):
333333
"""
334334
Fixture to parametrize over DataFrame and Series.
@@ -338,7 +338,7 @@ def frame_or_series(request):
338338

339339
# error: List item 0 has incompatible type "Type[Index]"; expected "Type[IndexOpsMixin]"
340340
@pytest.fixture(
341-
params=[pd.Index, pd.Series], ids=["index", "series"] # type: ignore[list-item]
341+
params=[Index, Series], ids=["index", "series"] # type: ignore[list-item]
342342
)
343343
def index_or_series(request):
344344
"""
@@ -356,9 +356,7 @@ def index_or_series(request):
356356
index_or_series2 = index_or_series
357357

358358

359-
@pytest.fixture(
360-
params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"]
361-
)
359+
@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"])
362360
def index_or_series_or_array(request):
363361
"""
364362
Fixture to parametrize over Index, Series, and ExtensionArray
@@ -559,7 +557,7 @@ def index_with_missing(request):
559557
# ----------------------------------------------------------------
560558
@pytest.fixture
561559
def empty_series():
562-
return pd.Series([], index=[], dtype=np.float64)
560+
return Series([], index=[], dtype=np.float64)
563561

564562

565563
@pytest.fixture
@@ -596,7 +594,7 @@ def _create_series(index):
596594
""" Helper for the _series dict """
597595
size = len(index)
598596
data = np.random.randn(size)
599-
return pd.Series(data, index=index, name="a")
597+
return Series(data, index=index, name="a")
600598

601599

602600
_series = {
@@ -1437,16 +1435,16 @@ def any_numpy_dtype(request):
14371435
("boolean", [True, np.nan, False]),
14381436
("boolean", [True, pd.NA, False]),
14391437
("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
1440-
("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]),
1438+
("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]),
14411439
("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
14421440
# The following two dtypes are commented out due to GH 23554
14431441
# ('complex', [1 + 1j, np.nan, 2 + 2j]),
14441442
# ('timedelta64', [np.timedelta64(1, 'D'),
14451443
# np.nan, np.timedelta64(2, 'D')]),
14461444
("timedelta", [timedelta(1), np.nan, timedelta(2)]),
14471445
("time", [time(1), np.nan, time(2)]),
1448-
("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]),
1449-
("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
1446+
("period", [Period(2013), pd.NaT, Period(2018)]),
1447+
("interval", [Interval(0, 1), np.nan, Interval(0, 2)]),
14501448
]
14511449
ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id
14521450

pandas/io/formats/style.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -186,8 +186,8 @@ def __init__(
186186
if not data.index.is_unique or not data.columns.is_unique:
187187
raise ValueError("style is not supported for non-unique indices.")
188188
self.data: DataFrame = data
189-
self.index: pd.Index = data.index
190-
self.columns: pd.Index = data.columns
189+
self.index: Index = data.index
190+
self.columns: Index = data.columns
191191
self.table_styles = table_styles
192192
if not isinstance(uuid_len, int) or not uuid_len >= 0:
193193
raise TypeError("``uuid_len`` must be an integer in range [0, 32].")
@@ -913,7 +913,7 @@ def _apply(
913913
result.columns = data.columns
914914
else:
915915
result = func(data, **kwargs)
916-
if not isinstance(result, pd.DataFrame):
916+
if not isinstance(result, DataFrame):
917917
if not isinstance(result, np.ndarray):
918918
raise TypeError(
919919
f"Function {repr(func)} must return a DataFrame or ndarray "
@@ -1565,7 +1565,7 @@ def css(rgba) -> str:
15651565
if s.ndim == 1:
15661566
return [css(rgba) for rgba in rgbas]
15671567
else:
1568-
return pd.DataFrame(
1568+
return DataFrame(
15691569
[[css(rgba) for rgba in row] for row in rgbas],
15701570
index=s.index,
15711571
columns=s.columns,
@@ -1655,7 +1655,7 @@ def css(x):
16551655
if s.ndim == 1:
16561656
return [css(x) for x in normed]
16571657
else:
1658-
return pd.DataFrame(
1658+
return DataFrame(
16591659
[[css(x) for x in row] for row in normed],
16601660
index=s.index,
16611661
columns=s.columns,

pandas/tests/arrays/test_datetimelike.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1372,9 +1372,9 @@ def array_likes(request):
13721372
data = memoryview(arr)
13731373
elif name == "array":
13741374
# stdlib array
1375-
from array import array as array_stdlib
1375+
import array
13761376

1377-
data = array_stdlib("i", arr)
1377+
data = array.array("i", arr)
13781378
elif name == "dask":
13791379
import dask.array
13801380

pandas/tests/frame/test_constructors.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -1236,14 +1236,14 @@ def __len__(self, n):
12361236
def test_constructor_stdlib_array(self):
12371237
# GH 4297
12381238
# support Array
1239-
from array import array as stdlib_array
1239+
import array
12401240

1241-
result = DataFrame({"A": stdlib_array("i", range(10))})
1241+
result = DataFrame({"A": array.array("i", range(10))})
12421242
expected = DataFrame({"A": list(range(10))})
12431243
tm.assert_frame_equal(result, expected, check_dtype=False)
12441244

12451245
expected = DataFrame([list(range(10)), list(range(10))])
1246-
result = DataFrame([stdlib_array("i", range(10)), stdlib_array("i", range(10))])
1246+
result = DataFrame([array.array("i", range(10)), array.array("i", range(10))])
12471247
tm.assert_frame_equal(result, expected, check_dtype=False)
12481248

12491249
def test_constructor_range(self):

scripts/check_for_inconsistent_pandas_namespace.py

+46-25
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
Check that a file doesn't use the pandas namespace inconsistently.
33
44
We check for cases of ``Series`` and ``pd.Series`` appearing in the same file
5-
(likewise for some other common classes).
5+
(likewise for other pandas objects).
66
77
This is meant to be run as a pre-commit hook - to run it manually, you can do:
88
@@ -15,43 +15,50 @@
1515
though note that you may need to manually fixup some imports and that you will also
1616
need the additional dependency `tokenize-rt` (which is left out from the pre-commit
1717
hook so that it uses the same virtualenv as the other local ones).
18+
19+
The general structure is similar to that of some plugins from
20+
https://github.com/asottile/pyupgrade .
1821
"""
1922

2023
import argparse
2124
import ast
25+
import sys
2226
from typing import (
2327
MutableMapping,
28+
NamedTuple,
2429
Optional,
2530
Sequence,
2631
Set,
27-
Tuple,
2832
)
2933

30-
ERROR_MESSAGE = "Found both `pd.{name}` and `{name}` in {path}"
31-
EXCLUDE = {
32-
"eval", # built-in, different from `pd.eval`
33-
"np", # pd.np is deprecated but still tested
34-
}
35-
Offset = Tuple[int, int]
34+
ERROR_MESSAGE = (
35+
"{path}:{lineno}:{col_offset}: "
36+
"Found both '{prefix}.{name}' and '{name}' in {path}"
37+
)
38+
39+
40+
class OffsetWithNamespace(NamedTuple):
41+
lineno: int
42+
col_offset: int
43+
namespace: str
3644

3745

3846
class Visitor(ast.NodeVisitor):
3947
def __init__(self) -> None:
40-
self.pandas_namespace: MutableMapping[Offset, str] = {}
41-
self.no_namespace: Set[str] = set()
48+
self.pandas_namespace: MutableMapping[OffsetWithNamespace, str] = {}
49+
self.imported_from_pandas: Set[str] = set()
4250

4351
def visit_Attribute(self, node: ast.Attribute) -> None:
44-
if (
45-
isinstance(node.value, ast.Name)
46-
and node.value.id == "pd"
47-
and node.attr not in EXCLUDE
48-
):
49-
self.pandas_namespace[(node.lineno, node.col_offset)] = node.attr
52+
if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}:
53+
offset_with_namespace = OffsetWithNamespace(
54+
node.lineno, node.col_offset, node.value.id
55+
)
56+
self.pandas_namespace[offset_with_namespace] = node.attr
5057
self.generic_visit(node)
5158

52-
def visit_Name(self, node: ast.Name) -> None:
53-
if node.id not in EXCLUDE:
54-
self.no_namespace.add(node.id)
59+
def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
60+
if node.module is not None and "pandas" in node.module:
61+
self.imported_from_pandas.update(name.name for name in node.names)
5562
self.generic_visit(node)
5663

5764

@@ -64,9 +71,11 @@ def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str
6471

6572
tokens = src_to_tokens(content)
6673
for n, i in reversed_enumerate(tokens):
74+
offset_with_namespace = OffsetWithNamespace(i.offset[0], i.offset[1], i.src)
6775
if (
68-
i.offset in visitor.pandas_namespace
69-
and visitor.pandas_namespace[i.offset] in visitor.no_namespace
76+
offset_with_namespace in visitor.pandas_namespace
77+
and visitor.pandas_namespace[offset_with_namespace]
78+
in visitor.imported_from_pandas
7079
):
7180
# Replace `pd`
7281
tokens[n] = i._replace(src="")
@@ -85,16 +94,28 @@ def check_for_inconsistent_pandas_namespace(
8594
visitor = Visitor()
8695
visitor.visit(tree)
8796

88-
inconsistencies = visitor.no_namespace.intersection(
97+
inconsistencies = visitor.imported_from_pandas.intersection(
8998
visitor.pandas_namespace.values()
9099
)
100+
91101
if not inconsistencies:
92102
# No inconsistent namespace usage, nothing to replace.
93-
return content
103+
return None
94104

95105
if not replace:
96-
msg = ERROR_MESSAGE.format(name=inconsistencies.pop(), path=path)
97-
raise RuntimeError(msg)
106+
inconsistency = inconsistencies.pop()
107+
lineno, col_offset, prefix = next(
108+
key for key, val in visitor.pandas_namespace.items() if val == inconsistency
109+
)
110+
msg = ERROR_MESSAGE.format(
111+
lineno=lineno,
112+
col_offset=col_offset,
113+
prefix=prefix,
114+
name=inconsistency,
115+
path=path,
116+
)
117+
sys.stdout.write(msg)
118+
sys.exit(1)
98119

99120
return replace_inconsistent_pandas_namespace(visitor, content)
100121

0 commit comments

Comments
 (0)