Skip to content

Commit 6e28008

Browse files
authored
STY+CI: check for private function access across modules (#36144)
1 parent bdb6e26 commit 6e28008

File tree

14 files changed

+111
-33
lines changed

14 files changed

+111
-33
lines changed

Makefile

+7
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,10 @@ doc:
2525
cd doc; \
2626
python make.py clean; \
2727
python make.py html
28+
29+
check:
30+
python3 scripts/validate_unwanted_patterns.py \
31+
--validation-type="private_function_across_module" \
32+
--included-file-extensions="py" \
33+
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
34+
pandas/

ci/code_checks.sh

+8
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
116116
fi
117117
RET=$(($RET + $?)) ; echo $MSG "DONE"
118118

119+
MSG='Check for use of private module attribute access' ; echo $MSG
120+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
121+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
122+
else
123+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
124+
fi
125+
RET=$(($RET + $?)) ; echo $MSG "DONE"
126+
119127
echo "isort --version-number"
120128
isort --version-number
121129

pandas/_libs/algos.pyx

+7-7
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ ctypedef fused algos_t:
412412
uint8_t
413413

414414

415-
def _validate_limit(nobs: int, limit=None) -> int:
415+
def validate_limit(nobs: int, limit=None) -> int:
416416
"""
417417
Check that the `limit` argument is a positive integer.
418418

@@ -452,7 +452,7 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
452452
indexer = np.empty(nright, dtype=np.int64)
453453
indexer[:] = -1
454454

455-
lim = _validate_limit(nright, limit)
455+
lim = validate_limit(nright, limit)
456456

457457
if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
458458
return indexer
@@ -509,7 +509,7 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
509509
if N == 0:
510510
return
511511

512-
lim = _validate_limit(N, limit)
512+
lim = validate_limit(N, limit)
513513

514514
val = values[0]
515515
for i in range(N):
@@ -537,7 +537,7 @@ def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
537537
if N == 0:
538538
return
539539

540-
lim = _validate_limit(N, limit)
540+
lim = validate_limit(N, limit)
541541

542542
for j in range(K):
543543
fill_count = 0
@@ -593,7 +593,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
593593
indexer = np.empty(nright, dtype=np.int64)
594594
indexer[:] = -1
595595

596-
lim = _validate_limit(nright, limit)
596+
lim = validate_limit(nright, limit)
597597

598598
if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
599599
return indexer
@@ -651,7 +651,7 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
651651
if N == 0:
652652
return
653653

654-
lim = _validate_limit(N, limit)
654+
lim = validate_limit(N, limit)
655655

656656
val = values[N - 1]
657657
for i in range(N - 1, -1, -1):
@@ -681,7 +681,7 @@ def backfill_2d_inplace(algos_t[:, :] values,
681681
if N == 0:
682682
return
683683

684-
lim = _validate_limit(N, limit)
684+
lim = validate_limit(N, limit)
685685

686686
for j in range(K):
687687
fill_count = 0

pandas/core/algorithms.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
from pandas.core.indexers import validate_indices
6161

6262
if TYPE_CHECKING:
63-
from pandas import Categorical, DataFrame, Series
63+
from pandas import Categorical, DataFrame, Series # noqa:F401
6464

6565
_shared_docs: Dict[str, str] = {}
6666

@@ -767,7 +767,7 @@ def value_counts(
767767
counts = result._values
768768

769769
else:
770-
keys, counts = _value_counts_arraylike(values, dropna)
770+
keys, counts = value_counts_arraylike(values, dropna)
771771

772772
result = Series(counts, index=keys, name=name)
773773

@@ -780,8 +780,8 @@ def value_counts(
780780
return result
781781

782782

783-
# Called once from SparseArray
784-
def _value_counts_arraylike(values, dropna: bool):
783+
# Called once from SparseArray, otherwise could be private
784+
def value_counts_arraylike(values, dropna: bool):
785785
"""
786786
Parameters
787787
----------

pandas/core/arrays/sparse/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -735,7 +735,7 @@ def value_counts(self, dropna=True):
735735
"""
736736
from pandas import Index, Series
737737

738-
keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna)
738+
keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna)
739739
fcounts = self.sp_index.ngaps
740740
if fcounts > 0:
741741
if self._null_fill_value and dropna:

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ def fillna(
390390

391391
mask = isna(self.values)
392392
if limit is not None:
393-
limit = libalgos._validate_limit(None, limit=limit)
393+
limit = libalgos.validate_limit(None, limit=limit)
394394
mask[mask.cumsum(self.ndim - 1) > limit] = False
395395

396396
if not self._can_hold_na:

pandas/core/missing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ def interpolate_1d(
228228
)
229229

230230
# default limit is unlimited GH #16282
231-
limit = algos._validate_limit(nobs=None, limit=limit)
231+
limit = algos.validate_limit(nobs=None, limit=limit)
232232

233233
# These are sets of index pointers to invalid values... i.e. {0, 1, etc...
234234
all_nans = set(np.flatnonzero(invalid))

pandas/plotting/_matplotlib/compat.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ def inner():
1717
return inner
1818

1919

20-
_mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge)
21-
_mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge)
22-
_mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge)
23-
_mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge)
24-
_mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge)
20+
mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge)
21+
mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge)
22+
mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge)
23+
mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge)
24+
mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge)

pandas/plotting/_matplotlib/core.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import pandas.core.common as com
3030

3131
from pandas.io.formats.printing import pprint_thing
32-
from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
32+
from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0
3333
from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
3434
from pandas.plotting._matplotlib.style import get_standard_colors
3535
from pandas.plotting._matplotlib.timeseries import (
@@ -944,7 +944,7 @@ def _plot_colorbar(self, ax: "Axes", **kwds):
944944
img = ax.collections[-1]
945945
cbar = self.fig.colorbar(img, ax=ax, **kwds)
946946

947-
if _mpl_ge_3_0_0():
947+
if mpl_ge_3_0_0():
948948
# The workaround below is no longer necessary.
949949
return
950950

pandas/plotting/_matplotlib/tools.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def handle_shared_axes(
307307
sharey: bool,
308308
):
309309
if nplots > 1:
310-
if compat._mpl_ge_3_2_0():
310+
if compat.mpl_ge_3_2_0():
311311
row_num = lambda x: x.get_subplotspec().rowspan.start
312312
col_num = lambda x: x.get_subplotspec().colspan.start
313313
else:

pandas/tests/plotting/common.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ def setup_method(self, method):
2828

2929
mpl.rcdefaults()
3030

31-
self.mpl_ge_2_2_3 = compat._mpl_ge_2_2_3()
32-
self.mpl_ge_3_0_0 = compat._mpl_ge_3_0_0()
33-
self.mpl_ge_3_1_0 = compat._mpl_ge_3_1_0()
34-
self.mpl_ge_3_2_0 = compat._mpl_ge_3_2_0()
31+
self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3()
32+
self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0()
33+
self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0()
34+
self.mpl_ge_3_2_0 = compat.mpl_ge_3_2_0()
3535

3636
self.bp_n_objects = 7
3737
self.polycollection_factor = 2

pandas/tests/plotting/test_frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def _assert_xtickslabels_visibility(self, axes, expected):
5151
@pytest.mark.xfail(reason="Waiting for PR 34334", strict=True)
5252
@pytest.mark.slow
5353
def test_plot(self):
54-
from pandas.plotting._matplotlib.compat import _mpl_ge_3_1_0
54+
from pandas.plotting._matplotlib.compat import mpl_ge_3_1_0
5555

5656
df = self.tdf
5757
_check_plot_works(df.plot, grid=False)
@@ -69,7 +69,7 @@ def test_plot(self):
6969
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
7070

7171
df = DataFrame({"x": [1, 2], "y": [3, 4]})
72-
if _mpl_ge_3_1_0():
72+
if mpl_ge_3_1_0():
7373
msg = "'Line2D' object has no property 'blarg'"
7474
else:
7575
msg = "Unknown property blarg"

pandas/tests/plotting/test_misc.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def test_bootstrap_plot(self):
9696
class TestDataFramePlots(TestPlotBase):
9797
@td.skip_if_no_scipy
9898
def test_scatter_matrix_axis(self):
99-
from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
99+
from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0
100100

101101
scatter_matrix = plotting.scatter_matrix
102102

@@ -105,7 +105,7 @@ def test_scatter_matrix_axis(self):
105105

106106
# we are plotting multiples on a sub-plot
107107
with tm.assert_produces_warning(
108-
UserWarning, raise_on_extra_warnings=_mpl_ge_3_0_0()
108+
UserWarning, raise_on_extra_warnings=mpl_ge_3_0_0()
109109
):
110110
axes = _check_plot_works(
111111
scatter_matrix, filterwarnings="always", frame=df, range_padding=0.1

scripts/validate_unwanted_patterns.py

+66-3
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@
1616
import sys
1717
import token
1818
import tokenize
19-
from typing import IO, Callable, FrozenSet, Iterable, List, Tuple
20-
21-
PATHS_TO_IGNORE: Tuple[str, ...] = ("asv_bench/env",)
19+
from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple
2220

2321

2422
def _get_literal_string_prefix_len(token_string: str) -> int:
@@ -114,6 +112,58 @@ def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
114112
)
115113

116114

115+
PRIVATE_FUNCTIONS_ALLOWED = {"sys._getframe"} # no known alternative
116+
117+
118+
def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
119+
"""
120+
Checking that a private function is not used across modules.
121+
Parameters
122+
----------
123+
file_obj : IO
124+
File-like object containing the Python code to validate.
125+
Yields
126+
------
127+
line_number : int
128+
Line number of the private function that is used across modules.
129+
msg : str
130+
Explenation of the error.
131+
"""
132+
contents = file_obj.read()
133+
tree = ast.parse(contents)
134+
135+
imported_modules: Set[str] = set()
136+
137+
for node in ast.walk(tree):
138+
if isinstance(node, (ast.Import, ast.ImportFrom)):
139+
for module in node.names:
140+
module_fqdn = module.name if module.asname is None else module.asname
141+
imported_modules.add(module_fqdn)
142+
143+
if not isinstance(node, ast.Call):
144+
continue
145+
146+
try:
147+
module_name = node.func.value.id
148+
function_name = node.func.attr
149+
except AttributeError:
150+
continue
151+
152+
# Exception section #
153+
154+
# (Debatable) Class case
155+
if module_name[0].isupper():
156+
continue
157+
# (Debatable) Dunder methods case
158+
elif function_name.startswith("__") and function_name.endswith("__"):
159+
continue
160+
elif module_name + "." + function_name in PRIVATE_FUNCTIONS_ALLOWED:
161+
continue
162+
163+
if module_name in imported_modules and function_name.startswith("_"):
164+
yield (node.lineno, f"Private function '{module_name}.{function_name}'")
165+
166+
117167
def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
118168
"""
119169
This test case is necessary after 'Black' (https://github.com/psf/black),
@@ -293,6 +343,7 @@ def main(
293343
source_path: str,
294344
output_format: str,
295345
file_extensions_to_check: str,
346+
excluded_file_paths: str,
296347
) -> bool:
297348
"""
298349
Main entry point of the script.
@@ -305,6 +356,10 @@ def main(
305356
Source path representing path to a file/directory.
306357
output_format : str
307358
Output format of the error message.
359+
file_extensions_to_check : str
360+
Coma seperated values of what file extensions to check.
361+
excluded_file_paths : str
362+
Coma seperated values of what file paths to exclude during the check.
308363
309364
Returns
310365
-------
@@ -325,6 +380,7 @@ def main(
325380
FILE_EXTENSIONS_TO_CHECK: FrozenSet[str] = frozenset(
326381
file_extensions_to_check.split(",")
327382
)
383+
PATHS_TO_IGNORE = frozenset(excluded_file_paths.split(","))
328384

329385
if os.path.isfile(source_path):
330386
file_path = source_path
@@ -362,6 +418,7 @@ def main(
362418
if __name__ == "__main__":
363419
available_validation_types: List[str] = [
364420
"bare_pytest_raises",
421+
"private_function_across_module",
365422
"strings_to_concatenate",
366423
"strings_with_wrong_placed_whitespace",
367424
]
@@ -389,6 +446,11 @@ def main(
389446
default="py,pyx,pxd,pxi",
390447
help="Coma seperated file extensions to check.",
391448
)
449+
parser.add_argument(
450+
"--excluded-file-paths",
451+
default="asv_bench/env",
452+
help="Comma separated file extensions to check.",
453+
)
392454

393455
args = parser.parse_args()
394456

@@ -398,5 +460,6 @@ def main(
398460
source_path=args.path,
399461
output_format=args.format,
400462
file_extensions_to_check=args.included_file_extensions,
463+
excluded_file_paths=args.excluded_file_paths,
401464
)
402465
)

0 commit comments

Comments
 (0)