
STY+CI: check for private function access across modules #36144

Merged
merged 1 commit on Sep 5, 2020

7 changes: 7 additions & 0 deletions Makefile
@@ -25,3 +25,10 @@ doc:
cd doc; \
python make.py clean; \
python make.py html

check:
python3 scripts/validate_unwanted_patterns.py \
--validation-type="private_function_across_module" \
--included-file-extensions="py" \
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
pandas/
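
For context, a minimal sketch of the pattern the new check target is meant to flag: one module calling another module's underscore-prefixed function. The module and function names below are hypothetical (not part of this diff); the call mirrors the pre-PR usage in pandas/core/arrays/sparse/array.py changed further down.

# hypothetical_module.py -- illustration only, not part of this diff
from pandas.core import algorithms as algos

def sparse_value_counts(sp_values, dropna=True):
    # "_value_counts_arraylike" starts with "_" and is accessed on an imported
    # module, so the new private_function_across_module validation flags this call.
    # After this PR, the public spelling is algos.value_counts_arraylike.
    return algos._value_counts_arraylike(sp_values, dropna=dropna)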
8 changes: 8 additions & 0 deletions ci/code_checks.sh
@@ -116,6 +116,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
fi
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for use of private module attribute access' ; echo $MSG
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
else
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
fi
RET=$(($RET + $?)) ; echo $MSG "DONE"

echo "isort --version-number"
isort --version-number

14 changes: 7 additions & 7 deletions pandas/_libs/algos.pyx
@@ -412,7 +412,7 @@ ctypedef fused algos_t:
uint8_t


def _validate_limit(nobs: int, limit=None) -> int:
def validate_limit(nobs: int, limit=None) -> int:
"""
Check that the `limit` argument is a positive integer.

@@ -452,7 +452,7 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
indexer = np.empty(nright, dtype=np.int64)
indexer[:] = -1

lim = _validate_limit(nright, limit)
lim = validate_limit(nright, limit)

if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
return indexer
@@ -509,7 +509,7 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
if N == 0:
return

lim = _validate_limit(N, limit)
lim = validate_limit(N, limit)

val = values[0]
for i in range(N):
@@ -537,7 +537,7 @@ def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
if N == 0:
return

lim = _validate_limit(N, limit)
lim = validate_limit(N, limit)

for j in range(K):
fill_count = 0
@@ -593,7 +593,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
indexer = np.empty(nright, dtype=np.int64)
indexer[:] = -1

lim = _validate_limit(nright, limit)
lim = validate_limit(nright, limit)

if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
return indexer
@@ -651,7 +651,7 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
if N == 0:
return

lim = _validate_limit(N, limit)
lim = validate_limit(N, limit)

val = values[N - 1]
for i in range(N - 1, -1, -1):
@@ -681,7 +681,7 @@ def backfill_2d_inplace(algos_t[:, :] values,
if N == 0:
return

lim = _validate_limit(N, limit)
lim = validate_limit(N, limit)

for j in range(K):
fill_count = 0
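
Side note on the rename above: validate_limit is now a public helper. A quick sketch of its documented behaviour, assuming (per its docstring and the call sites in pandas/core/missing.py and pandas/core/internals/blocks.py) that limit must be a positive integer and limit=None means "no limit":

from pandas._libs import algos as libalgos

lim = libalgos.validate_limit(nobs=100, limit=5)  # validated limit -> 5
lim = libalgos.validate_limit(nobs=100)           # limit=None -> falls back to nobs (unlimited)
# libalgos.validate_limit(nobs=100, limit=0)      # raises ValueError (limit must be a positive integer)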
8 changes: 4 additions & 4 deletions pandas/core/algorithms.py
@@ -60,7 +60,7 @@
from pandas.core.indexers import validate_indices

if TYPE_CHECKING:
from pandas import Categorical, DataFrame, Series
from pandas import Categorical, DataFrame, Series # noqa:F401

_shared_docs: Dict[str, str] = {}

@@ -767,7 +767,7 @@ def value_counts(
counts = result._values

else:
keys, counts = _value_counts_arraylike(values, dropna)
keys, counts = value_counts_arraylike(values, dropna)

result = Series(counts, index=keys, name=name)

@@ -780,8 +780,8 @@
return result


# Called once from SparseArray
def _value_counts_arraylike(values, dropna: bool):
# Called once from SparseArray, otherwise could be private
def value_counts_arraylike(values, dropna: bool):
"""
Parameters
----------
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
@@ -735,7 +735,7 @@ def value_counts(self, dropna=True):
"""
from pandas import Index, Series

keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna)
keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna)
fcounts = self.sp_index.ngaps
if fcounts > 0:
if self._null_fill_value and dropna:
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
@@ -390,7 +390,7 @@ def fillna(

mask = isna(self.values)
if limit is not None:
limit = libalgos._validate_limit(None, limit=limit)
limit = libalgos.validate_limit(None, limit=limit)
mask[mask.cumsum(self.ndim - 1) > limit] = False

if not self._can_hold_na:
2 changes: 1 addition & 1 deletion pandas/core/missing.py
@@ -228,7 +228,7 @@ def interpolate_1d(
)

# default limit is unlimited GH #16282
limit = algos._validate_limit(nobs=None, limit=limit)
limit = algos.validate_limit(nobs=None, limit=limit)

# These are sets of index pointers to invalid values... i.e. {0, 1, etc...
all_nans = set(np.flatnonzero(invalid))
10 changes: 5 additions & 5 deletions pandas/plotting/_matplotlib/compat.py
@@ -17,8 +17,8 @@ def inner():
return inner


_mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge)
_mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge)
_mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge)
_mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge)
_mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge)
mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge)
mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge)
mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge)
mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge)
mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge)
4 changes: 2 additions & 2 deletions pandas/plotting/_matplotlib/core.py
@@ -30,7 +30,7 @@
import pandas.core.common as com

from pandas.io.formats.printing import pprint_thing
from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0
from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
from pandas.plotting._matplotlib.style import get_standard_colors
from pandas.plotting._matplotlib.timeseries import (
@@ -939,7 +939,7 @@ def _plot_colorbar(self, ax: "Axes", **kwds):
img = ax.collections[-1]
cbar = self.fig.colorbar(img, ax=ax, **kwds)

if _mpl_ge_3_0_0():
if mpl_ge_3_0_0():
# The workaround below is no longer necessary.
return

2 changes: 1 addition & 1 deletion pandas/plotting/_matplotlib/tools.py
@@ -307,7 +307,7 @@ def handle_shared_axes(
sharey: bool,
):
if nplots > 1:
if compat._mpl_ge_3_2_0():
if compat.mpl_ge_3_2_0():
row_num = lambda x: x.get_subplotspec().rowspan.start
col_num = lambda x: x.get_subplotspec().colspan.start
else:
8 changes: 4 additions & 4 deletions pandas/tests/plotting/common.py
@@ -28,10 +28,10 @@ def setup_method(self, method):

mpl.rcdefaults()

self.mpl_ge_2_2_3 = compat._mpl_ge_2_2_3()
self.mpl_ge_3_0_0 = compat._mpl_ge_3_0_0()
self.mpl_ge_3_1_0 = compat._mpl_ge_3_1_0()
self.mpl_ge_3_2_0 = compat._mpl_ge_3_2_0()
self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3()
self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0()
self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0()
self.mpl_ge_3_2_0 = compat.mpl_ge_3_2_0()

self.bp_n_objects = 7
self.polycollection_factor = 2
4 changes: 2 additions & 2 deletions pandas/tests/plotting/test_frame.py
@@ -51,7 +51,7 @@ def _assert_xtickslabels_visibility(self, axes, expected):
@pytest.mark.xfail(reason="Waiting for PR 34334", strict=True)
@pytest.mark.slow
def test_plot(self):
from pandas.plotting._matplotlib.compat import _mpl_ge_3_1_0
from pandas.plotting._matplotlib.compat import mpl_ge_3_1_0

df = self.tdf
_check_plot_works(df.plot, grid=False)
@@ -69,7 +69,7 @@ def test_plot(self):
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))

df = DataFrame({"x": [1, 2], "y": [3, 4]})
if _mpl_ge_3_1_0():
if mpl_ge_3_1_0():
msg = "'Line2D' object has no property 'blarg'"
else:
msg = "Unknown property blarg"
4 changes: 2 additions & 2 deletions pandas/tests/plotting/test_misc.py
@@ -96,7 +96,7 @@ def test_bootstrap_plot(self):
class TestDataFramePlots(TestPlotBase):
@td.skip_if_no_scipy
def test_scatter_matrix_axis(self):
from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0

scatter_matrix = plotting.scatter_matrix

@@ -105,7 +105,7 @@ def test_scatter_matrix_axis(self):

# we are plotting multiples on a sub-plot
with tm.assert_produces_warning(
UserWarning, raise_on_extra_warnings=_mpl_ge_3_0_0()
UserWarning, raise_on_extra_warnings=mpl_ge_3_0_0()
):
axes = _check_plot_works(
scatter_matrix, filterwarnings="always", frame=df, range_padding=0.1
69 changes: 66 additions & 3 deletions scripts/validate_unwanted_patterns.py
@@ -16,9 +16,7 @@
import sys
import token
import tokenize
from typing import IO, Callable, FrozenSet, Iterable, List, Tuple

PATHS_TO_IGNORE: Tuple[str, ...] = ("asv_bench/env",)
from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple


def _get_literal_string_prefix_len(token_string: str) -> int:
@@ -114,6 +112,58 @@ def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
)


PRIVATE_FUNCTIONS_ALLOWED = {"sys._getframe"} # no known alternative


def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
"""
Check that a private function is not used across modules.

Parameters
----------
file_obj : IO
File-like object containing the Python code to validate.

Yields
------
line_number : int
Line number of the private function that is used across modules.
msg : str
Explanation of the error.
"""
contents = file_obj.read()
tree = ast.parse(contents)

imported_modules: Set[str] = set()

for node in ast.walk(tree):
if isinstance(node, (ast.Import, ast.ImportFrom)):
for module in node.names:
module_fqdn = module.name if module.asname is None else module.asname
imported_modules.add(module_fqdn)

if not isinstance(node, ast.Call):
continue

try:
module_name = node.func.value.id
function_name = node.func.attr
except AttributeError:
continue

# Exception section #

# (Debatable) Class case
if module_name[0].isupper():
continue
# (Debatable) Dunder methods case
elif function_name.startswith("__") and function_name.endswith("__"):
continue
elif module_name + "." + function_name in PRIVATE_FUNCTIONS_ALLOWED:
continue

if module_name in imported_modules and function_name.startswith("_"):
yield (node.lineno, f"Private function '{module_name}.{function_name}'")


def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
"""
This test case is necessary after 'Black' (https://github.com/psf/black),
@@ -293,6 +343,7 @@ def main(
source_path: str,
output_format: str,
file_extensions_to_check: str,
excluded_file_paths: str,
) -> bool:
"""
Main entry point of the script.
@@ -305,6 +356,10 @@
Source path representing path to a file/directory.
output_format : str
Output format of the error message.
file_extensions_to_check : str
Comma-separated values of what file extensions to check.
excluded_file_paths : str
Comma-separated values of what file paths to exclude during the check.

Returns
-------
@@ -325,6 +380,7 @@ def main(
FILE_EXTENSIONS_TO_CHECK: FrozenSet[str] = frozenset(
file_extensions_to_check.split(",")
)
PATHS_TO_IGNORE = frozenset(excluded_file_paths.split(","))

if os.path.isfile(source_path):
file_path = source_path
@@ -362,6 +418,7 @@ def main(
if __name__ == "__main__":
available_validation_types: List[str] = [
"bare_pytest_raises",
"private_function_across_module",
"strings_to_concatenate",
"strings_with_wrong_placed_whitespace",
]
@@ -389,6 +446,11 @@ def main(
default="py,pyx,pxd,pxi",
help="Coma seperated file extensions to check.",
)
parser.add_argument(
"--excluded-file-paths",
default="asv_bench/env",
help="Comma separated file extensions to check.",
)

args = parser.parse_args()

@@ -398,5 +460,6 @@ def main(
source_path=args.path,
output_format=args.format,
file_extensions_to_check=args.included_file_extensions,
excluded_file_paths=args.excluded_file_paths,
)
)
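
Finally, to exercise the new validation outside CI, a minimal sketch that feeds a two-line snippet through private_function_across_module. It assumes the pandas repository root is the current directory so that scripts/validate_unwanted_patterns.py is importable; the snippet contents are made up for illustration.

import io
import sys

sys.path.insert(0, "scripts")  # assumption: running from the pandas repo root
from validate_unwanted_patterns import private_function_across_module

snippet = io.StringIO(
    "from pandas.core import algorithms as algos\n"
    "keys, counts = algos._value_counts_arraylike(values, True)\n"
)

for lineno, msg in private_function_across_module(snippet):
    print(f"{lineno}: {msg}")

# Expected output, per the yield in the validator:
# 2: Private function 'algos._value_counts_arraylike'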