diff --git a/Makefile b/Makefile
index f26689ab65ba5..4a9a48992f92f 100644
--- a/Makefile
+++ b/Makefile
@@ -25,3 +25,10 @@ doc:
 	cd doc; \
 	python make.py clean; \
 	python make.py html
+
+check:
+	python3 scripts/validate_unwanted_patterns.py \
+		--validation-type="private_function_across_module" \
+		--included-file-extensions="py" \
+		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
+		pandas/
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 6006d09bc3e78..9efc53ced1b8f 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -116,6 +116,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     fi
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check for use of private module attribute access' ; echo $MSG
+    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
+    else
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
+    fi
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     echo "isort --version-number"
     isort --version-number
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 0a70afda893cf..c4723a5f064c7 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -412,7 +412,7 @@ ctypedef fused algos_t:
     uint8_t
 
 
-def _validate_limit(nobs: int, limit=None) -> int:
+def validate_limit(nobs: int, limit=None) -> int:
     """
     Check that the `limit` argument is a positive integer.
 
@@ -452,7 +452,7 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
     indexer = np.empty(nright, dtype=np.int64)
     indexer[:] = -1
 
-    lim = _validate_limit(nright, limit)
+    lim = validate_limit(nright, limit)
 
     if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
         return indexer
@@ -509,7 +509,7 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
     if N == 0:
         return
 
-    lim = _validate_limit(N, limit)
+    lim = validate_limit(N, limit)
 
     val = values[0]
     for i in range(N):
@@ -537,7 +537,7 @@ def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
     if N == 0:
         return
 
-    lim = _validate_limit(N, limit)
+    lim = validate_limit(N, limit)
 
     for j in range(K):
         fill_count = 0
@@ -593,7 +593,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
     indexer = np.empty(nright, dtype=np.int64)
     indexer[:] = -1
 
-    lim = _validate_limit(nright, limit)
+    lim = validate_limit(nright, limit)
 
     if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
         return indexer
@@ -651,7 +651,7 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
     if N == 0:
         return
 
-    lim = _validate_limit(N, limit)
+    lim = validate_limit(N, limit)
 
     val = values[N - 1]
     for i in range(N - 1, -1, -1):
@@ -681,7 +681,7 @@ def backfill_2d_inplace(algos_t[:, :] values,
     if N == 0:
         return
 
-    lim = _validate_limit(N, limit)
+    lim = validate_limit(N, limit)
 
     for j in range(K):
         fill_count = 0
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index f297c7165208f..50ec3714f454b 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -60,7 +60,7 @@ from pandas.core.indexers import validate_indices
 
 if TYPE_CHECKING:
-    from pandas import Categorical, DataFrame, Series
+    from pandas import Categorical, DataFrame, Series  # noqa:F401
 
 _shared_docs: Dict[str, str] = {}
@@ -767,7 +767,7 @@ def value_counts(
         counts = result._values
 
     else:
-        keys, counts = _value_counts_arraylike(values, dropna)
+        keys, counts = value_counts_arraylike(values, dropna)
 
         result = Series(counts, index=keys, name=name)
 
@@ -780,8 +780,8 @@ def value_counts(
     return result
 
 
-# Called once from SparseArray
-def _value_counts_arraylike(values, dropna: bool):
+# Called once from SparseArray, otherwise could be private
+def value_counts_arraylike(values, dropna: bool):
     """
     Parameters
     ----------
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 1531f7b292365..47c960dc969d6 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -735,7 +735,7 @@ def value_counts(self, dropna=True):
         """
         from pandas import Index, Series
 
-        keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna)
+        keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna)
         fcounts = self.sp_index.ngaps
         if fcounts > 0:
             if self._null_fill_value and dropna:
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 3bcd4debbf41a..9f4e535dc787d 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -390,7 +390,7 @@ def fillna(
         mask = isna(self.values)
 
         if limit is not None:
-            limit = libalgos._validate_limit(None, limit=limit)
+            limit = libalgos.validate_limit(None, limit=limit)
             mask[mask.cumsum(self.ndim - 1) > limit] = False
 
         if not self._can_hold_na:
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 7802c5cbdbfb3..be66b19d10064 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -228,7 +228,7 @@ def interpolate_1d(
     )
 
     # default limit is unlimited GH #16282
-    limit = algos._validate_limit(nobs=None, limit=limit)
+    limit = algos.validate_limit(nobs=None, limit=limit)
 
     # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
     all_nans = set(np.flatnonzero(invalid))
diff --git a/pandas/plotting/_matplotlib/compat.py b/pandas/plotting/_matplotlib/compat.py
index 7f107f18eca25..964596d9b6319 100644
--- a/pandas/plotting/_matplotlib/compat.py
+++ b/pandas/plotting/_matplotlib/compat.py
@@ -17,8 +17,8 @@ def inner():
     return inner
 
 
-_mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge)
-_mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge)
-_mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge)
-_mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge)
-_mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge)
+mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge)
+mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge)
+mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge)
+mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge)
+mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge)
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index f0b35e1cd2a74..17f6696ba3480 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -30,7 +30,7 @@ import pandas.core.common as com
 from pandas.io.formats.printing import pprint_thing
-from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
+from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0
 from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
 from pandas.plotting._matplotlib.style import get_standard_colors
 from pandas.plotting._matplotlib.timeseries import (
@@ -939,7 +939,7 @@ def _plot_colorbar(self, ax: "Axes", **kwds):
         img = ax.collections[-1]
         cbar = self.fig.colorbar(img, ax=ax, **kwds)
 
-        if _mpl_ge_3_0_0():
+        if mpl_ge_3_0_0():
             # The workaround below is no longer necessary.
             return
 
diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py
index 98aaab6838fba..c5b44f37150bb 100644
--- a/pandas/plotting/_matplotlib/tools.py
+++ b/pandas/plotting/_matplotlib/tools.py
@@ -307,7 +307,7 @@ def handle_shared_axes(
     sharey: bool,
 ):
     if nplots > 1:
-        if compat._mpl_ge_3_2_0():
+        if compat.mpl_ge_3_2_0():
             row_num = lambda x: x.get_subplotspec().rowspan.start
             col_num = lambda x: x.get_subplotspec().colspan.start
         else:
diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py
index b753c96af6290..9301a29933d45 100644
--- a/pandas/tests/plotting/common.py
+++ b/pandas/tests/plotting/common.py
@@ -28,10 +28,10 @@ def setup_method(self, method):
 
         mpl.rcdefaults()
 
-        self.mpl_ge_2_2_3 = compat._mpl_ge_2_2_3()
-        self.mpl_ge_3_0_0 = compat._mpl_ge_3_0_0()
-        self.mpl_ge_3_1_0 = compat._mpl_ge_3_1_0()
-        self.mpl_ge_3_2_0 = compat._mpl_ge_3_2_0()
+        self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3()
+        self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0()
+        self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0()
+        self.mpl_ge_3_2_0 = compat.mpl_ge_3_2_0()
 
         self.bp_n_objects = 7
         self.polycollection_factor = 2
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 128a7bdb6730a..9a323f2024721 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -51,7 +51,7 @@ def _assert_xtickslabels_visibility(self, axes, expected):
     @pytest.mark.xfail(reason="Waiting for PR 34334", strict=True)
     @pytest.mark.slow
     def test_plot(self):
-        from pandas.plotting._matplotlib.compat import _mpl_ge_3_1_0
+        from pandas.plotting._matplotlib.compat import mpl_ge_3_1_0
 
         df = self.tdf
         _check_plot_works(df.plot, grid=False)
@@ -69,7 +69,7 @@ def test_plot(self):
         self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
 
         df = DataFrame({"x": [1, 2], "y": [3, 4]})
-        if _mpl_ge_3_1_0():
+        if mpl_ge_3_1_0():
             msg = "'Line2D' object has no property 'blarg'"
         else:
             msg = "Unknown property blarg"
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
index 130acaa8bcd58..0208ab3e0225b 100644
--- a/pandas/tests/plotting/test_misc.py
+++ b/pandas/tests/plotting/test_misc.py
@@ -96,7 +96,7 @@ def test_bootstrap_plot(self):
 class TestDataFramePlots(TestPlotBase):
     @td.skip_if_no_scipy
     def test_scatter_matrix_axis(self):
-        from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
+        from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0
 
         scatter_matrix = plotting.scatter_matrix
 
@@ -105,7 +105,7 @@ def test_scatter_matrix_axis(self):
 
         # we are plotting multiples on a sub-plot
         with tm.assert_produces_warning(
-            UserWarning, raise_on_extra_warnings=_mpl_ge_3_0_0()
+            UserWarning, raise_on_extra_warnings=mpl_ge_3_0_0()
         ):
             axes = _check_plot_works(
                 scatter_matrix, filterwarnings="always", frame=df, range_padding=0.1
diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
index 193fef026a96b..1a6d8cc8b9914 100755
--- a/scripts/validate_unwanted_patterns.py
+++ b/scripts/validate_unwanted_patterns.py
@@ -16,9 +16,7 @@
 import sys
 import token
 import tokenize
-from typing import IO, Callable, FrozenSet, Iterable, List, Tuple
-
-PATHS_TO_IGNORE: Tuple[str, ...] = ("asv_bench/env",)
+from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple
 
 
 def _get_literal_string_prefix_len(token_string: str) -> int:
@@ -114,6 +112,58 @@ def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
             )
 
 
+PRIVATE_FUNCTIONS_ALLOWED = {"sys._getframe"}  # no known alternative
+
+
+def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
+    """
+    Check that a private function is not used across modules.
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+    Yields
+    ------
+    line_number : int
+        Line number of the private function that is used across modules.
+    msg : str
+        Explanation of the error.
+    """
+    contents = file_obj.read()
+    tree = ast.parse(contents)
+
+    imported_modules: Set[str] = set()
+
+    for node in ast.walk(tree):
+        if isinstance(node, (ast.Import, ast.ImportFrom)):
+            for module in node.names:
+                module_fqdn = module.name if module.asname is None else module.asname
+                imported_modules.add(module_fqdn)
+
+        if not isinstance(node, ast.Call):
+            continue
+
+        try:
+            module_name = node.func.value.id
+            function_name = node.func.attr
+        except AttributeError:
+            continue
+
+        # Exception section #
+
+        # (Debatable) Class case
+        if module_name[0].isupper():
+            continue
+        # (Debatable) Dunder methods case
+        elif function_name.startswith("__") and function_name.endswith("__"):
+            continue
+        elif module_name + "." + function_name in PRIVATE_FUNCTIONS_ALLOWED:
+            continue
+
+        if module_name in imported_modules and function_name.startswith("_"):
+            yield (node.lineno, f"Private function '{module_name}.{function_name}'")
+
+
 def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
     """
     This test case is necessary after 'Black' (https://github.com/psf/black),
@@ -293,6 +343,7 @@ def main(
     source_path: str,
     output_format: str,
     file_extensions_to_check: str,
+    excluded_file_paths: str,
 ) -> bool:
     """
     Main entry point of the script.
@@ -305,6 +356,10 @@ def main(
         Source path representing path to a file/directory.
     output_format : str
         Output format of the error message.
+    file_extensions_to_check : str
+        Comma-separated list of file extensions to check.
+    excluded_file_paths : str
+        Comma-separated list of file paths to exclude from the check.
 
     Returns
     -------
@@ -325,6 +380,7 @@ def main(
     FILE_EXTENSIONS_TO_CHECK: FrozenSet[str] = frozenset(
         file_extensions_to_check.split(",")
     )
+    PATHS_TO_IGNORE = frozenset(excluded_file_paths.split(","))
 
     if os.path.isfile(source_path):
         file_path = source_path
@@ -362,6 +418,7 @@ def main(
 if __name__ == "__main__":
     available_validation_types: List[str] = [
         "bare_pytest_raises",
+        "private_function_across_module",
         "strings_to_concatenate",
         "strings_with_wrong_placed_whitespace",
     ]
@@ -389,6 +446,11 @@ def main(
         default="py,pyx,pxd,pxi",
         help="Coma seperated file extensions to check.",
     )
+    parser.add_argument(
+        "--excluded-file-paths",
+        default="asv_bench/env",
+        help="Comma separated file paths to exclude from the check.",
+    )
 
     args = parser.parse_args()
 
@@ -398,5 +460,6 @@ def main(
             source_path=args.path,
             output_format=args.format,
             file_extensions_to_check=args.included_file_extensions,
+            excluded_file_paths=args.excluded_file_paths,
         )
     )
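
For reference, a minimal sketch of what the new `private_function_across_module` check reports. This snippet is not part of the patch; it assumes `scripts/` is on `sys.path` so the script can be imported as a module, and `SOURCE` is a made-up module used only for illustration:

```python
# illustration.py -- hypothetical, not part of this diff: feed an in-memory
# module to the new validator and print what it flags.
import io

import validate_unwanted_patterns  # assumes scripts/ is on sys.path

SOURCE = """\
import pandas.core.algorithms as algos

def counts(values):
    return algos._value_counts_arraylike(values, dropna=True)
"""

# private_function_across_module yields (line_number, message) pairs for every
# underscore-prefixed function that is called through an imported module.
for lineno, msg in validate_unwanted_patterns.private_function_across_module(
    io.StringIO(SOURCE)
):
    print(f"{lineno}: {msg}")
# Expected output for the snippet above:
# 4: Private function 'algos._value_counts_arraylike'
```

This is the same rule that `make check` and the new ci/code_checks.sh step apply to `pandas/` (with `pandas/tests`, `asv_bench/`, and `pandas/_vendored` excluded); only names that were actually imported in the file are considered, and capitalized (class-like) names, dunder methods, and the entries in `PRIVATE_FUNCTIONS_ALLOWED` (currently only `sys._getframe`) are exempt.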