
STY+CI: check for private function access across modules #36144

Merged
merged 1 commit on Sep 5, 2020

7 changes: 7 additions & 0 deletions Makefile
@@ -25,3 +25,10 @@ doc:
cd doc; \
python make.py clean; \
python make.py html

check:
python3 scripts/validate_unwanted_patterns.py \
--validation-type="private_function_across_module" \
--included-file-extensions="py" \
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
pandas/
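
For context, a minimal sketch of the pattern the new check target is meant to flag: one module calling another module's underscore-prefixed function. The module and function names below are hypothetical (not part of this diff); the call mirrors the pre-PR usage in pandas/core/arrays/sparse/array.py changed further down.

# hypothetical_module.py -- illustration only, not part of this diff
from pandas.core import algorithms as algos

def sparse_value_counts(sp_values, dropna=True):
    # "_value_counts_arraylike" starts with "_" and is accessed on an imported
    # module, so the new private_function_across_module validation flags this call.
    # After this PR, the public spelling is algos.value_counts_arraylike.
    return algos._value_counts_arraylike(sp_values, dropna=dropna)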
8 changes: 8 additions & 0 deletions ci/code_checks.sh
@@ -116,6 +116,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
fi
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for use of private module attribute access' ; echo $MSG
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
else
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
fi
RET=$(($RET + $?)) ; echo $MSG "DONE"

echo "isort --version-number"
isort --version-number

14 changes: 7 additions & 7 deletions pandas/_libs/algos.pyx
@@ -412,7 +412,7 @@ ctypedef fused algos_t:
uint8_t


def _validate_limit(nobs: int, limit=None) -> int:
def validate_limit(nobs: int, limit=None) -> int:
"""
Check that the `limit` argument is a positive integer.

@@ -452,7 +452,7 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
indexer = np.empty(nright, dtype=np.int64)
indexer[:] = -1

lim = _validate_limit(nright, limit)
lim = validate_limit(nright, limit)

if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
return indexer
@@ -509,7 +509,7 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
if N == 0:
return

lim = _validate_limit(N, limit)
lim = validate_limit(N, limit)

val = values[0]
for i in range(N):
@@ -537,7 +537,7 @@ def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
if N == 0:
return

lim = _validate_limit(N, limit)
lim = validate_limit(N, limit)

for j in range(K):
fill_count = 0
@@ -593,7 +593,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
indexer = np.empty(nright, dtype=np.int64)
indexer[:] = -1

lim = _validate_limit(nright, limit)
lim = validate_limit(nright, limit)

if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
return indexer
@@ -651,7 +651,7 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
if N == 0:
return

lim = _validate_limit(N, limit)
lim = validate_limit(N, limit)

val = values[N - 1]
for i in range(N - 1, -1, -1):
@@ -681,7 +681,7 @@ def backfill_2d_inplace(algos_t[:, :] values,
if N == 0:
return

lim = _validate_limit(N, limit)
lim = validate_limit(N, limit)

for j in range(K):
fill_count = 0
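
Side note on the rename above: validate_limit is now a public helper. A quick sketch of its documented behaviour, assuming (per its docstring and the call sites in pandas/core/missing.py and pandas/core/internals/blocks.py) that limit must be a positive integer and limit=None means "no limit":

from pandas._libs import algos as libalgos

lim = libalgos.validate_limit(nobs=100, limit=5)  # validated limit -> 5
lim = libalgos.validate_limit(nobs=100)           # limit=None -> falls back to nobs (unlimited)
# libalgos.validate_limit(nobs=100, limit=0)      # raises ValueError (limit must be a positive integer)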
8 changes: 4 additions & 4 deletions pandas/core/algorithms.py
@@ -60,7 +60,7 @@
from pandas.core.indexers import validate_indices

if TYPE_CHECKING:
from pandas import Categorical, DataFrame, Series
from pandas import Categorical, DataFrame, Series # noqa:F401

_shared_docs: Dict[str, str] = {}

@@ -767,7 +767,7 @@ def value_counts(
counts = result._values

else:
keys, counts = _value_counts_arraylike(values, dropna)
keys, counts = value_counts_arraylike(values, dropna)

result = Series(counts, index=keys, name=name)

@@ -780,8 +780,8 @@
return result


# Called once from SparseArray
def _value_counts_arraylike(values, dropna: bool):
# Called once from SparseArray, otherwise could be private
def value_counts_arraylike(values, dropna: bool):
"""
Parameters
----------
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
@@ -735,7 +735,7 @@ def value_counts(self, dropna=True):
"""
from pandas import Index, Series

keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna)
keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna)
fcounts = self.sp_index.ngaps
if fcounts > 0:
if self._null_fill_value and dropna:
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
@@ -390,7 +390,7 @@ def fillna(

mask = isna(self.values)
if limit is not None:
limit = libalgos._validate_limit(None, limit=limit)
limit = libalgos.validate_limit(None, limit=limit)
mask[mask.cumsum(self.ndim - 1) > limit] = False

if not self._can_hold_na:
2 changes: 1 addition & 1 deletion pandas/core/missing.py
@@ -228,7 +228,7 @@ def interpolate_1d(
)

# default limit is unlimited GH #16282
limit = algos._validate_limit(nobs=None, limit=limit)
limit = algos.validate_limit(nobs=None, limit=limit)

# These are sets of index pointers to invalid values... i.e. {0, 1, etc...
all_nans = set(np.flatnonzero(invalid))
10 changes: 5 additions & 5 deletions pandas/plotting/_matplotlib/compat.py
@@ -17,8 +17,8 @@ def inner():
return inner


_mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge)
_mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge)
_mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge)
_mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge)
_mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge)
mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge)
mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge)
mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge)
mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge)
mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge)
4 changes: 2 additions & 2 deletions pandas/plotting/_matplotlib/core.py
@@ -30,7 +30,7 @@
import pandas.core.common as com

from pandas.io.formats.printing import pprint_thing
from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0
from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
from pandas.plotting._matplotlib.style import get_standard_colors
from pandas.plotting._matplotlib.timeseries import (
@@ -939,7 +939,7 @@ def _plot_colorbar(self, ax: "Axes", **kwds):
img = ax.collections[-1]
cbar = self.fig.colorbar(img, ax=ax, **kwds)

if _mpl_ge_3_0_0():
if mpl_ge_3_0_0():
# The workaround below is no longer necessary.
return

2 changes: 1 addition & 1 deletion pandas/plotting/_matplotlib/tools.py
@@ -307,7 +307,7 @@ def handle_shared_axes(
sharey: bool,
):
if nplots > 1:
if compat._mpl_ge_3_2_0():
if compat.mpl_ge_3_2_0():
row_num = lambda x: x.get_subplotspec().rowspan.start
col_num = lambda x: x.get_subplotspec().colspan.start
else:
8 changes: 4 additions & 4 deletions pandas/tests/plotting/common.py
@@ -28,10 +28,10 @@ def setup_method(self, method):

mpl.rcdefaults()

self.mpl_ge_2_2_3 = compat._mpl_ge_2_2_3()
self.mpl_ge_3_0_0 = compat._mpl_ge_3_0_0()
self.mpl_ge_3_1_0 = compat._mpl_ge_3_1_0()
self.mpl_ge_3_2_0 = compat._mpl_ge_3_2_0()
self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3()
self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0()
self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0()
self.mpl_ge_3_2_0 = compat.mpl_ge_3_2_0()

self.bp_n_objects = 7
self.polycollection_factor = 2
4 changes: 2 additions & 2 deletions pandas/tests/plotting/test_frame.py
@@ -51,7 +51,7 @@ def _assert_xtickslabels_visibility(self, axes, expected):
@pytest.mark.xfail(reason="Waiting for PR 34334", strict=True)
@pytest.mark.slow
def test_plot(self):
from pandas.plotting._matplotlib.compat import _mpl_ge_3_1_0
from pandas.plotting._matplotlib.compat import mpl_ge_3_1_0

df = self.tdf
_check_plot_works(df.plot, grid=False)
@@ -69,7 +69,7 @@ def test_plot(self):
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))

df = DataFrame({"x": [1, 2], "y": [3, 4]})
if _mpl_ge_3_1_0():
if mpl_ge_3_1_0():
msg = "'Line2D' object has no property 'blarg'"
else:
msg = "Unknown property blarg"
4 changes: 2 additions & 2 deletions pandas/tests/plotting/test_misc.py
@@ -96,7 +96,7 @@ def test_bootstrap_plot(self):
class TestDataFramePlots(TestPlotBase):
@td.skip_if_no_scipy
def test_scatter_matrix_axis(self):
from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0

scatter_matrix = plotting.scatter_matrix

@@ -105,7 +105,7 @@ def test_scatter_matrix_axis(self):

# we are plotting multiples on a sub-plot
with tm.assert_produces_warning(
UserWarning, raise_on_extra_warnings=_mpl_ge_3_0_0()
UserWarning, raise_on_extra_warnings=mpl_ge_3_0_0()
):
axes = _check_plot_works(
scatter_matrix, filterwarnings="always", frame=df, range_padding=0.1
69 changes: 66 additions & 3 deletions scripts/validate_unwanted_patterns.py
@@ -16,9 +16,7 @@
import sys
import token
import tokenize
from typing import IO, Callable, FrozenSet, Iterable, List, Tuple

PATHS_TO_IGNORE: Tuple[str, ...] = ("asv_bench/env",)
from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple


def _get_literal_string_prefix_len(token_string: str) -> int:
@@ -114,6 +112,58 @@ def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
)


PRIVATE_FUNCTIONS_ALLOWED = {"sys._getframe"} # no known alternative


def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
"""
Check that a private function is not used across modules.

Parameters
----------
file_obj : IO
File-like object containing the Python code to validate.

Yields
------
line_number : int
Line number of the private function that is used across modules.
msg : str
Explanation of the error.
"""
contents = file_obj.read()
tree = ast.parse(contents)

imported_modules: Set[str] = set()

for node in ast.walk(tree):
if isinstance(node, (ast.Import, ast.ImportFrom)):
for module in node.names:
module_fqdn = module.name if module.asname is None else module.asname
imported_modules.add(module_fqdn)

if not isinstance(node, ast.Call):
continue

try:
module_name = node.func.value.id
function_name = node.func.attr
except AttributeError:
continue

# Exception section #

# (Debatable) Class case
if module_name[0].isupper():
continue
# (Debatable) Dunder methods case
elif function_name.startswith("__") and function_name.endswith("__"):
continue
elif module_name + "." + function_name in PRIVATE_FUNCTIONS_ALLOWED:
continue

if module_name in imported_modules and function_name.startswith("_"):
yield (node.lineno, f"Private function '{module_name}.{function_name}'")


def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
"""
This test case is necessary after 'Black' (https://github.com/psf/black),
@@ -293,6 +343,7 @@ def main(
source_path: str,
output_format: str,
file_extensions_to_check: str,
excluded_file_paths: str,
) -> bool:
"""
Main entry point of the script.
@@ -305,6 +356,10 @@
Source path representing path to a file/directory.
output_format : str
Output format of the error message.
file_extensions_to_check : str
Comma-separated values of what file extensions to check.
excluded_file_paths : str
Comma-separated values of what file paths to exclude during the check.

Returns
-------
@@ -325,6 +380,7 @@ def main(
FILE_EXTENSIONS_TO_CHECK: FrozenSet[str] = frozenset(
file_extensions_to_check.split(",")
)
PATHS_TO_IGNORE = frozenset(excluded_file_paths.split(","))

if os.path.isfile(source_path):
file_path = source_path
@@ -362,6 +418,7 @@ def main(
if __name__ == "__main__":
available_validation_types: List[str] = [
"bare_pytest_raises",
"private_function_across_module",
"strings_to_concatenate",
"strings_with_wrong_placed_whitespace",
]
@@ -389,6 +446,11 @@ def main(
default="py,pyx,pxd,pxi",
help="Coma seperated file extensions to check.",
)
parser.add_argument(
"--excluded-file-paths",
default="asv_bench/env",
help="Comma separated file extensions to check.",
)

args = parser.parse_args()

@@ -398,5 +460,6 @@ def main(
source_path=args.path,
output_format=args.format,
file_extensions_to_check=args.included_file_extensions,
excluded_file_paths=args.excluded_file_paths,
)
)
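
Finally, to exercise the new validation outside CI, a minimal sketch that feeds a two-line snippet through private_function_across_module. It assumes the pandas repository root is the current directory so that scripts/validate_unwanted_patterns.py is importable; the snippet contents are made up for illustration.

import io
import sys

sys.path.insert(0, "scripts")  # assumption: running from the pandas repo root
from validate_unwanted_patterns import private_function_across_module

snippet = io.StringIO(
    "from pandas.core import algorithms as algos\n"
    "keys, counts = algos._value_counts_arraylike(values, True)\n"
)

for lineno, msg in private_function_across_module(snippet):
    print(f"{lineno}: {msg}")

# Expected output, per the yield in the validator:
# 2: Private function 'algos._value_counts_arraylike'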