STY/WIP: check for private imports/lookups (#36055)

jbrockmendel · web-flow · commit 4729d8f6e6e7 · 2020-09-12T17:30:32.000-04:00
diff --git a/Makefile b/Makefile
@@ -32,3 +32,9 @@ check:
 		--included-file-extensions="py" \
 		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
 		pandas/
+
+	python3 scripts/validate_unwanted_patterns.py \
+		--validation-type="private_import_across_module" \
+		--included-file-extensions="py" \
+		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
+		pandas/
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -116,11 +116,19 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     fi
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Check for use of private module attribute access' ; echo $MSG
+    MSG='Check for import of private attributes across modules' ; echo $MSG
     if [[ "$GITHUB_ACTIONS" == "true" ]]; then
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
     else
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
+    fi
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Check for use of private functions across modules' ; echo $MSG
+    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
+    else
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
     fi
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -54,7 +54,7 @@
 
 from pandas.core import missing, nanops, ops
 from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts
-from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray
+from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
 from pandas.core.arrays.base import ExtensionOpsMixin
 import pandas.core.common as com
 from pandas.core.construction import array, extract_array
@@ -472,11 +472,11 @@ class DatetimeLikeArrayMixin(
     def _ndarray(self) -> np.ndarray:
         return self._data
 
-    def _from_backing_data(self: _T, arr: np.ndarray) -> _T:
+    def _from_backing_data(
+        self: DatetimeLikeArrayT, arr: np.ndarray
+    ) -> DatetimeLikeArrayT:
         # Note: we do not retain `freq`
-        return type(self)._simple_new(  # type: ignore[attr-defined]
-            arr, dtype=self.dtype
-        )
+        return type(self)._simple_new(arr, dtype=self.dtype)
 
     # ------------------------------------------------------------------
 
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -106,7 +106,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
             [t.numpy_dtype if isinstance(t, BaseMaskedDtype) else t for t in dtypes], []
         )
         if np.issubdtype(np_dtype, np.integer):
-            return _dtypes[str(np_dtype)]
+            return STR_TO_DTYPE[str(np_dtype)]
         return None
 
     def __from_arrow__(
@@ -214,7 +214,7 @@ def coerce_to_array(
 
         if not issubclass(type(dtype), _IntegerDtype):
             try:
-                dtype = _dtypes[str(np.dtype(dtype))]
+                dtype = STR_TO_DTYPE[str(np.dtype(dtype))]
             except KeyError as err:
                 raise ValueError(f"invalid dtype specified {dtype}") from err
 
@@ -354,7 +354,7 @@ class IntegerArray(BaseMaskedArray):
 
     @cache_readonly
     def dtype(self) -> _IntegerDtype:
-        return _dtypes[str(self._data.dtype)]
+        return STR_TO_DTYPE[str(self._data.dtype)]
 
     def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
         if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]):
@@ -735,7 +735,7 @@ class UInt64Dtype(_IntegerDtype):
     __doc__ = _dtype_docstring.format(dtype="uint64")
 
 
-_dtypes: Dict[str, _IntegerDtype] = {
+STR_TO_DTYPE: Dict[str, _IntegerDtype] = {
     "int8": Int8Dtype(),
     "int16": Int16Dtype(),
     "int32": Int32Dtype(),
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1151,9 +1151,11 @@ def convert_dtypes(
             target_int_dtype = "Int64"
 
             if is_integer_dtype(input_array.dtype):
-                from pandas.core.arrays.integer import _dtypes
+                from pandas.core.arrays.integer import STR_TO_DTYPE
 
-                inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype)
+                inferred_dtype = STR_TO_DTYPE.get(
+                    input_array.dtype.name, target_int_dtype
+                )
             if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
                 input_array.dtype
             ):
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -459,7 +459,7 @@ def f(self):
 
 
 @contextmanager
-def group_selection_context(groupby: "_GroupBy"):
+def group_selection_context(groupby: "BaseGroupBy"):
     """
     Set / reset the group_selection_context.
     """
@@ -479,7 +479,7 @@ def group_selection_context(groupby: "_GroupBy"):
 ]
 
 
-class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
+class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
     _group_selection = None
     _apply_allowlist: FrozenSet[str] = frozenset()
 
@@ -1212,7 +1212,7 @@ def _apply_filter(self, indices, dropna):
 OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame)
 
 
-class GroupBy(_GroupBy[FrameOrSeries]):
+class GroupBy(BaseGroupBy[FrameOrSeries]):
     """
     Class for grouping and aggregating relational data.
 
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -312,9 +312,9 @@ def _is_dates_only(self) -> bool:
         -------
         bool
         """
-        from pandas.io.formats.format import _is_dates_only
+        from pandas.io.formats.format import is_dates_only
 
-        return self.tz is None and _is_dates_only(self._values)
+        return self.tz is None and is_dates_only(self._values)
 
     def __reduce__(self):
 
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -26,7 +26,12 @@
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.groupby.base import GroupByMixin
 from pandas.core.groupby.generic import SeriesGroupBy
-from pandas.core.groupby.groupby import GroupBy, _GroupBy, _pipe_template, get_groupby
+from pandas.core.groupby.groupby import (
+    BaseGroupBy,
+    GroupBy,
+    _pipe_template,
+    get_groupby,
+)
 from pandas.core.groupby.grouper import Grouper
 from pandas.core.groupby.ops import BinGrouper
 from pandas.core.indexes.api import Index
@@ -40,7 +45,7 @@
 _shared_docs_kwargs: Dict[str, str] = dict()
 
 
-class Resampler(_GroupBy, ShallowMixin):
+class Resampler(BaseGroupBy, ShallowMixin):
     """
     Class for resampling datetimelike data, a groupby-like operation.
     See aggregate, transform, and apply functions on this object.
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
@@ -15,7 +15,7 @@
 
 import pandas.core.common as common
 from pandas.core.window.common import _doc_template, _shared_docs, zsqrt
-from pandas.core.window.rolling import _Rolling, flex_binary_moment
+from pandas.core.window.rolling import RollingMixin, flex_binary_moment
 
 _bias_template = """
         Parameters
@@ -60,7 +60,7 @@ def get_center_of_mass(
     return float(comass)
 
 
-class ExponentialMovingWindow(_Rolling):
+class ExponentialMovingWindow(RollingMixin):
     r"""
     Provide exponential weighted (EW) functions.
 
diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py
@@ -5,10 +5,10 @@
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.window.common import WindowGroupByMixin, _doc_template, _shared_docs
-from pandas.core.window.rolling import _Rolling_and_Expanding
+from pandas.core.window.rolling import RollingAndExpandingMixin
 
 
-class Expanding(_Rolling_and_Expanding):
+class Expanding(RollingAndExpandingMixin):
     """
     Provide expanding transformations.
 
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -1214,13 +1214,13 @@ def std(self, ddof=1, *args, **kwargs):
         return zsqrt(self.var(ddof=ddof, name="std", **kwargs))
 
 
-class _Rolling(_Window):
+class RollingMixin(_Window):
     @property
     def _constructor(self):
         return Rolling
 
 
-class _Rolling_and_Expanding(_Rolling):
+class RollingAndExpandingMixin(RollingMixin):
 
     _shared_docs["count"] = dedent(
         r"""
@@ -1917,7 +1917,7 @@ def _get_corr(a, b):
         )
 
 
-class Rolling(_Rolling_and_Expanding):
+class Rolling(RollingAndExpandingMixin):
     @cache_readonly
     def is_datetimelike(self) -> bool:
         return isinstance(
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1586,7 +1586,7 @@ def format_percentiles(
     return [i + "%" for i in out]
 
 
-def _is_dates_only(
+def is_dates_only(
     values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex]
 ) -> bool:
     # return a boolean if we are only dates (and don't have a timezone)
@@ -1658,8 +1658,8 @@ def get_format_datetime64_from_values(
         #  only accepts 1D values
         values = values.ravel()
 
-    is_dates_only = _is_dates_only(values)
-    if is_dates_only:
+    ido = is_dates_only(values)
+    if ido:
         return date_format or "%Y-%m-%d"
     return date_format
 
@@ -1668,9 +1668,9 @@ class Datetime64TZFormatter(Datetime64Formatter):
     def _format_strings(self) -> List[str]:
         """ we by definition have a TZ """
         values = self.values.astype(object)
-        is_dates_only = _is_dates_only(values)
+        ido = is_dates_only(values)
         formatter = self.formatter or get_format_datetime64(
-            is_dates_only, date_format=self.date_format
+            ido, date_format=self.date_format
         )
         fmt_values = [formatter(x) for x in values]
 
diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
@@ -18,6 +18,39 @@
 import tokenize
 from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple
 
+PRIVATE_IMPORTS_TO_IGNORE: Set[str] = {
+    "_extension_array_shared_docs",
+    "_index_shared_docs",
+    "_interval_shared_docs",
+    "_merge_doc",
+    "_shared_docs",
+    "_apply_docs",
+    "_new_Index",
+    "_new_PeriodIndex",
+    "_doc_template",
+    "_agg_template",
+    "_pipe_template",
+    "_get_version",
+    "__main__",
+    "_transform_template",
+    "_arith_doc_FRAME",
+    "_flex_comp_doc_FRAME",
+    "_make_flex_doc",
+    "_op_descriptions",
+    "_IntegerDtype",
+    "_use_inf_as_na",
+    "_get_plot_backend",
+    "_matplotlib",
+    "_arrow_utils",
+    "_registry",
+    "_get_offset",  # TODO: remove after get_offset deprecation enforced
+    "_test_parse_iso8601",
+    "_json_normalize",  # TODO: remove after deprecation is enforced
+    "_testing",
+    "_test_decorators",
+    "__version__",  # check np.__version__ in compat.numpy.function
+}
+
 
 def _get_literal_string_prefix_len(token_string: str) -> int:
     """
@@ -164,6 +197,36 @@ def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str
             yield (node.lineno, f"Private function '{module_name}.{function_name}'")
 
 
+def private_import_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
+    """
+    Checking that a private function is not imported across modules.
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+    Yields
+    ------
+    line_number : int
+        Line number of import statement, that imports the private function.
+    msg : str
+        Explenation of the error.
+    """
+    contents = file_obj.read()
+    tree = ast.parse(contents)
+
+    for node in ast.walk(tree):
+        if not (isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom)):
+            continue
+
+        for module in node.names:
+            module_name = module.name.split(".")[-1]
+            if module_name in PRIVATE_IMPORTS_TO_IGNORE:
+                continue
+
+            if module_name.startswith("_"):
+                yield (node.lineno, f"Import of internal function {repr(module_name)}")
+
+
 def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
     """
     This test case is necessary after 'Black' (https://github.com/psf/black),
@@ -419,6 +482,7 @@ def main(
     available_validation_types: List[str] = [
         "bare_pytest_raises",
         "private_function_across_module",
+        "private_import_across_module",
         "strings_to_concatenate",
         "strings_with_wrong_placed_whitespace",
     ]
@@ -449,7 +513,7 @@ def main(
     parser.add_argument(
         "--excluded-file-paths",
         default="asv_bench/env",
-        help="Comma separated file extensions to check.",
+        help="Comma separated file paths to exclude.",
     )
 
     args = parser.parse_args()