Skip to content

STY/WIP: check for private imports/lookups #36055

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Sep 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,9 @@ check:
--included-file-extensions="py" \
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
pandas/

python3 scripts/validate_unwanted_patterns.py \
--validation-type="private_import_across_module" \
--included-file-extensions="py" \
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
pandas/
14 changes: 11 additions & 3 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,19 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
fi
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for use of private module attribute access' ; echo $MSG
MSG='Check for import of private attributes across modules' ; echo $MSG
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
else
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
fi
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for use of private functions across modules' ; echo $MSG
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
else
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
fi
RET=$(($RET + $?)) ; echo $MSG "DONE"

Expand Down
10 changes: 5 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@

from pandas.core import missing, nanops, ops
from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts
from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.arrays.base import ExtensionOpsMixin
import pandas.core.common as com
from pandas.core.construction import array, extract_array
Expand Down Expand Up @@ -472,11 +472,11 @@ class DatetimeLikeArrayMixin(
def _ndarray(self) -> np.ndarray:
return self._data

def _from_backing_data(self: _T, arr: np.ndarray) -> _T:
def _from_backing_data(
self: DatetimeLikeArrayT, arr: np.ndarray
) -> DatetimeLikeArrayT:
# Note: we do not retain `freq`
return type(self)._simple_new( # type: ignore[attr-defined]
arr, dtype=self.dtype
)
return type(self)._simple_new(arr, dtype=self.dtype)

# ------------------------------------------------------------------

Expand Down
8 changes: 4 additions & 4 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
[t.numpy_dtype if isinstance(t, BaseMaskedDtype) else t for t in dtypes], []
)
if np.issubdtype(np_dtype, np.integer):
return _dtypes[str(np_dtype)]
return STR_TO_DTYPE[str(np_dtype)]
return None

def __from_arrow__(
Expand Down Expand Up @@ -214,7 +214,7 @@ def coerce_to_array(

if not issubclass(type(dtype), _IntegerDtype):
try:
dtype = _dtypes[str(np.dtype(dtype))]
dtype = STR_TO_DTYPE[str(np.dtype(dtype))]
except KeyError as err:
raise ValueError(f"invalid dtype specified {dtype}") from err

Expand Down Expand Up @@ -354,7 +354,7 @@ class IntegerArray(BaseMaskedArray):

@cache_readonly
def dtype(self) -> _IntegerDtype:
return _dtypes[str(self._data.dtype)]
return STR_TO_DTYPE[str(self._data.dtype)]

def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]):
Expand Down Expand Up @@ -735,7 +735,7 @@ class UInt64Dtype(_IntegerDtype):
__doc__ = _dtype_docstring.format(dtype="uint64")


_dtypes: Dict[str, _IntegerDtype] = {
STR_TO_DTYPE: Dict[str, _IntegerDtype] = {
"int8": Int8Dtype(),
"int16": Int16Dtype(),
"int32": Int32Dtype(),
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,9 +1151,11 @@ def convert_dtypes(
target_int_dtype = "Int64"

if is_integer_dtype(input_array.dtype):
from pandas.core.arrays.integer import _dtypes
from pandas.core.arrays.integer import STR_TO_DTYPE

inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype)
inferred_dtype = STR_TO_DTYPE.get(
input_array.dtype.name, target_int_dtype
)
if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
input_array.dtype
):
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def f(self):


@contextmanager
def group_selection_context(groupby: "_GroupBy"):
def group_selection_context(groupby: "BaseGroupBy"):
"""
Set / reset the group_selection_context.
"""
Expand All @@ -479,7 +479,7 @@ def group_selection_context(groupby: "_GroupBy"):
]


class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
_group_selection = None
_apply_allowlist: FrozenSet[str] = frozenset()

Expand Down Expand Up @@ -1212,7 +1212,7 @@ def _apply_filter(self, indices, dropna):
OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame)


class GroupBy(_GroupBy[FrameOrSeries]):
class GroupBy(BaseGroupBy[FrameOrSeries]):
"""
Class for grouping and aggregating relational data.

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,9 @@ def _is_dates_only(self) -> bool:
-------
bool
"""
from pandas.io.formats.format import _is_dates_only
from pandas.io.formats.format import is_dates_only

return self.tz is None and _is_dates_only(self._values)
return self.tz is None and is_dates_only(self._values)

def __reduce__(self):

Expand Down
9 changes: 7 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.groupby.base import GroupByMixin
from pandas.core.groupby.generic import SeriesGroupBy
from pandas.core.groupby.groupby import GroupBy, _GroupBy, _pipe_template, get_groupby
from pandas.core.groupby.groupby import (
BaseGroupBy,
GroupBy,
_pipe_template,
get_groupby,
)
from pandas.core.groupby.grouper import Grouper
from pandas.core.groupby.ops import BinGrouper
from pandas.core.indexes.api import Index
Expand All @@ -40,7 +45,7 @@
_shared_docs_kwargs: Dict[str, str] = dict()


class Resampler(_GroupBy, ShallowMixin):
class Resampler(BaseGroupBy, ShallowMixin):
"""
Class for resampling datetimelike data, a groupby-like operation.
See aggregate, transform, and apply functions on this object.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import pandas.core.common as common
from pandas.core.window.common import _doc_template, _shared_docs, zsqrt
from pandas.core.window.rolling import _Rolling, flex_binary_moment
from pandas.core.window.rolling import RollingMixin, flex_binary_moment

_bias_template = """
Parameters
Expand Down Expand Up @@ -60,7 +60,7 @@ def get_center_of_mass(
return float(comass)


class ExponentialMovingWindow(_Rolling):
class ExponentialMovingWindow(RollingMixin):
r"""
Provide exponential weighted (EW) functions.

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/window/expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
from pandas.util._decorators import Appender, Substitution, doc

from pandas.core.window.common import WindowGroupByMixin, _doc_template, _shared_docs
from pandas.core.window.rolling import _Rolling_and_Expanding
from pandas.core.window.rolling import RollingAndExpandingMixin


class Expanding(_Rolling_and_Expanding):
class Expanding(RollingAndExpandingMixin):
"""
Provide expanding transformations.

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1214,13 +1214,13 @@ def std(self, ddof=1, *args, **kwargs):
return zsqrt(self.var(ddof=ddof, name="std", **kwargs))


class _Rolling(_Window):
class RollingMixin(_Window):
@property
def _constructor(self):
return Rolling


class _Rolling_and_Expanding(_Rolling):
class RollingAndExpandingMixin(RollingMixin):

_shared_docs["count"] = dedent(
r"""
Expand Down Expand Up @@ -1917,7 +1917,7 @@ def _get_corr(a, b):
)


class Rolling(_Rolling_and_Expanding):
class Rolling(RollingAndExpandingMixin):
@cache_readonly
def is_datetimelike(self) -> bool:
return isinstance(
Expand Down
10 changes: 5 additions & 5 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1586,7 +1586,7 @@ def format_percentiles(
return [i + "%" for i in out]


def _is_dates_only(
def is_dates_only(
values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex]
) -> bool:
# return a boolean if we are only dates (and don't have a timezone)
Expand Down Expand Up @@ -1658,8 +1658,8 @@ def get_format_datetime64_from_values(
# only accepts 1D values
values = values.ravel()

is_dates_only = _is_dates_only(values)
if is_dates_only:
ido = is_dates_only(values)
if ido:
return date_format or "%Y-%m-%d"
return date_format

Expand All @@ -1668,9 +1668,9 @@ class Datetime64TZFormatter(Datetime64Formatter):
def _format_strings(self) -> List[str]:
""" we by definition have a TZ """
values = self.values.astype(object)
is_dates_only = _is_dates_only(values)
ido = is_dates_only(values)
formatter = self.formatter or get_format_datetime64(
is_dates_only, date_format=self.date_format
ido, date_format=self.date_format
)
fmt_values = [formatter(x) for x in values]

Expand Down
66 changes: 65 additions & 1 deletion scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,39 @@
import tokenize
from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple

# Underscore-prefixed names that are deliberately shared across modules
# (shared docstring templates, sanctioned private helpers, dunder names),
# and are therefore exempt from the private-import validation.
# Kept alphabetically sorted for easy scanning.
PRIVATE_IMPORTS_TO_IGNORE: Set[str] = {
    "__main__",
    "__version__",  # check np.__version__ in compat.numpy.function
    "_IntegerDtype",
    "_agg_template",
    "_apply_docs",
    "_arith_doc_FRAME",
    "_arrow_utils",
    "_doc_template",
    "_extension_array_shared_docs",
    "_flex_comp_doc_FRAME",
    "_get_offset",  # TODO: remove after get_offset deprecation enforced
    "_get_plot_backend",
    "_get_version",
    "_index_shared_docs",
    "_interval_shared_docs",
    "_json_normalize",  # TODO: remove after deprecation is enforced
    "_make_flex_doc",
    "_matplotlib",
    "_merge_doc",
    "_new_Index",
    "_new_PeriodIndex",
    "_op_descriptions",
    "_pipe_template",
    "_registry",
    "_shared_docs",
    "_test_decorators",
    "_test_parse_iso8601",
    "_testing",
    "_transform_template",
    "_use_inf_as_na",
}


def _get_literal_string_prefix_len(token_string: str) -> int:
"""
Expand Down Expand Up @@ -164,6 +197,36 @@ def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str
yield (node.lineno, f"Private function '{module_name}.{function_name}'")


def private_import_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
    """
    Check that a private name (leading underscore) is not imported
    across modules.

    Walks the module's AST and flags every ``import``/``from ... import``
    whose imported name starts with an underscore, unless the name is
    explicitly allowed in ``PRIVATE_IMPORTS_TO_IGNORE``.

    Parameters
    ----------
    file_obj : IO
        File-like object containing the Python code to validate.

    Yields
    ------
    line_number : int
        Line number of the import statement that imports the private name.
    msg : str
        Explanation of the error.
    """
    contents = file_obj.read()
    tree = ast.parse(contents)

    for node in ast.walk(tree):
        if not isinstance(node, (ast.Import, ast.ImportFrom)):
            continue

        for module in node.names:
            # For dotted imports (``import a.b._c``) only the final
            # component determines privateness.
            module_name = module.name.split(".")[-1]
            if module_name in PRIVATE_IMPORTS_TO_IGNORE:
                continue

            if module_name.startswith("_"):
                yield (node.lineno, f"Import of internal function {repr(module_name)}")


def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
"""
This test case is necessary after 'Black' (https://github.com/psf/black),
Expand Down Expand Up @@ -419,6 +482,7 @@ def main(
available_validation_types: List[str] = [
"bare_pytest_raises",
"private_function_across_module",
"private_import_across_module",
"strings_to_concatenate",
"strings_with_wrong_placed_whitespace",
]
Expand Down Expand Up @@ -449,7 +513,7 @@ def main(
parser.add_argument(
"--excluded-file-paths",
default="asv_bench/env",
help="Comma separated file extensions to check.",
help="Comma separated file paths to exclude.",
)

args = parser.parse_args()
Expand Down