From 25f3b97e929d04fdcd071eb9ce75242ad7134b5f Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Sep 2020 17:00:42 -0700 Subject: [PATCH 01/12] de-privatize names imported across modules --- asv_bench/benchmarks/sparse.py | 2 +- pandas/__init__.py | 194 ++++++++---------- pandas/_testing.py | 2 +- pandas/compat/numpy/__init__.py | 10 +- pandas/core/array_algos/masked_reductions.py | 4 +- pandas/core/arrays/sparse/accessor.py | 8 +- pandas/core/arrays/sparse/scipy_sparse.py | 4 +- pandas/core/common.py | 4 +- pandas/core/computation/engines.py | 4 +- pandas/core/computation/expr.py | 24 +-- pandas/core/computation/expressions.py | 12 +- pandas/core/computation/ops.py | 38 ++-- pandas/core/computation/scope.py | 4 +- pandas/core/dtypes/cast.py | 4 +- pandas/core/dtypes/common.py | 2 +- pandas/core/generic.py | 4 +- pandas/core/index.py | 2 +- pandas/core/indexes/base.py | 4 +- pandas/core/indexes/multi.py | 4 +- pandas/core/nanops.py | 6 +- pandas/core/ops/__init__.py | 12 +- pandas/core/reshape/merge.py | 2 +- pandas/io/formats/format.py | 4 +- pandas/io/json/_json.py | 6 +- pandas/io/parsers.py | 6 +- pandas/tests/api/test_api.py | 6 +- pandas/tests/arrays/test_datetimelike.py | 4 +- pandas/tests/computation/test_eval.py | 42 ++-- pandas/tests/frame/test_arithmetic.py | 2 +- pandas/tests/generic/test_generic.py | 6 +- pandas/tests/indexes/common.py | 4 +- pandas/tests/indexes/multi/test_analytics.py | 4 +- pandas/tests/indexes/test_numpy_compat.py | 8 +- pandas/tests/indexing/test_loc.py | 4 +- .../tests/scalar/timedelta/test_arithmetic.py | 4 +- pandas/tests/test_common.py | 4 +- pandas/tests/test_expressions.py | 2 +- pandas/util/_test_decorators.py | 6 +- 38 files changed, 224 insertions(+), 238 deletions(-) diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 28ceb25eebd96..15d450d987f9d 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -71,7 +71,7 @@ def setup(self): s.index = 
MultiIndex.from_product([range(10)] * 4) self.ss = s.astype("Sparse") - def time_sparse_series_to_coo(self): + def time_sparse_series_to_coo(self): self.ss.sparse.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True) diff --git a/pandas/__init__.py b/pandas/__init__.py index 2737bcd8f9ccf..bb4b54e62d7aa 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -20,9 +20,9 @@ # numpy compat from pandas.compat.numpy import ( - _np_version_under1p17, - _np_version_under1p18, - _is_numpy_dev, + is_numpy_dev as _is_numpy_dev, + np_version_under1p17 as _np_version_under1p17, + np_version_under1p18 as _np_version_under1p18, ) try: @@ -37,148 +37,134 @@ ) from e from pandas._config import ( - get_option, - set_option, - reset_option, describe_option, + get_option, option_context, options, + reset_option, + set_option, ) +from pandas.util._print_versions import show_versions +from pandas.util._tester import test + +import pandas.api +import pandas.arrays +from pandas.core.arrays.sparse import SparseDtype +from pandas.core.computation.api import eval + # let init-time option registration happen import pandas.core.config_init +from pandas.core.reshape.api import ( + concat, + crosstab, + cut, + get_dummies, + lreshape, + melt, + merge, + merge_asof, + merge_ordered, + pivot, + pivot_table, + qcut, + wide_to_long, +) +import pandas.testing -from pandas.core.api import ( - # dtype +from pandas.io.json import _json_normalize as json_normalize +from pandas.tseries import offsets +from pandas.tseries.api import infer_freq + +# use the closest tagged version if possible +from ._version import get_versions + +from pandas.core.api import ( # dtype; missing; indexes; tseries; conversion; misc + NA, + BooleanDtype, + Categorical, + CategoricalDtype, + CategoricalIndex, + DataFrame, + DateOffset, + DatetimeIndex, + DatetimeTZDtype, + Flags, + Float64Index, + Grouper, + Index, + IndexSlice, Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + Int64Index, + Interval, + 
IntervalDtype, + IntervalIndex, + MultiIndex, + NamedAgg, + NaT, + Period, + PeriodDtype, + PeriodIndex, + RangeIndex, + Series, + StringDtype, + Timedelta, + TimedeltaIndex, + Timestamp, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype, - CategoricalDtype, - PeriodDtype, - IntervalDtype, - DatetimeTZDtype, - StringDtype, - BooleanDtype, - # missing - NA, + UInt64Index, + array, + bdate_range, + date_range, + factorize, + interval_range, isna, isnull, notna, notnull, - # indexes - Index, - CategoricalIndex, - Int64Index, - UInt64Index, - RangeIndex, - Float64Index, - MultiIndex, - IntervalIndex, - TimedeltaIndex, - DatetimeIndex, - PeriodIndex, - IndexSlice, - # tseries - NaT, - Period, period_range, - Timedelta, + set_eng_float_format, timedelta_range, - Timestamp, - date_range, - bdate_range, - Interval, - interval_range, - DateOffset, - # conversion - to_numeric, to_datetime, + to_numeric, to_timedelta, - # misc - Flags, - Grouper, - factorize, unique, value_counts, - NamedAgg, - array, - Categorical, - set_eng_float_format, - Series, - DataFrame, -) - -from pandas.core.arrays.sparse import SparseDtype - -from pandas.tseries.api import infer_freq -from pandas.tseries import offsets - -from pandas.core.computation.api import eval - -from pandas.core.reshape.api import ( - concat, - lreshape, - melt, - wide_to_long, - merge, - merge_asof, - merge_ordered, - crosstab, - pivot, - pivot_table, - get_dummies, - cut, - qcut, ) -import pandas.api -from pandas.util._print_versions import show_versions -from pandas.io.api import ( - # excel +from pandas.io.api import ( # excel; parsers; pickle; pytables; sql; misc ExcelFile, ExcelWriter, - read_excel, - # parsers - read_csv, - read_fwf, - read_table, - # pickle - read_pickle, - to_pickle, - # pytables HDFStore, - read_hdf, - # sql - read_sql, - read_sql_query, - read_sql_table, - # misc read_clipboard, - read_parquet, - read_orc, + read_csv, + read_excel, read_feather, + read_fwf, read_gbq, + read_hdf, read_html, 
read_json, - read_stata, + read_orc, + read_parquet, + read_pickle, read_sas, read_spss, + read_sql, + read_sql_query, + read_sql_table, + read_stata, + read_table, + to_pickle, ) -from pandas.io.json import _json_normalize as json_normalize - -from pandas.util._tester import test -import pandas.testing -import pandas.arrays - -# use the closest tagged version if possible -from ._version import get_versions v = get_versions() __version__ = v.get("closest-tag", v["version"]) diff --git a/pandas/_testing.py b/pandas/_testing.py index 7dba578951deb..9db0c3496e290 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -2713,7 +2713,7 @@ def use_numexpr(use, min_elements=None): if min_elements is None: min_elements = expr._MIN_ELEMENTS - olduse = expr._USE_NUMEXPR + olduse = expr.USE_NUMEXPR oldmin = expr._MIN_ELEMENTS expr.set_use_numexpr(use) expr._MIN_ELEMENTS = min_elements diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 08d06da93bb45..a2444b7ba5a0d 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -8,11 +8,11 @@ # numpy versioning _np_version = np.__version__ _nlv = LooseVersion(_np_version) -_np_version_under1p17 = _nlv < LooseVersion("1.17") -_np_version_under1p18 = _nlv < LooseVersion("1.18") +np_version_under1p17 = _nlv < LooseVersion("1.17") +np_version_under1p18 = _nlv < LooseVersion("1.18") _np_version_under1p19 = _nlv < LooseVersion("1.19") _np_version_under1p20 = _nlv < LooseVersion("1.20") -_is_numpy_dev = ".dev" in str(_nlv) +is_numpy_dev = ".dev" in str(_nlv) _min_numpy_ver = "1.16.5" @@ -65,6 +65,6 @@ def np_array_datetime64_compat(arr, *args, **kwargs): __all__ = [ "np", "_np_version", - "_np_version_under1p17", - "_is_numpy_dev", + "np_version_under1p17", + "is_numpy_dev", ] diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 1b9ed014f27b7..3f4625e2b712a 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ 
b/pandas/core/array_algos/masked_reductions.py @@ -8,7 +8,7 @@ import numpy as np from pandas._libs import missing as libmissing -from pandas.compat.numpy import _np_version_under1p17 +from pandas.compat.numpy import np_version_under1p17 from pandas.core.nanops import check_below_min_count @@ -46,7 +46,7 @@ def _sumprod( if check_below_min_count(values.shape, mask, min_count): return libmissing.NA - if _np_version_under1p17: + if np_version_under1p17: return func(values[~mask]) else: return func(values, where=~mask) diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index da8d695c59b9e..ec4b0fd89860c 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -88,9 +88,9 @@ def from_coo(cls, A, dense_index=False): dtype: Sparse[float64, nan] """ from pandas import Series - from pandas.core.arrays.sparse.scipy_sparse import _coo_to_sparse_series + from pandas.core.arrays.sparse.scipy_sparse import coo_to_sparse_series - result = _coo_to_sparse_series(A, dense_index=dense_index) + result = coo_to_sparse_series(A, dense_index=dense_index) result = Series(result.array, index=result.index, copy=False) return result @@ -168,9 +168,9 @@ def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): >>> columns [('a', 0), ('a', 1), ('b', 0), ('b', 1)] """ - from pandas.core.arrays.sparse.scipy_sparse import _sparse_series_to_coo + from pandas.core.arrays.sparse.scipy_sparse import sparse_series_to_coo - A, rows, columns = _sparse_series_to_coo( + A, rows, columns = sparse_series_to_coo( self._parent, row_levels, column_levels, sort_labels=sort_labels ) return A, rows, columns diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index eafd782dc9b9c..56c678c88b9c7 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -85,7 +85,7 @@ def _get_index_subset_to_coord_dict(index, subset, 
sort_labels=False): return values, i_coord, j_coord, i_labels, j_labels -def _sparse_series_to_coo(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): +def sparse_series_to_coo(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): """ Convert a sparse Series to a scipy.sparse.coo_matrix using index levels row_levels, column_levels as the row and column @@ -113,7 +113,7 @@ def _sparse_series_to_coo(ss, row_levels=(0,), column_levels=(1,), sort_labels=F return sparse_matrix, rows, columns -def _coo_to_sparse_series(A, dense_index: bool = False): +def coo_to_sparse_series(A, dense_index: bool = False): """ Convert a scipy.sparse.coo_matrix to a SparseSeries. diff --git a/pandas/core/common.py b/pandas/core/common.py index 279d512e5a046..968fb180abcd0 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -16,7 +16,7 @@ from pandas._libs import lib, tslibs from pandas._typing import AnyArrayLike, Scalar, T -from pandas.compat.numpy import _np_version_under1p18 +from pandas.compat.numpy import np_version_under1p18 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( @@ -425,7 +425,7 @@ def random_state(state=None): if ( is_integer(state) or is_array_like(state) - or (not _np_version_under1p18 and isinstance(state, np.random.BitGenerator)) + or (not np_version_under1p18 and isinstance(state, np.random.BitGenerator)) ): return np.random.RandomState(state) elif isinstance(state, np.random.RandomState): diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index 9c5388faae1bd..0cdc0f530a7f3 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -6,11 +6,11 @@ from typing import Dict, Type from pandas.core.computation.align import align_terms, reconstruct_object -from pandas.core.computation.ops import _mathops, _reductions +from pandas.core.computation.ops import MATHOPS, REDUCTIONS import pandas.io.formats.printing as 
printing -_ne_builtins = frozenset(_mathops + _reductions) +_ne_builtins = frozenset(MATHOPS + REDUCTIONS) class NumExprClobberingError(NameError): diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index df71b4fe415f8..8cff6abc071ca 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -12,7 +12,13 @@ import pandas.core.common as com from pandas.core.computation.ops import ( - _LOCAL_TAG, + ARITH_OPS_SYMS, + BOOL_OPS_SYMS, + CMP_OPS_SYMS, + LOCAL_TAG, + MATHOPS, + REDUCTIONS, + UNARY_OPS_SYMS, BinOp, Constant, Div, @@ -21,12 +27,6 @@ Term, UnaryOp, UndefinedVariableError, - _arith_ops_syms, - _bool_ops_syms, - _cmp_ops_syms, - _mathops, - _reductions, - _unary_ops_syms, is_term, ) from pandas.core.computation.parsing import clean_backtick_quoted_toks, tokenize_string @@ -101,7 +101,7 @@ def _replace_locals(tok: Tuple[int, str]) -> Tuple[int, str]: """ toknum, tokval = tok if toknum == tokenize.OP and tokval == "@": - return tokenize.OP, _LOCAL_TAG + return tokenize.OP, LOCAL_TAG return toknum, tokval @@ -338,7 +338,7 @@ class BaseExprVisitor(ast.NodeVisitor): const_type: Type[Term] = Constant term_type = Term - binary_ops = _cmp_ops_syms + _bool_ops_syms + _arith_ops_syms + binary_ops = CMP_OPS_SYMS + BOOL_OPS_SYMS + ARITH_OPS_SYMS binary_op_nodes = ( "Gt", "Lt", @@ -362,7 +362,7 @@ class BaseExprVisitor(ast.NodeVisitor): ) binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes)) - unary_ops = _unary_ops_syms + unary_ops = UNARY_OPS_SYMS unary_op_nodes = "UAdd", "USub", "Invert", "Not" unary_op_nodes_map = {k: v for k, v in zip(unary_ops, unary_op_nodes)} @@ -494,7 +494,7 @@ def _maybe_evaluate_binop( if self.engine != "pytables": if ( - res.op in _cmp_ops_syms + res.op in CMP_OPS_SYMS and getattr(lhs, "is_datetime", False) or getattr(rhs, "is_datetime", False) ): @@ -726,7 +726,7 @@ def visitor(x, y): _python_not_supported = frozenset(["Dict", "BoolOp", "In", "NotIn"]) -_numexpr_supported_calls = 
frozenset(_reductions + _mathops) +_numexpr_supported_calls = frozenset(REDUCTIONS + MATHOPS) @disallow( diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index d2c08c343ab4b..0032fe97b8b33 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -23,7 +23,7 @@ _TEST_MODE = None _TEST_RESULT: List[bool] = list() -_USE_NUMEXPR = NUMEXPR_INSTALLED +USE_NUMEXPR = NUMEXPR_INSTALLED _evaluate = None _where = None @@ -39,21 +39,21 @@ def set_use_numexpr(v=True): # set/unset to use numexpr - global _USE_NUMEXPR + global USE_NUMEXPR if NUMEXPR_INSTALLED: - _USE_NUMEXPR = v + USE_NUMEXPR = v # choose what we are going to do global _evaluate, _where - _evaluate = _evaluate_numexpr if _USE_NUMEXPR else _evaluate_standard - _where = _where_numexpr if _USE_NUMEXPR else _where_standard + _evaluate = _evaluate_numexpr if USE_NUMEXPR else _evaluate_standard + _where = _where_numexpr if USE_NUMEXPR else _where_standard def set_numexpr_threads(n=None): # if we are using numexpr, set the threads to n # otherwise reset - if NUMEXPR_INSTALLED and _USE_NUMEXPR: + if NUMEXPR_INSTALLED and USE_NUMEXPR: if n is None: n = ne.detect_number_of_cores() ne.set_num_threads(n) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 1fb3910b8577d..5759cd17476d6 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -16,11 +16,11 @@ import pandas.core.common as com from pandas.core.computation.common import ensure_decoded, result_type_many -from pandas.core.computation.scope import _DEFAULT_GLOBALS +from pandas.core.computation.scope import DEFAULT_GLOBALS from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded -_reductions = ("sum", "prod") +REDUCTIONS = ("sum", "prod") _unary_math_ops = ( "sin", @@ -46,10 +46,10 @@ ) _binary_math_ops = ("arctan2",) -_mathops = _unary_math_ops + _binary_math_ops +MATHOPS = _unary_math_ops + _binary_math_ops 
-_LOCAL_TAG = "__pd_eval_local_" +LOCAL_TAG = "__pd_eval_local_" class UndefinedVariableError(NameError): @@ -80,13 +80,13 @@ def __init__(self, name, env, side=None, encoding=None): self.env = env self.side = side tname = str(name) - self.is_local = tname.startswith(_LOCAL_TAG) or tname in _DEFAULT_GLOBALS + self.is_local = tname.startswith(LOCAL_TAG) or tname in DEFAULT_GLOBALS self._value = self._resolve_name() self.encoding = encoding @property def local_name(self) -> str: - return self.name.replace(_LOCAL_TAG, "") + return self.name.replace(LOCAL_TAG, "") def __repr__(self) -> str: return pprint_thing(self.name) @@ -220,7 +220,7 @@ def __repr__(self) -> str: @property def return_type(self): # clobber types to bool if the op is a boolean operator - if self.op in (_cmp_ops_syms + _bool_ops_syms): + if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS): return np.bool_ return result_type_many(*(term.type for term in com.flatten(self))) @@ -280,7 +280,7 @@ def _not_in(x, y): return x not in y -_cmp_ops_syms = (">", "<", ">=", "<=", "==", "!=", "in", "not in") +CMP_OPS_SYMS = (">", "<", ">=", "<=", "==", "!=", "in", "not in") _cmp_ops_funcs = ( operator.gt, operator.lt, @@ -291,13 +291,13 @@ def _not_in(x, y): _in, _not_in, ) -_cmp_ops_dict = dict(zip(_cmp_ops_syms, _cmp_ops_funcs)) +_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs)) -_bool_ops_syms = ("&", "|", "and", "or") +BOOL_OPS_SYMS = ("&", "|", "and", "or") _bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_) -_bool_ops_dict = dict(zip(_bool_ops_syms, _bool_ops_funcs)) +_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs)) -_arith_ops_syms = ("+", "-", "*", "/", "**", "//", "%") +ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%") _arith_ops_funcs = ( operator.add, operator.sub, @@ -307,12 +307,12 @@ def _not_in(x, y): operator.floordiv, operator.mod, ) -_arith_ops_dict = dict(zip(_arith_ops_syms, _arith_ops_funcs)) +_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs)) 
-_special_case_arith_ops_syms = ("**", "//", "%") +SPECIAL_CASE_ARITH_OPS_SYMS = ("**", "//", "%") _special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod) _special_case_arith_ops_dict = dict( - zip(_special_case_arith_ops_syms, _special_case_arith_ops_funcs) + zip(SPECIAL_CASE_ARITH_OPS_SYMS, _special_case_arith_ops_funcs) ) _binary_ops_dict = {} @@ -530,9 +530,9 @@ def __init__(self, lhs, rhs): _cast_inplace(com.flatten(self), acceptable_dtypes, np.float_) -_unary_ops_syms = ("+", "-", "~", "not") +UNARY_OPS_SYMS = ("+", "-", "~", "not") _unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert) -_unary_ops_dict = dict(zip(_unary_ops_syms, _unary_ops_funcs)) +_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs)) class UnaryOp(Op): @@ -561,7 +561,7 @@ def __init__(self, op: str, operand): except KeyError as err: raise ValueError( f"Invalid unary operator {repr(op)}, " - f"valid operators are {_unary_ops_syms}" + f"valid operators are {UNARY_OPS_SYMS}" ) from err def __call__(self, env): @@ -602,7 +602,7 @@ class FuncNode: def __init__(self, name: str): from pandas.core.computation.check import NUMEXPR_INSTALLED, NUMEXPR_VERSION - if name not in _mathops or ( + if name not in MATHOPS or ( NUMEXPR_INSTALLED and NUMEXPR_VERSION < LooseVersion("2.6.9") and name in ("floor", "ceil") diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 83bf92ad737e4..2925f583bfc56 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -53,7 +53,7 @@ def _raw_hex_id(obj) -> str: return "".join(_replacer(x) for x in packed) -_DEFAULT_GLOBALS = { +DEFAULT_GLOBALS = { "Timestamp": Timestamp, "datetime": datetime.datetime, "True": True, @@ -114,7 +114,7 @@ def __init__( # shallow copy because we don't want to keep filling this up with what # was there before if there are multiple calls to Scope/_ensure_scope - self.scope = DeepChainMap(_DEFAULT_GLOBALS.copy()) + self.scope 
= DeepChainMap(DEFAULT_GLOBALS.copy()) self.target = target if isinstance(local_dict, Scope): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7c5aafcbbc7e9..8f9c0cf7a01db 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -22,9 +22,9 @@ from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( - _POSSIBLY_CAST_DTYPES, DT64NS_DTYPE, INT64_DTYPE, + POSSIBLY_CAST_DTYPES, TD64NS_DTYPE, ensure_int8, ensure_int16, @@ -1188,7 +1188,7 @@ def maybe_castable(arr) -> bool: elif kind == "m": return is_timedelta64_ns_dtype(arr.dtype) - return arr.dtype.name not in _POSSIBLY_CAST_DTYPES + return arr.dtype.name not in POSSIBLY_CAST_DTYPES def maybe_infer_to_datetimelike(value, convert_dates: bool = False): diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 6ad46eb967275..5987fdabf78bb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -43,7 +43,7 @@ is_sequence, ) -_POSSIBLY_CAST_DTYPES = { +POSSIBLY_CAST_DTYPES = { np.dtype(t).name for t in [ "O", diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 93c945638a174..40f0c6200e835 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -102,7 +102,7 @@ import pandas.core.indexing as indexing from pandas.core.internals import BlockManager from pandas.core.missing import find_valid_index -from pandas.core.ops import _align_method_FRAME +from pandas.core.ops import align_method_FRAME from pandas.core.shared_docs import _shared_docs from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window @@ -7402,7 +7402,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if isinstance(self, ABCSeries): threshold = self._constructor(threshold, index=self.index) else: - threshold = _align_method_FRAME(self, threshold, axis, flex=None)[1] + threshold = align_method_FRAME(self, threshold, axis, flex=None)[1] return self.where(subset, 
threshold, axis=axis, inplace=inplace) def clip( diff --git a/pandas/core/index.py b/pandas/core/index.py index a315b9619b0e7..c961c5fe1aa9d 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -19,7 +19,7 @@ ensure_index_from_sequences, get_objs_combined_axis, ) -from pandas.core.indexes.multi import _sparsify # noqa:F401 +from pandas.core.indexes.multi import sparsify_labels as _sparsify # noqa:F401 # GH#30193 warnings.warn( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a1bc8a4659b24..526dae7e256b7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3586,7 +3586,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) def _join_multi(self, other, how, return_indexers=True): from pandas.core.indexes.multi import MultiIndex - from pandas.core.reshape.merge import _restore_dropped_levels_multijoin + from pandas.core.reshape.merge import restore_dropped_levels_multijoin # figure out join names self_names = set(com.not_none(*self.names)) @@ -3622,7 +3622,7 @@ def _join_multi(self, other, how, return_indexers=True): # common levels, ldrop_names, rdrop_names dropped_names = ldrop_names + rdrop_names - levels, codes, names = _restore_dropped_levels_multijoin( + levels, codes, names = restore_dropped_levels_multijoin( self, other, dropped_names, join_idx, lidx, ridx ) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e49a23935efbd..4dd7503391df7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1337,7 +1337,7 @@ def format( if sparsify in [False, lib.no_default]: sentinel = sparsify # little bit of a kludge job for #1217 - result_levels = _sparsify( + result_levels = sparsify_labels( result_levels, start=int(names), sentinel=sentinel ) @@ -3690,7 +3690,7 @@ def _add_numeric_methods_disabled(cls): MultiIndex._add_logical_methods_disabled() -def _sparsify(label_list, start: int = 0, sentinel=""): +def 
sparsify_labels(label_list, start: int = 0, sentinel=""): pivoted = list(zip(*label_list)) k = len(label_list) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6fdde22a1c514..64470da2fb910 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -11,7 +11,7 @@ from pandas._typing import ArrayLike, Dtype, DtypeObj, F, Scalar from pandas.compat._optional import import_optional_dependency -from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask +from pandas.core.dtypes.cast import maybe_upcast_putmask from pandas.core.dtypes.common import ( get_dtype, is_any_int_dtype, @@ -185,7 +185,7 @@ def _get_fill_value( else: if fill_value_typ == "+inf": # need the max int here - return _int64_max + return np.iinfo(np.int64).max else: return iNaT @@ -346,7 +346,7 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None): result = np.nan # raise if we have a timedelta64[ns] which is too large - if np.fabs(result) > _int64_max: + if np.fabs(result) > np.iinfo(np.int64).max: raise ValueError("overflow in timedelta operation") result = Timedelta(result, unit="ns") diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 60f3d23aaed13..8fcbee6a20ac3 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -306,7 +306,7 @@ def dispatch_to_series(left, right, func, axis: Optional[int] = None): def _align_method_SERIES(left: "Series", right, align_asobject: bool = False): """ align lhs and rhs Series """ - # ToDo: Different from _align_method_FRAME, list, tuple and ndarray + # ToDo: Different from align_method_FRAME, list, tuple and ndarray # are not coerced here # because Series has inconsistencies described in #13637 @@ -430,7 +430,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # DataFrame -def _align_method_FRAME( +def align_method_FRAME( left, right, axis, flex: Optional[bool] = False, level: Level = None ): """ @@ -571,7 +571,7 @@ def _frame_arith_method_with_reindex( 
new_right = right.iloc[:, rcols] result = op(new_left, new_right) - # Do the join on the columns instead of using _align_method_FRAME + # Do the join on the columns instead of using align_method_FRAME # to avoid constructing two potentially large/sparse DataFrames join_columns, _, _ = left.columns.join( right.columns, how="outer", level=None, return_indexers=True @@ -644,7 +644,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): # TODO: why are we passing flex=True instead of flex=not special? # 15 tests fail if we pass flex=not special instead - self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) + self, other = align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): # Another DataFrame @@ -680,7 +680,7 @@ def _flex_comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): def f(self, other, axis=default_axis, level=None): axis = self._get_axis_number(axis) if axis is not None else 1 - self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) + self, other = align_method_FRAME(self, other, axis, flex=True, level=level) new_data = dispatch_to_series(self, other, op, axis=axis) return self._construct_result(new_data) @@ -698,7 +698,7 @@ def _comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): def f(self, other): axis = 1 # only relevant for Series other case - self, other = _align_method_FRAME(self, other, axis, level=None, flex=False) + self, other = align_method_FRAME(self, other, axis, level=None, flex=False) # See GH#4537 for discussion of scalar op behavior new_data = dispatch_to_series(self, other, op, axis=axis) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f1c5486222ea1..030dec369c2be 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1350,7 +1350,7 @@ def _get_join_indexers( return join_func(lkey, rkey, count, **kwargs) -def _restore_dropped_levels_multijoin( +def 
restore_dropped_levels_multijoin( left: MultiIndex, right: MultiIndex, dropped_level_names, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 70e38c3106bdb..891039eadca4b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -994,7 +994,7 @@ def to_html( ) def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: - from pandas.core.indexes.multi import _sparsify + from pandas.core.indexes.multi import sparsify_labels columns = frame.columns @@ -1020,7 +1020,7 @@ def space_format(x, y): zip(*[[space_format(x, y) for y in x] for x in fmt_columns]) ) if self.sparsify and len(str_columns): - str_columns = _sparsify(str_columns) + str_columns = sparsify_labels(str_columns) str_columns = [list(x) for x in zip(*str_columns)] else: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index a4d923fdbe45a..e3000788cb33a 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -22,7 +22,7 @@ from pandas.io.common import get_compression_method, get_filepath_or_buffer, get_handle from pandas.io.json._normalize import convert_to_line_delimits from pandas.io.json._table_schema import build_table_schema, parse_table_schema -from pandas.io.parsers import _validate_integer +from pandas.io.parsers import validate_integer loads = json.loads dumps = json.dumps @@ -698,11 +698,11 @@ def __init__( self.file_handles: List[IO] = [] if self.chunksize is not None: - self.chunksize = _validate_integer("chunksize", self.chunksize, 1) + self.chunksize = validate_integer("chunksize", self.chunksize, 1) if not self.lines: raise ValueError("chunksize can only be passed if lines=True") if self.nrows is not None: - self.nrows = _validate_integer("nrows", self.nrows, 0) + self.nrows = validate_integer("nrows", self.nrows, 0) if not self.lines: raise ValueError("nrows can only be passed if lines=True") diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a0466c5ac6b57..4c619a636f057 100644 --- 
a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -361,7 +361,7 @@ ) -def _validate_integer(name, val, min_val=0): +def validate_integer(name, val, min_val=0): """ Checks whether the 'name' parameter for parsing is either an integer OR float that can SAFELY be cast to an integer @@ -436,7 +436,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): # Extract some of the arguments (pass chunksize on). iterator = kwds.get("iterator", False) - chunksize = _validate_integer("chunksize", kwds.get("chunksize", None), 1) + chunksize = validate_integer("chunksize", kwds.get("chunksize", None), 1) nrows = kwds.get("nrows", None) # Check for duplicates in names. @@ -1179,7 +1179,7 @@ def _failover_to_python(self): raise AbstractMethodError(self) def read(self, nrows=None): - nrows = _validate_integer("nrows", nrows) + nrows = validate_integer("nrows", nrows) ret = self._engine.read(nrows) # May alter columns / col_dict diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 54da13c3c620b..1e53214e5d517 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -189,9 +189,9 @@ class TestPDApi(Base): "_hashtable", "_lib", "_libs", - "_np_version_under1p17", - "_np_version_under1p18", - "_is_numpy_dev", + "_np_version_under1p17", + "_np_version_under1p18", + "_is_numpy_dev", "_testing", "_tslib", "_typing", diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index b1ab700427c28..8f1ca9a4a7a96 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -4,7 +4,7 @@ import pytest from pandas._libs import OutOfBoundsDatetime -from pandas.compat.numpy import _np_version_under1p18 +from pandas.compat.numpy import np_version_under1p18 import pandas as pd import pandas._testing as tm @@ -955,7 +955,7 @@ def test_invalid_nat_setitem_array(array, non_casting_nats): ], ) def test_to_numpy_extra(array): - if _np_version_under1p18: + if 
np_version_under1p18: # np.isnan(NaT) raises, so use pandas' isnan = pd.isna else: diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 49066428eb16c..72dc04e68c154 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -26,12 +26,12 @@ PandasExprVisitor, PythonExprVisitor, ) -from pandas.core.computation.expressions import _USE_NUMEXPR, NUMEXPR_INSTALLED +from pandas.core.computation.expressions import NUMEXPR_INSTALLED, USE_NUMEXPR from pandas.core.computation.ops import ( - _arith_ops_syms, + ARITH_OPS_SYMS, + SPECIAL_CASE_ARITH_OPS_SYMS, _binary_math_ops, _binary_ops_dict, - _special_case_arith_ops_syms, _unary_math_ops, ) @@ -41,8 +41,8 @@ pytest.param( engine, marks=pytest.mark.skipif( - engine == "numexpr" and not _USE_NUMEXPR, - reason=f"numexpr enabled->{_USE_NUMEXPR}, " + engine == "numexpr" and not USE_NUMEXPR, + reason=f"numexpr enabled->{USE_NUMEXPR}, " f"installed->{NUMEXPR_INSTALLED}", ), ) @@ -114,7 +114,7 @@ def _is_py3_complex_incompat(result, expected): return isinstance(expected, (complex, np.complexfloating)) and np.isnan(result) -_good_arith_ops = set(_arith_ops_syms).difference(_special_case_arith_ops_syms) +_good_arith_ops = set(ARITH_OPS_SYMS).difference(SPECIAL_CASE_ARITH_OPS_SYMS) @td.skip_if_no_ne @@ -158,10 +158,10 @@ def setup_data(self): self.rhses = self.pandas_rhses + self.scalar_rhses def setup_ops(self): - self.cmp_ops = expr._cmp_ops_syms + self.cmp_ops = expr.CMP_OPS_SYMS self.cmp2_ops = self.cmp_ops[::-1] - self.bin_ops = expr._bool_ops_syms - self.special_case_ops = _special_case_arith_ops_syms + self.bin_ops = expr.BOOL_OPS_SYMS + self.special_case_ops = SPECIAL_CASE_ARITH_OPS_SYMS self.arith_ops = _good_arith_ops self.unary_ops = "-", "~", "not " @@ -774,10 +774,10 @@ def setup_class(cls): cls.parser = "python" def setup_ops(self): - self.cmp_ops = [op for op in expr._cmp_ops_syms if op not in ("in", "not in")] + self.cmp_ops = [op for op 
in expr.CMP_OPS_SYMS if op not in ("in", "not in")] self.cmp2_ops = self.cmp_ops[::-1] - self.bin_ops = [op for op in expr._bool_ops_syms if op not in ("and", "or")] - self.special_case_ops = _special_case_arith_ops_syms + self.bin_ops = [op for op in expr.BOOL_OPS_SYMS if op not in ("and", "or")] + self.special_case_ops = SPECIAL_CASE_ARITH_OPS_SYMS self.arith_ops = _good_arith_ops self.unary_ops = "+", "-", "~" @@ -1135,7 +1135,7 @@ class TestOperationsNumExprPandas: def setup_class(cls): cls.engine = "numexpr" cls.parser = "pandas" - cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms + cls.arith_ops = expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS @classmethod def teardown_class(cls): @@ -1177,7 +1177,7 @@ def test_simple_arith_ops(self): assert y == expec def test_simple_bool_ops(self): - for op, lhs, rhs in product(expr._bool_ops_syms, (True, False), (True, False)): + for op, lhs, rhs in product(expr.BOOL_OPS_SYMS, (True, False), (True, False)): ex = f"{lhs} {op} {rhs}" res = self.eval(ex) exp = eval(ex) @@ -1185,7 +1185,7 @@ def test_simple_bool_ops(self): def test_bool_ops_with_constants(self): for op, lhs, rhs in product( - expr._bool_ops_syms, ("True", "False"), ("True", "False") + expr.BOOL_OPS_SYMS, ("True", "False"), ("True", "False") ): ex = f"{lhs} {op} {rhs}" res = self.eval(ex) @@ -1637,7 +1637,7 @@ def setup_class(cls): cls.parser = "python" cls.arith_ops = [ op - for op in expr._arith_ops_syms + expr._cmp_ops_syms + for op in expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS if op not in ("in", "not in") ] @@ -1697,7 +1697,7 @@ def test_fails_pipe(self): def test_bool_ops_with_constants(self): for op, lhs, rhs in product( - expr._bool_ops_syms, ("True", "False"), ("True", "False") + expr.BOOL_OPS_SYMS, ("True", "False"), ("True", "False") ): ex = f"{lhs} {op} {rhs}" if op in ("and", "or"): @@ -1710,7 +1710,7 @@ def test_bool_ops_with_constants(self): assert res == exp def test_simple_bool_ops(self): - for op, lhs, rhs in product(expr._bool_ops_syms, (True, 
False), (True, False)): + for op, lhs, rhs in product(expr.BOOL_OPS_SYMS, (True, False), (True, False)): ex = f"lhs {op} rhs" if op in ("and", "or"): msg = "'BoolOp' nodes are not implemented" @@ -1729,7 +1729,7 @@ def setup_class(cls): cls.engine = cls.parser = "python" cls.arith_ops = [ op - for op in expr._arith_ops_syms + expr._cmp_ops_syms + for op in expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS if op not in ("in", "not in") ] @@ -1740,7 +1740,7 @@ def setup_class(cls): super().setup_class() cls.engine = "python" cls.parser = "pandas" - cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms + cls.arith_ops = expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS @td.skip_if_no_ne @@ -2020,7 +2020,7 @@ def test_equals_various(other): df = DataFrame({"A": ["a", "b", "c"]}) result = df.eval(f"A == {other}") expected = Series([False, False, False], name="A") - if _USE_NUMEXPR: + if USE_NUMEXPR: # https://github.com/pandas-dev/pandas/issues/10239 # lose name with numexpr engine. Remove when that's fixed. 
expected.name = None diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 70d0b4e9e835c..6dd8d890e8a4b 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1417,7 +1417,7 @@ def test_alignment_non_pandas(self): columns = ["X", "Y", "Z"] df = pd.DataFrame(np.random.randn(3, 3), index=index, columns=columns) - align = pd.core.ops._align_method_FRAME + align = pd.core.ops.align_method_FRAME for val in [ [1, 2, 3], (1, 2, 3), diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 23bb673586768..2c2584e8dee01 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.compat.numpy import _np_version_under1p17 +from pandas.compat.numpy import np_version_under1p17 from pandas.core.dtypes.common import is_scalar @@ -652,12 +652,12 @@ def test_sample(sel): pytest.param( "np.random.MT19937", 3, - marks=pytest.mark.skipif(_np_version_under1p17, reason="NumPy<1.17"), + marks=pytest.mark.skipif(np_version_under1p17, reason="NumPy<1.17"), ), pytest.param( "np.random.PCG64", 11, - marks=pytest.mark.skipif(_np_version_under1p17, reason="NumPy<1.17"), + marks=pytest.mark.skipif(np_version_under1p17, reason="NumPy<1.17"), ), ], ) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e95e7267f17ec..11dc232af8de4 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -5,7 +5,7 @@ import pytest from pandas._libs import iNaT -from pandas.compat.numpy import _is_numpy_dev +from pandas.compat.numpy import is_numpy_dev from pandas.errors import InvalidIndexError from pandas.core.dtypes.common import is_datetime64tz_dtype @@ -475,7 +475,7 @@ def test_intersection_base(self, index, request): for case in cases: # https://github.com/pandas-dev/pandas/issues/35481 if ( - _is_numpy_dev + is_numpy_dev and 
isinstance(case, Series) and isinstance(index, UInt64Index) ): diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 9e4e73e793bac..d661a56311e6c 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat.numpy import _np_version_under1p17 +from pandas.compat.numpy import np_version_under1p17 import pandas as pd from pandas import Index, MultiIndex, date_range, period_range @@ -240,7 +240,7 @@ def test_numpy_ufuncs(idx, func): # test ufuncs of numpy. see: # https://numpy.org/doc/stable/reference/ufuncs.html - if _np_version_under1p17: + if np_version_under1p17: expected_exception = AttributeError msg = f"'tuple' object has no attribute '{func.__name__}'" else: diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 043539c173427..bd82cc36641ca 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -9,8 +9,8 @@ PeriodIndex, TimedeltaIndex, UInt64Index, - _np_version_under1p17, - _np_version_under1p18, + np_version_under1p17, + np_version_under1p18, ) import pandas._testing as tm from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -83,12 +83,12 @@ def test_numpy_ufuncs_other(index, func): if func in [np.isfinite, np.isnan, np.isinf]: pytest.xfail(reason="__array_ufunc__ is not defined") - if not _np_version_under1p18 and func in [np.isfinite, np.isinf, np.isnan]: + if not np_version_under1p18 and func in [np.isfinite, np.isinf, np.isnan]: # numpy 1.18(dev) changed isinf and isnan to not raise on dt64/tfd64 result = func(index) assert isinstance(result, np.ndarray) - elif not _np_version_under1p17 and func in [np.isfinite]: + elif not np_version_under1p17 and func in [np.isfinite]: # ok under numpy >= 1.17 # Results in bool array result = func(index) diff --git 
a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 193800fae751f..7dad0e8592206 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from pandas.compat.numpy import _is_numpy_dev +from pandas.compat.numpy import is_numpy_dev import pandas as pd from pandas import DataFrame, Series, Timestamp, date_range @@ -947,7 +947,7 @@ def test_loc_setitem_empty_append(self): df.loc[0, "x"] = expected.loc[0, "x"] tm.assert_frame_equal(df, expected) - @pytest.mark.xfail(_is_numpy_dev, reason="gh-35481") + @pytest.mark.xfail(is_numpy_dev, reason="gh-35481") def test_loc_setitem_empty_append_raises(self): # GH6173, various appends to an empty dataframe diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index cb33f99d9bd91..cd064b25c2cd6 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -8,7 +8,7 @@ import pytest import pandas as pd -from pandas import NaT, Timedelta, Timestamp, _is_numpy_dev, compat, offsets +from pandas import NaT, Timedelta, Timestamp, compat, is_numpy_dev, offsets import pandas._testing as tm from pandas.core import ops @@ -426,7 +426,7 @@ def test_td_div_numeric_scalar(self): np.float64("NaN"), marks=pytest.mark.xfail( # Works on numpy dev only in python 3.9 - _is_numpy_dev and not compat.PY39, + is_numpy_dev and not compat.PY39, raises=RuntimeWarning, reason="https://github.com/pandas-dev/pandas/issues/31992", ), diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 3d45a1f7389b7..f7f3f1fa0c13d 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from pandas.compat.numpy import _np_version_under1p17 +from pandas.compat.numpy import np_version_under1p17 import pandas as pd from pandas import Series, Timestamp @@ -72,7 +72,7 @@ 
def test_random_state(): # Check BitGenerators # GH32503 - if not _np_version_under1p17: + if not np_version_under1p17: assert ( com.random_state(npr.MT19937(3)).uniform() == npr.RandomState(npr.MT19937(3)).uniform() diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 2368e93ddc256..da7f8b9b4a721 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -35,7 +35,7 @@ ) -@pytest.mark.skipif(not expr._USE_NUMEXPR, reason="not using numexpr") +@pytest.mark.skipif(not expr.USE_NUMEXPR, reason="not using numexpr") class TestExpressions: def setup_method(self, method): diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 94c252eca1671..e9deaf3fe67de 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -35,7 +35,7 @@ def test_foo(): from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import _np_version -from pandas.core.computation.expressions import _USE_NUMEXPR, NUMEXPR_INSTALLED +from pandas.core.computation.expressions import NUMEXPR_INSTALLED, USE_NUMEXPR def safe_import(mod_name: str, min_version: Optional[str] = None): @@ -195,8 +195,8 @@ def skip_if_no(package: str, min_version: Optional[str] = None): _skip_if_no_scipy(), reason="Missing SciPy requirement" ) skip_if_no_ne = pytest.mark.skipif( - not _USE_NUMEXPR, - reason=f"numexpr enabled->{_USE_NUMEXPR}, installed->{NUMEXPR_INSTALLED}", + not USE_NUMEXPR, + reason=f"numexpr enabled->{USE_NUMEXPR}, installed->{NUMEXPR_INSTALLED}", ) From ffe2214923a783705c36abd981e7221d90f43e8d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Sep 2020 15:18:04 -0700 Subject: [PATCH 02/12] STY: check for private imports/lookups --- Makefile | 6 +++ ci/code_checks.sh | 13 +++++-- scripts/validate_unwanted_patterns.py | 54 ++++++++++++++++++++++++++- 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 4a9a48992f92f..778393030ac54 
100644 --- a/Makefile +++ b/Makefile @@ -32,3 +32,9 @@ check: --included-file-extensions="py" \ --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \ pandas/ + + python3 scripts/validate_unwanted_patterns.py \ + --validation-type="private_import_across_module" \ + --included-file-extensions="py" \ + --excluded-file-paths=pandas/tests/,asv_bench/ + pandas/ diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 875f1dbb83ce3..27e1e8cd18939 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -116,11 +116,18 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then fi RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Check for use of private module attribute access' ; echo $MSG + MSG='Check for import of private attributes across modules' ; echo $MSG if [[ "$GITHUB_ACTIONS" == "true" ]]; then - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/ + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/ else - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/ + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for use of private functions across modules' ; echo $MSG + if [[ "$GITHUB_ACTIONS" == "true" ]]; then + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" 
--included-file-extensions="py" --format="##[error]{source_path}:{line_number}:{msg}" pandas/ + else + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" pandas/ fi RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 1a6d8cc8b9914..e3bdac9aa7e3b 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -18,6 +18,25 @@ import tokenize from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple +PRIVATE_IMPORTS_TO_IGNORE: Set[str] = { + "_extension_array_shared_docs", + "_index_shared_docs", + "_merge_doc", + "_shared_docs", + "_new_Index", + "_new_PeriodIndex", + "_doc_template", + "_interval_shared_docs", + "_apply_docs", + "_arith_doc_FRAME", + "_flex_comp_doc_FRAME", + "_make_flex_doc", + "_op_descriptions", + "_pipe_template", + "_testing", + "_test_decorators", +} + def _get_literal_string_prefix_len(token_string: str) -> int: """ @@ -164,6 +183,36 @@ def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str yield (node.lineno, f"Private function '{module_name}.{function_name}'") +def private_import_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: + """ + Checking that a private function is not imported across modules. + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + Yields + ------ + line_number : int + Line number of import statement, that imports the private function. + msg : str + Explanation of the error. 
+ """ + contents = file_obj.read() + tree = ast.parse(contents) + + for node in ast.walk(tree): + if not (isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom)): + continue + + for module in node.names: + module_name = module.name.split(".")[-1] + if module_name in PRIVATE_IMPORTS_TO_IGNORE: + continue + + if module_name.startswith("_"): + yield (node.lineno, f"Import of internal function {repr(module_name)}") + + def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: """ This test case is necessary after 'Black' (https://github.com/psf/black), @@ -419,6 +468,7 @@ def main( available_validation_types: List[str] = [ "bare_pytest_raises", "private_function_across_module", + "private_import_across_module", "strings_to_concatenate", "strings_with_wrong_placed_whitespace", ] @@ -444,12 +494,12 @@ def main( parser.add_argument( "--included-file-extensions", default="py,pyx,pxd,pxi", - help="Coma seperated file extensions to check.", + help="Comma separated file extensions to check.", ) parser.add_argument( "--excluded-file-paths", default="asv_bench/env", - help="Comma separated file extensions to check.", + help="Comma separated file paths to exclude.", ) args = parser.parse_args() From b2070423e4b0adb1c8c5726c9c86beed088aac87 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 2 Sep 2020 14:56:19 -0700 Subject: [PATCH 03/12] ignore some docstring templates --- scripts/validate_unwanted_patterns.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index e3bdac9aa7e3b..07b47f84de59c 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -21,20 +21,23 @@ PRIVATE_IMPORTS_TO_IGNORE: Set[str] = { "_extension_array_shared_docs", "_index_shared_docs", + "_interval_shared_docs", "_merge_doc", "_shared_docs", + "_apply_docs", "_new_Index", "_new_PeriodIndex", "_doc_template", - "_interval_shared_docs", - 
"_apply_docs", + "_agg_template", + "_pipe_template", + "_transform_template", "_arith_doc_FRAME", "_flex_comp_doc_FRAME", "_make_flex_doc", "_op_descriptions", - "_pipe_template", "_testing", "_test_decorators", + "__version__", # check np.__version__ in compat.numpy.function } From 52cfe40b3755def9734437f165247ee8f0e9d78d Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 3 Sep 2020 18:05:04 -0700 Subject: [PATCH 04/12] fix exclud-files --- scripts/validate_unwanted_patterns.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 07b47f84de59c..4fb423dc2ee70 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -35,6 +35,8 @@ "_flex_comp_doc_FRAME", "_make_flex_doc", "_op_descriptions", + "_test_parse_iso8601", + "_json_normalize", # TODO: remove after deprecation is enforced "_testing", "_test_decorators", "__version__", # check np.__version__ in compat.numpy.function From 4b7a9f802a902ff480cbe44c22d3e2303ae67117 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Sep 2020 12:34:25 -0700 Subject: [PATCH 05/12] ignores --- scripts/validate_unwanted_patterns.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 4fb423dc2ee70..5c4e534425c97 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -30,6 +30,8 @@ "_doc_template", "_agg_template", "_pipe_template", + "_get_version", + "__main__", "_transform_template", "_arith_doc_FRAME", "_flex_comp_doc_FRAME", From 8edb2699523e3e1bb8c860b061ed1603052dc7fc Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Sep 2020 16:05:03 -0700 Subject: [PATCH 06/12] exlcude more --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 778393030ac54..b915d8840cd8d 100644 --- a/Makefile +++ b/Makefile @@ -36,5 +36,5 @@ check: python3 
scripts/validate_unwanted_patterns.py \ --validation-type="private_import_across_module" \ --included-file-extensions="py" \ - --excluded-file-paths=pandas/tests/,asv_bench/ + --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ \ pandas/ From b739aa09dd2d2f9d38f4de869a445d3d936755c9 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Sep 2020 16:23:39 -0700 Subject: [PATCH 07/12] Revert unintentional --- pandas/__init__.py | 190 +++++++++++++++++++---------------- pandas/tests/api/test_api.py | 6 +- 2 files changed, 105 insertions(+), 91 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index bb4b54e62d7aa..70bb0c8a2cb51 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -20,9 +20,9 @@ # numpy compat from pandas.compat.numpy import ( - is_numpy_dev as _is_numpy_dev, np_version_under1p17 as _np_version_under1p17, np_version_under1p18 as _np_version_under1p18, + is_numpy_dev as _is_numpy_dev, ) try: @@ -37,134 +37,148 @@ ) from e from pandas._config import ( - describe_option, get_option, + set_option, + reset_option, + describe_option, option_context, options, - reset_option, - set_option, ) -from pandas.util._print_versions import show_versions -from pandas.util._tester import test - -import pandas.api -import pandas.arrays -from pandas.core.arrays.sparse import SparseDtype -from pandas.core.computation.api import eval - # let init-time option registration happen import pandas.core.config_init -from pandas.core.reshape.api import ( - concat, - crosstab, - cut, - get_dummies, - lreshape, - melt, - merge, - merge_asof, - merge_ordered, - pivot, - pivot_table, - qcut, - wide_to_long, -) -import pandas.testing - -from pandas.io.json import _json_normalize as json_normalize -from pandas.tseries import offsets -from pandas.tseries.api import infer_freq - -# use the closest tagged version if possible -from ._version import get_versions -from pandas.core.api import ( # dtype; missing; indexes; tseries; conversion; misc - NA, - 
BooleanDtype, - Categorical, - CategoricalDtype, - CategoricalIndex, - DataFrame, - DateOffset, - DatetimeIndex, - DatetimeTZDtype, - Flags, - Float64Index, - Grouper, - Index, - IndexSlice, +from pandas.core.api import ( + # dtype Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, - Int64Index, - Interval, - IntervalDtype, - IntervalIndex, - MultiIndex, - NamedAgg, - NaT, - Period, - PeriodDtype, - PeriodIndex, - RangeIndex, - Series, - StringDtype, - Timedelta, - TimedeltaIndex, - Timestamp, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype, - UInt64Index, - array, - bdate_range, - date_range, - factorize, - interval_range, + CategoricalDtype, + PeriodDtype, + IntervalDtype, + DatetimeTZDtype, + StringDtype, + BooleanDtype, + # missing + NA, isna, isnull, notna, notnull, + # indexes + Index, + CategoricalIndex, + Int64Index, + UInt64Index, + RangeIndex, + Float64Index, + MultiIndex, + IntervalIndex, + TimedeltaIndex, + DatetimeIndex, + PeriodIndex, + IndexSlice, + # tseries + NaT, + Period, period_range, - set_eng_float_format, + Timedelta, timedelta_range, - to_datetime, + Timestamp, + date_range, + bdate_range, + Interval, + interval_range, + DateOffset, + # conversion to_numeric, + to_datetime, to_timedelta, + # misc + Flags, + Grouper, + factorize, unique, value_counts, + NamedAgg, + array, + Categorical, + set_eng_float_format, + Series, + DataFrame, ) +from pandas.core.arrays.sparse import SparseDtype + +from pandas.tseries.api import infer_freq +from pandas.tseries import offsets + +from pandas.core.computation.api import eval -from pandas.io.api import ( # excel; parsers; pickle; pytables; sql; misc +from pandas.core.reshape.api import ( + concat, + lreshape, + melt, + wide_to_long, + merge, + merge_asof, + merge_ordered, + crosstab, + pivot, + pivot_table, + get_dummies, + cut, + qcut, +) + +import pandas.api +from pandas.util._print_versions import show_versions + +from pandas.io.api import ( + # excel ExcelFile, ExcelWriter, + read_excel, + # parsers + 
read_csv, + read_fwf, + read_table, + # pickle + read_pickle, + to_pickle, + # pytables HDFStore, + read_hdf, + # sql + read_sql, + read_sql_query, + read_sql_table, + # misc read_clipboard, - read_csv, - read_excel, + read_parquet, + read_orc, read_feather, - read_fwf, read_gbq, - read_hdf, read_html, read_json, - read_orc, - read_parquet, - read_pickle, + read_stata, read_sas, read_spss, - read_sql, - read_sql_query, - read_sql_table, - read_stata, - read_table, - to_pickle, ) +from pandas.io.json import _json_normalize as json_normalize + +from pandas.util._tester import test +import pandas.testing +import pandas.arrays + +# use the closest tagged version if possible +from ._version import get_versions v = get_versions() __version__ = v.get("closest-tag", v["version"]) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 1e53214e5d517..54da13c3c620b 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -189,9 +189,9 @@ class TestPDApi(Base): "_hashtable", "_lib", "_libs", - "np_version_under1p17", - "np_version_under1p18", - "is_numpy_dev", + "_np_version_under1p17", + "_np_version_under1p18", + "_is_numpy_dev", "_testing", "_tslib", "_typing", From d023b99c3fcacb4d95528b706b305c41c394a12b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Sep 2020 16:36:14 -0700 Subject: [PATCH 08/12] revert accidnetal --- asv_bench/benchmarks/sparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 15d450d987f9d..28ceb25eebd96 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -71,7 +71,7 @@ def setup(self): s.index = MultiIndex.from_product([range(10)] * 4) self.ss = s.astype("Sparse") - def timesparse_series_to_coo(self): + def time_sparse_series_to_coo(self): self.ss.sparse.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True) From e4b50a2c62206b5d85c99f948ecec4204fdeae8d Mon Sep 17 00:00:00 
2001 From: Brock Date: Tue, 8 Sep 2020 16:37:10 -0700 Subject: [PATCH 09/12] revert unnecesary --- pandas/core/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index c961c5fe1aa9d..44f434e038a4b 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -19,7 +19,7 @@ ensure_index_from_sequences, get_objs_combined_axis, ) -from pandas.core.indexes.multi import sparsify_labels as _sparsify # noqa:F401 +from pandas.core.indexes.multi import sparsify_labels # noqa:F401 # GH#30193 warnings.warn( From 00bbbac43a31b48f0aefd1312febae04ab7c0d4a Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 11 Sep 2020 15:06:01 -0700 Subject: [PATCH 10/12] de-privatize --- pandas/core/arrays/datetimelike.py | 10 +++++----- pandas/core/arrays/integer.py | 8 ++++---- pandas/core/dtypes/cast.py | 6 ++++-- pandas/core/indexes/datetimes.py | 4 ++-- pandas/io/formats/format.py | 10 +++++----- scripts/validate_unwanted_patterns.py | 6 ++++++ 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6302b48cb1978..b013246e724de 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -54,7 +54,7 @@ from pandas.core import missing, nanops, ops from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts -from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.base import ExtensionOpsMixin import pandas.core.common as com from pandas.core.construction import array, extract_array @@ -472,11 +472,11 @@ class DatetimeLikeArrayMixin( def _ndarray(self) -> np.ndarray: return self._data - def _from_backing_data(self: _T, arr: np.ndarray) -> _T: + def _from_backing_data( + self: DatetimeLikeArrayT, arr: np.ndarray + ) -> DatetimeLikeArrayT: # Note: we do not retain `freq` - return 
type(self)._simple_new( # type: ignore[attr-defined] - arr, dtype=self.dtype - ) + return type(self)._simple_new(arr, dtype=self.dtype) # ------------------------------------------------------------------ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d83ff91a1315f..dc08e018397bc 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -106,7 +106,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: [t.numpy_dtype if isinstance(t, BaseMaskedDtype) else t for t in dtypes], [] ) if np.issubdtype(np_dtype, np.integer): - return _dtypes[str(np_dtype)] + return STR_TO_DTYPE[str(np_dtype)] return None def __from_arrow__( @@ -214,7 +214,7 @@ def coerce_to_array( if not issubclass(type(dtype), _IntegerDtype): try: - dtype = _dtypes[str(np.dtype(dtype))] + dtype = STR_TO_DTYPE[str(np.dtype(dtype))] except KeyError as err: raise ValueError(f"invalid dtype specified {dtype}") from err @@ -354,7 +354,7 @@ class IntegerArray(BaseMaskedArray): @cache_readonly def dtype(self) -> _IntegerDtype: - return _dtypes[str(self._data.dtype)] + return STR_TO_DTYPE[str(self._data.dtype)] def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]): @@ -735,7 +735,7 @@ class UInt64Dtype(_IntegerDtype): __doc__ = _dtype_docstring.format(dtype="uint64") -_dtypes: Dict[str, _IntegerDtype] = { +STR_TO_DTYPE: Dict[str, _IntegerDtype] = { "int8": Int8Dtype(), "int16": Int16Dtype(), "int32": Int32Dtype(), diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ba1b0b075936d..64ccc0be0a25d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1151,9 +1151,11 @@ def convert_dtypes( target_int_dtype = "Int64" if is_integer_dtype(input_array.dtype): - from pandas.core.arrays.integer import _dtypes + from pandas.core.arrays.integer import STR_TO_DTYPE - inferred_dtype = 
_dtypes.get(input_array.dtype.name, target_int_dtype) + inferred_dtype = STR_TO_DTYPE.get( + input_array.dtype.name, target_int_dtype + ) if not is_integer_dtype(input_array.dtype) and is_numeric_dtype( input_array.dtype ): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f0b80c2852bd5..f269495f6011a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -312,9 +312,9 @@ def _is_dates_only(self) -> bool: ------- bool """ - from pandas.io.formats.format import _is_dates_only + from pandas.io.formats.format import is_dates_only - return self.tz is None and _is_dates_only(self._values) + return self.tz is None and is_dates_only(self._values) def __reduce__(self): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6781d98ded41d..77f2a53fc7fab 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1586,7 +1586,7 @@ def format_percentiles( return [i + "%" for i in out] -def _is_dates_only( +def is_dates_only( values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex] ) -> bool: # return a boolean if we are only dates (and don't have a timezone) @@ -1658,8 +1658,8 @@ def get_format_datetime64_from_values( # only accepts 1D values values = values.ravel() - is_dates_only = _is_dates_only(values) - if is_dates_only: + ido = is_dates_only(values) + if ido: return date_format or "%Y-%m-%d" return date_format @@ -1668,9 +1668,9 @@ class Datetime64TZFormatter(Datetime64Formatter): def _format_strings(self) -> List[str]: """ we by definition have a TZ """ values = self.values.astype(object) - is_dates_only = _is_dates_only(values) + ido = is_dates_only(values) formatter = self.formatter or get_format_datetime64( - is_dates_only, date_format=self.date_format + ido, date_format=self.date_format ) fmt_values = [formatter(x) for x in values] diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 
5c4e534425c97..6bcc855ee6145 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -37,6 +37,12 @@ "_flex_comp_doc_FRAME", "_make_flex_doc", "_op_descriptions", + "_use_inf_as_na", + "_get_plot_backend", + "_matplotlib", + "_arrow_utils", + "_registry", + "_get_offset", # TODO: remove after get_offset deprecation enforced "_test_parse_iso8601", "_json_normalize", # TODO: remove after deprecation is enforced "_testing", From f7c62d1d4423af507be3e1e3b928877b660b56d5 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Sep 2020 08:11:00 -0700 Subject: [PATCH 11/12] de-privatuze --- ci/code_checks.sh | 1 + pandas/core/groupby/groupby.py | 6 +++--- pandas/core/resample.py | 9 +++++++-- pandas/core/window/ewm.py | 4 ++-- pandas/core/window/expanding.py | 4 ++-- pandas/core/window/rolling.py | 6 +++--- scripts/validate_unwanted_patterns.py | 1 + 7 files changed, 19 insertions(+), 12 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 27e1e8cd18939..3ad2785cb887f 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -121,6 +121,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/ else $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/ + fi RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Check for use of private functions across modules' ; echo $MSG diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1e3e56f4ff09f..8a55d438cf8d4 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -459,7 +459,7 @@ def f(self): @contextmanager -def 
group_selection_context(groupby: "_GroupBy"): +def group_selection_context(groupby: "BaseGroupBy"): """ Set / reset the group_selection_context. """ @@ -479,7 +479,7 @@ def group_selection_context(groupby: "_GroupBy"): ] -class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): +class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): _group_selection = None _apply_allowlist: FrozenSet[str] = frozenset() @@ -1212,7 +1212,7 @@ def _apply_filter(self, indices, dropna): OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame) -class GroupBy(_GroupBy[FrameOrSeries]): +class GroupBy(BaseGroupBy[FrameOrSeries]): """ Class for grouping and aggregating relational data. diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 7b5154756e613..44848e4d43909 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -26,7 +26,12 @@ from pandas.core.generic import NDFrame, _shared_docs from pandas.core.groupby.base import GroupByMixin from pandas.core.groupby.generic import SeriesGroupBy -from pandas.core.groupby.groupby import GroupBy, _GroupBy, _pipe_template, get_groupby +from pandas.core.groupby.groupby import ( + BaseGroupBy, + GroupBy, + _pipe_template, + get_groupby, +) from pandas.core.groupby.grouper import Grouper from pandas.core.groupby.ops import BinGrouper from pandas.core.indexes.api import Index @@ -40,7 +45,7 @@ _shared_docs_kwargs: Dict[str, str] = dict() -class Resampler(_GroupBy, ShallowMixin): +class Resampler(BaseGroupBy, ShallowMixin): """ Class for resampling datetimelike data, a groupby-like operation. See aggregate, transform, and apply functions on this object. 
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 2bd36d8bff155..4282cb41c4e91 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -15,7 +15,7 @@ import pandas.core.common as common from pandas.core.window.common import _doc_template, _shared_docs, zsqrt -from pandas.core.window.rolling import _Rolling, flex_binary_moment +from pandas.core.window.rolling import RollingMixin, flex_binary_moment _bias_template = """ Parameters @@ -60,7 +60,7 @@ def get_center_of_mass( return float(comass) -class ExponentialMovingWindow(_Rolling): +class ExponentialMovingWindow(RollingMixin): r""" Provide exponential weighted (EW) functions. diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index ce4ab2f98c23d..46e002324ec75 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -5,10 +5,10 @@ from pandas.util._decorators import Appender, Substitution, doc from pandas.core.window.common import WindowGroupByMixin, _doc_template, _shared_docs -from pandas.core.window.rolling import _Rolling_and_Expanding +from pandas.core.window.rolling import RollingAndExpandingMixin -class Expanding(_Rolling_and_Expanding): +class Expanding(RollingAndExpandingMixin): """ Provide expanding transformations. 
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 5a7482076903c..648ab4d25be83 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1214,13 +1214,13 @@ def std(self, ddof=1, *args, **kwargs): return zsqrt(self.var(ddof=ddof, name="std", **kwargs)) -class _Rolling(_Window): +class RollingMixin(_Window): @property def _constructor(self): return Rolling -class _Rolling_and_Expanding(_Rolling): +class RollingAndExpandingMixin(RollingMixin): _shared_docs["count"] = dedent( r""" @@ -1917,7 +1917,7 @@ def _get_corr(a, b): ) -class Rolling(_Rolling_and_Expanding): +class Rolling(RollingAndExpandingMixin): @cache_readonly def is_datetimelike(self) -> bool: return isinstance( diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index ecb2a12bf03aa..4a0e859535215 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -37,6 +37,7 @@ "_flex_comp_doc_FRAME", "_make_flex_doc", "_op_descriptions", + "_IntegerDtype", "_use_inf_as_na", "_get_plot_backend", "_matplotlib", From 840e39ccff9e2c0d968769b21c142355aaeb9ce7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Sep 2020 08:31:34 -0700 Subject: [PATCH 12/12] ignore test files --- ci/code_checks.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 3ad2785cb887f..54aa830379c07 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -126,9 +126,9 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then MSG='Check for use of private functions across modules' ; echo $MSG if [[ "$GITHUB_ACTIONS" == "true" ]]; then - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --format="##[error]{source_path}:{line_number}:{msg}" pandas/ + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" 
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/ else - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" pandas/ + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/ fi RET=$(($RET + $?)) ; echo $MSG "DONE"