From 155639b16b5006cec1be3dd7ec117e929f1e2ceb Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 22 Jul 2019 15:46:07 -0700
Subject: [PATCH 1/8] Fix fields functions with readonly data, vaex#357

---
 pandas/_libs/tslibs/fields.pyx     | 10 +++++-----
 pandas/tests/tslibs/test_fields.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 5 deletions(-)
 create mode 100644 pandas/tests/tslibs/test_fields.py

diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
index 2a41b5ff2339c..2ed85595f7e3a 100644
--- a/pandas/_libs/tslibs/fields.pyx
+++ b/pandas/_libs/tslibs/fields.pyx
@@ -45,7 +45,7 @@ def get_time_micros(ndarray[int64_t] dtindex):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def build_field_sarray(int64_t[:] dtindex):
+def build_field_sarray(const int64_t[:] dtindex):
     """
     Datetime as int64 representation to a structured array of fields
     """
@@ -87,7 +87,7 @@ def build_field_sarray(int64_t[:] dtindex):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def get_date_name_field(int64_t[:] dtindex, object field, object locale=None):
+def get_date_name_field(const int64_t[:] dtindex, object field, object locale=None):
     """
     Given a int64-based datetime index, return array of strings of date
     name based on requested field (e.g. weekday_name)
@@ -137,7 +137,7 @@ def get_date_name_field(int64_t[:] dtindex, object field, object locale=None):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def get_start_end_field(int64_t[:] dtindex, object field,
+def get_start_end_field(const int64_t[:] dtindex, object field,
                         object freqstr=None, int month_kw=12):
     """
     Given an int64-based datetime index return array of indicators
@@ -380,7 +380,7 @@ def get_start_end_field(int64_t[:] dtindex, object field,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def get_date_field(int64_t[:] dtindex, object field):
+def get_date_field(const int64_t[:] dtindex, object field):
     """
     Given a int64-based datetime index, extract the year, month, etc.,
     field and return an array of these values.
@@ -542,7 +542,7 @@ def get_date_field(int64_t[:] dtindex, object field):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def get_timedelta_field(int64_t[:] tdindex, object field):
+def get_timedelta_field(const int64_t[:] tdindex, object field):
     """
     Given a int64-based timedelta index, extract the days, hrs, sec.,
     field and return an array of these values.
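
The hunks above are the substance of the fix: declaring the memoryview arguments as `const int64_t[:]` lets Cython accept buffers that are not writable, such as the memory-mapped arrays vaex hands to pandas. Without `const`, passing a read-only array into these functions fails while the typed memoryview is being constructed (typically `ValueError: buffer source array is read-only`) before any field extraction runs. The following is a minimal sketch of the user-visible behaviour that the new test, added in the next file, locks in; the specific values are illustrative only.

    import numpy as np
    from pandas._libs.tslibs import fields

    i8 = np.arange(5, dtype=np.int64) * 10 ** 9 * 3600 * 24 * 32
    i8.flags.writeable = False  # emulate vaex's memory-mapped, read-only buffers

    # Without const memoryviews: raises ValueError (buffer source array is read-only).
    # With this patch: returns the requested field values for the read-only input.
    years = fields.get_date_field(i8, "Y")
    months = fields.get_date_name_field(i8, "month_name")
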
diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py new file mode 100644 index 0000000000000..7a01d1bca568d --- /dev/null +++ b/pandas/tests/tslibs/test_fields.py @@ -0,0 +1,29 @@ +import numpy as np + +from pandas._libs.tslibs import fields +import pandas.util.testing as tm + + +def test_fields_readonly(): + # https://github.com/vaexio/vaex/issues/357 + # fields functions should't raise when we pass read-only data + dtindex = np.arange(5, dtype=np.int64) * 10**9 * 3600 * 24 * 32 + dtindex.flags.writeable = False + + result = fields.get_date_name_field(dtindex, "month_name") + expected = np.array(['January', 'February', 'March', 'April', 'May'], + dtype=np.object) + tm.assert_numpy_array_equal(result, expected) + + result = fields.get_date_field(dtindex, "Y") + expected = np.array([1970, 1970, 1970, 1970, 1970], dtype=np.int32) + tm.assert_numpy_array_equal(result, expected) + + result = fields.get_start_end_field(dtindex, "is_month_start", None) + expected = np.array([True, False, False, False, False], dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) + + # treat dtindex as timedeltas for this next one + result = fields.get_timedelta_field(dtindex, "days") + expected = np.arange(5, dtype=np.int32) * 32 + tm.assert_numpy_array_equal(result, expected) From df6f11f614efdcf4384e43e4b131a8f354413b9d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 16:51:17 -0700 Subject: [PATCH 2/8] lint fixup --- pandas/tests/tslibs/test_fields.py | 37 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py index 7a01d1bca568d..bd57758f31a75 100644 --- a/pandas/tests/tslibs/test_fields.py +++ b/pandas/tests/tslibs/test_fields.py @@ -5,25 +5,26 @@ def test_fields_readonly(): - # https://github.com/vaexio/vaex/issues/357 - # fields functions should't raise when we pass read-only data - dtindex = np.arange(5, dtype=np.int64) * 10**9 * 3600 * 24 * 32 - dtindex.flags.writeable = False + # https://github.com/vaexio/vaex/issues/357 + # fields functions should't raise when we pass read-only data + dtindex = np.arange(5, dtype=np.int64) * 10 ** 9 * 3600 * 24 * 32 + dtindex.flags.writeable = False - result = fields.get_date_name_field(dtindex, "month_name") - expected = np.array(['January', 'February', 'March', 'April', 'May'], - dtype=np.object) - tm.assert_numpy_array_equal(result, expected) + result = fields.get_date_name_field(dtindex, "month_name") + expected = np.array( + ["January", "February", "March", "April", "May"], dtype=np.object + ) + tm.assert_numpy_array_equal(result, expected) - result = fields.get_date_field(dtindex, "Y") - expected = np.array([1970, 1970, 1970, 1970, 1970], dtype=np.int32) - tm.assert_numpy_array_equal(result, expected) + result = fields.get_date_field(dtindex, "Y") + expected = np.array([1970, 1970, 1970, 1970, 1970], dtype=np.int32) + tm.assert_numpy_array_equal(result, expected) - result = fields.get_start_end_field(dtindex, "is_month_start", None) - expected = np.array([True, False, False, False, False], dtype=np.bool_) - tm.assert_numpy_array_equal(result, expected) + result = fields.get_start_end_field(dtindex, "is_month_start", None) + expected = np.array([True, False, False, False, False], dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) - # treat dtindex as timedeltas for this next one - result = fields.get_timedelta_field(dtindex, "days") - expected = 
np.arange(5, dtype=np.int32) * 32 - tm.assert_numpy_array_equal(result, expected) + # treat dtindex as timedeltas for this next one + result = fields.get_timedelta_field(dtindex, "days") + expected = np.arange(5, dtype=np.int32) * 32 + tm.assert_numpy_array_equal(result, expected) From 74d8270f0a6dc501e6270cf4629224ca20b3968a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 19:40:42 -0700 Subject: [PATCH 3/8] cleanups --- pandas/_libs/missing.pyx | 7 ++----- pandas/core/generic.py | 14 +++++++------- pandas/core/groupby/generic.py | 17 ++++++++--------- pandas/core/internals/blocks.py | 3 +-- pandas/core/reshape/pivot.py | 2 -- pandas/core/sorting.py | 10 +++++----- pandas/core/window.py | 6 ++---- 7 files changed, 25 insertions(+), 34 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 1d756115ebd5a..052b081988c9e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -80,11 +80,8 @@ cpdef bint checknull_old(object val): cdef inline bint _check_none_nan_inf_neginf(object val): - try: - return val is None or (isinstance(val, float) and - (val != val or val == INF or val == NEGINF)) - except ValueError: - return False + return val is None or (isinstance(val, float) and + (val != val or val == INF or val == NEGINF)) @cython.wraparound(False) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0afd42e406c1f..d9cd6b67787c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4893,12 +4893,12 @@ def sample( if weights is not None: # If a series, align with frame - if isinstance(weights, pd.Series): + if isinstance(weights, ABCSeries): weights = weights.reindex(self.axes[axis]) # Strings acceptable if a dataframe and axis = 0 if isinstance(weights, str): - if isinstance(self, pd.DataFrame): + if isinstance(self, ABCDataFrame): if axis == 0: try: weights = self[weights] @@ -6629,7 +6629,7 @@ def replace( to_replace = [to_replace] if isinstance(to_replace, (tuple, list)): - if isinstance(self, pd.DataFrame): + if isinstance(self, ABCDataFrame): return self.apply( _single_replace, args=(to_replace, method, inplace, limit) ) @@ -7422,7 +7422,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): # be transformed to NDFrame from other array like structure. if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): if isinstance(self, ABCSeries): - threshold = pd.Series(threshold, index=self.index) + threshold = self._constructor(threshold, index=self.index) else: threshold = _align_method_FRAME(self, threshold, axis) return self.where(subset, threshold, axis=axis, inplace=inplace) @@ -7511,9 +7511,9 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs # so ignore # GH 19992 # numpy doesn't drop a list-like bound containing NaN - if not is_list_like(lower) and np.any(pd.isnull(lower)): + if not is_list_like(lower) and np.any(pd.isna(lower)): lower = None - if not is_list_like(upper) and np.any(pd.isnull(upper)): + if not is_list_like(upper) and np.any(pd.isna(upper)): upper = None # GH 2747 (arguments were reversed) @@ -8986,7 +8986,7 @@ def _where( msg = "Boolean array expected for the condition, not {dtype}" - if not isinstance(cond, pd.DataFrame): + if not isinstance(cond, ABCDataFrame): # This is a single-dimensional object. 
if not is_bool_dtype(cond): raise ValueError(msg.format(dtype=cond.dtype)) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5b9cec6903749..8ea9124b0e98e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -35,7 +35,7 @@ is_object_dtype, is_scalar, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import _isna_ndarraylike, isna, notna from pandas._typing import FrameOrSeries import pandas.core.algorithms as algorithms @@ -45,7 +45,7 @@ from pandas.core.generic import ABCDataFrame, ABCSeries, NDFrame, _shared_docs from pandas.core.groupby import base from pandas.core.groupby.groupby import GroupBy, _apply_docs, _transform_template -from pandas.core.index import Index, MultiIndex +from pandas.core.index import Index, MultiIndex, _all_indexes_same import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series @@ -53,6 +53,8 @@ from pandas.plotting import boxplot_frame_groupby +from .groupby import groupby + NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"]) # TODO(typing) the return value on this callable should be any *scalar*. AggScalar = Union[str, Callable[..., Any]] @@ -162,8 +164,6 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): continue # call our grouper again with only this block - from pandas.core.groupby.groupby import groupby - obj = self.obj[data.items[locs]] s = groupby(obj, self.grouper) try: @@ -348,8 +348,6 @@ def _decide_output_index(self, output, labels): return output_keys def _wrap_applied_output(self, keys, values, not_indexed_same=False): - from pandas.core.index import _all_indexes_same - if len(keys) == 0: return DataFrame(index=keys) @@ -1590,13 +1588,14 @@ def count(self): DataFrame Count of values within each group. 
""" - from pandas.core.dtypes.missing import _isna_ndarraylike as _isna - data, _ = self._get_data_to_aggregate() ids, _, ngroups = self.grouper.group_info mask = ids != -1 - val = ((mask & ~_isna(np.atleast_2d(blk.get_values()))) for blk in data.blocks) + val = ( + (mask & ~_isna_ndarraylike(np.atleast_2d(blk.get_values()))) + for blk in data.blocks + ) loc = (blk.mgr_locs for blk in data.blocks) counter = partial(lib.count_level_2d, labels=ids, max_bin=ngroups, axis=1) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f0e7893435f2b..82fee5c9e36d7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -75,6 +75,7 @@ PandasDtype, TimedeltaArray, ) +from pandas.core.arrays.categorical import _maybe_to_categorical from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.indexers import ( @@ -3085,8 +3086,6 @@ class CategoricalBlock(ExtensionBlock): _concatenator = staticmethod(concat_categorical) def __init__(self, values, placement, ndim=None): - from pandas.core.arrays.categorical import _maybe_to_categorical - # coerce to categorical if we can super().__init__(_maybe_to_categorical(values), placement=placement, ndim=ndim) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 2bdef766a3434..79716520f6654 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -127,8 +127,6 @@ def pivot_table( table = agged.unstack(to_unstack) if not dropna: - from pandas import MultiIndex - if table.index.nlevels > 1: m = MultiIndex.from_arrays( cartesian_product(table.index.levels), names=table.index.names diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 5f3ed87424d0e..1ab6c792c6402 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -202,19 +202,19 @@ def lexsort_indexer(keys, orders=None, na_position="last"): # we are already a Categorical if is_categorical_dtype(key): - c = key + cat = key # create the Categorical else: - c = Categorical(key, ordered=True) + cat = Categorical(key, ordered=True) if na_position not in ["last", "first"]: raise ValueError("invalid na_position: {!r}".format(na_position)) - n = len(c.categories) - codes = c.codes.copy() + n = len(cat.categories) + codes = cat.codes.copy() - mask = c.codes == -1 + mask = cat.codes == -1 if order: # ascending if na_position == "last": codes = np.where(mask, n, codes) diff --git a/pandas/core/window.py b/pandas/core/window.py index 5098ab3c7220f..14d109ccf6a9c 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -40,6 +40,7 @@ import pandas.core.common as com from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin +from pandas.core.index import Index, MultiIndex, ensure_index _shared_docs = dict(**_shared_docs) _doc_template = """ @@ -281,7 +282,6 @@ def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeries: """ from pandas import Series, concat - from pandas.core.index import ensure_index final = [] for result, block in zip(results, blocks): @@ -1691,8 +1691,6 @@ def _on(self): if self.on is None: return self.obj.index elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns: - from pandas import Index - return Index(self.obj[self.on]) else: raise ValueError( @@ -2670,7 +2668,7 @@ def dataframe_from_int_dict(data, frame_template): *_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]) ) - from pandas import MultiIndex, concat + from pandas import concat result_index = 
arg1.index.union(arg2.index)
         if len(result_index):

From 714b878f1ac105c49e4028ed41db6980d60180e2 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 23 Jul 2019 15:14:51 -0700
Subject: [PATCH 4/8] revert things that are in deps2

---
 pandas/_libs/tslibs/fields.pyx     | 10 +++++-----
 pandas/tests/tslibs/test_fields.py | 30 ------------------------------
 2 files changed, 5 insertions(+), 35 deletions(-)
 delete mode 100644 pandas/tests/tslibs/test_fields.py

diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
index 2ed85595f7e3a..2a41b5ff2339c 100644
--- a/pandas/_libs/tslibs/fields.pyx
+++ b/pandas/_libs/tslibs/fields.pyx
@@ -45,7 +45,7 @@ def get_time_micros(ndarray[int64_t] dtindex):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def build_field_sarray(const int64_t[:] dtindex):
+def build_field_sarray(int64_t[:] dtindex):
     """
     Datetime as int64 representation to a structured array of fields
     """
@@ -87,7 +87,7 @@ def build_field_sarray(const int64_t[:] dtindex):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def get_date_name_field(const int64_t[:] dtindex, object field, object locale=None):
+def get_date_name_field(int64_t[:] dtindex, object field, object locale=None):
     """
     Given a int64-based datetime index, return array of strings of date
     name based on requested field (e.g. weekday_name)
@@ -137,7 +137,7 @@ def get_date_name_field(const int64_t[:] dtindex, object field, object locale=No
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def get_start_end_field(const int64_t[:] dtindex, object field,
+def get_start_end_field(int64_t[:] dtindex, object field,
                         object freqstr=None, int month_kw=12):
     """
     Given an int64-based datetime index return array of indicators
@@ -380,7 +380,7 @@ def get_start_end_field(const int64_t[:] dtindex, object field,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def get_date_field(const int64_t[:] dtindex, object field):
+def get_date_field(int64_t[:] dtindex, object field):
     """
     Given a int64-based datetime index, extract the year, month, etc.,
     field and return an array of these values.
@@ -542,7 +542,7 @@ def get_date_field(const int64_t[:] dtindex, object field):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def get_timedelta_field(const int64_t[:] tdindex, object field):
+def get_timedelta_field(int64_t[:] tdindex, object field):
     """
     Given a int64-based timedelta index, extract the days, hrs, sec.,
     field and return an array of these values.

diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py
deleted file mode 100644
index bd57758f31a75..0000000000000
--- a/pandas/tests/tslibs/test_fields.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import numpy as np
-
-from pandas._libs.tslibs import fields
-import pandas.util.testing as tm
-
-
-def test_fields_readonly():
-    # https://github.com/vaexio/vaex/issues/357
-    # fields functions should't raise when we pass read-only data
-    dtindex = np.arange(5, dtype=np.int64) * 10 ** 9 * 3600 * 24 * 32
-    dtindex.flags.writeable = False
-
-    result = fields.get_date_name_field(dtindex, "month_name")
-    expected = np.array(
-        ["January", "February", "March", "April", "May"], dtype=np.object
-    )
-    tm.assert_numpy_array_equal(result, expected)
-
-    result = fields.get_date_field(dtindex, "Y")
-    expected = np.array([1970, 1970, 1970, 1970, 1970], dtype=np.int32)
-    tm.assert_numpy_array_equal(result, expected)
-
-    result = fields.get_start_end_field(dtindex, "is_month_start", None)
-    expected = np.array([True, False, False, False, False], dtype=np.bool_)
-    tm.assert_numpy_array_equal(result, expected)
-
-    # treat dtindex as timedeltas for this next one
-    result = fields.get_timedelta_field(dtindex, "days")
-    expected = np.arange(5, dtype=np.int32) * 32
-    tm.assert_numpy_array_equal(result, expected)

From bf29a7c93fb8db93ae81822b561f564bcafbf6c2 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 23 Jul 2019 18:00:30 -0700
Subject: [PATCH 5/8] remove maybe_to_categorical

---
 pandas/core/arrays/categorical.py | 13 -------------
 pandas/core/internals/blocks.py   |  5 +++--
 2 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 6200cd14663f8..8adbc0fbd6d35 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -166,19 +166,6 @@ def f(self, other):
     return f
 
 
-def _maybe_to_categorical(array):
-    """
-    Coerce to a categorical if a series is given.
-
-    Internal use ONLY.
-    """
-    if isinstance(array, (ABCSeries, ABCCategoricalIndex)):
-        return array._values
-    elif isinstance(array, np.ndarray):
-        return Categorical(array)
-    return array
-
-
 def contains(cat, key, container):
     """
     Helper for membership check for ``key`` in ``cat``.
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 82fee5c9e36d7..29b1942e9d9b4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -75,7 +75,6 @@ PandasDtype, TimedeltaArray, ) -from pandas.core.arrays.categorical import _maybe_to_categorical from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.indexers import ( @@ -3087,7 +3086,9 @@ class CategoricalBlock(ExtensionBlock): def __init__(self, values, placement, ndim=None): # coerce to categorical if we can - super().__init__(_maybe_to_categorical(values), placement=placement, ndim=ndim) + values = extract_array(values) + assert isinstance(values, Categorical), type(values) + super().__init__(values, placement=placement, ndim=ndim) @property def _holder(self): From 5bc46ec8b22fe7890d44e350890dd3c1e1ea7094 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 23 Jul 2019 20:38:44 -0700 Subject: [PATCH 6/8] cln --- pandas/core/arrays/categorical.py | 18 ------------------ pandas/core/arrays/sparse.py | 13 ------------- pandas/core/sparse/series.py | 5 +++++ 3 files changed, 5 insertions(+), 31 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 8adbc0fbd6d35..08e247da4cb3b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -39,7 +39,6 @@ ) from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.generic import ( - ABCCategoricalIndex, ABCDataFrame, ABCIndexClass, ABCSeries, @@ -1975,23 +1974,6 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): take = take_nd - def _slice(self, slicer): - """ - Return a slice of myself. - - For internal compatibility with numpy arrays. - """ - - # only allow 1 dimensional slicing, but can - # in a 2-d case be passd (slice(None),....) - if isinstance(slicer, tuple) and len(slicer) == 2: - if not com.is_null_slice(slicer[0]): - raise AssertionError("invalid slicing for a 1-ndim " "categorical") - slicer = slicer[1] - - codes = self._codes[slicer] - return self._constructor(values=codes, dtype=self.dtype, fastpath=True) - def __len__(self): """ The length of this Categorical. 
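
Two related cleanups meet here: `CategoricalBlock.__init__` (patch 5, above) now unwraps its input with `extract_array` and asserts it received a `Categorical`, instead of silently coercing through the removed `_maybe_to_categorical` helper, and patch 6 drops the internal `Categorical._slice` helper. The snippet below is a rough sketch of the unwrapping behaviour the new constructor leans on; the `pandas.core.construction` import path is an assumption based on the layout of the codebase around this time, not something taken from the patch.

    import numpy as np
    import pandas as pd
    from pandas.core.construction import extract_array  # assumed location

    cat = pd.Categorical(["a", "b", "a"])

    # Series/Index wrappers are unwrapped to the underlying Categorical ...
    assert isinstance(extract_array(pd.Series(cat)), pd.Categorical)
    assert isinstance(extract_array(pd.CategoricalIndex(cat)), pd.Categorical)

    # ... but a bare ndarray is returned as-is rather than coerced, which is
    # why the block constructor now asserts it was handed a Categorical.
    assert isinstance(extract_array(np.array(["a", "b"])), np.ndarray)
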
diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 9376b49112f6f..ee3652a211e31 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -601,10 +601,6 @@ def __init__( dtype=None, copy=False, ): - from pandas.core.internals import SingleBlockManager - - if isinstance(data, SingleBlockManager): - data = data.internal_values() if fill_value is None and isinstance(dtype, SparseDtype): fill_value = dtype.fill_value @@ -1859,15 +1855,6 @@ def _formatter(self, boxed=False): SparseArray._add_unary_ops() -def _maybe_to_dense(obj): - """ - try to convert to dense - """ - if hasattr(obj, "to_dense"): - return obj.to_dense() - return obj - - def make_sparse(arr, kind="block", fill_value=None, dtype=None, copy=False): """ Convert ndarray to sparse format diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index f5d39c47150a2..0bd20e75d17ec 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -114,6 +114,11 @@ def __init__( elif is_scalar(data) and index is not None: data = np.full(len(index), fill_value=data) + if isinstance(data, SingleBlockManager): + # SparseArray doesn't accept SingleBlockManager + index = data.index + data = data.blocks[0].values + super().__init__( SparseArray( data, From 46898efc77893c030026c792e5f3a664eee42642 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 23 Jul 2019 20:40:25 -0700 Subject: [PATCH 7/8] isort --- pandas/core/arrays/categorical.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 08e247da4cb3b..dd71daab9d4c5 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -38,11 +38,7 @@ is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCIndexClass, - ABCSeries, -) +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna From 925ce8103c34e26e828b9f309531d3245db268cd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 06:27:15 -0700 Subject: [PATCH 8/8] comments --- pandas/core/generic.py | 4 ++-- pandas/core/groupby/generic.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7291f113dc6f2..1a854af52c20e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7510,9 +7510,9 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs # so ignore # GH 19992 # numpy doesn't drop a list-like bound containing NaN - if not is_list_like(lower) and np.any(pd.isna(lower)): + if not is_list_like(lower) and np.any(isna(lower)): lower = None - if not is_list_like(upper) and np.any(pd.isna(upper)): + if not is_list_like(upper) and np.any(isna(upper)): upper = None # GH 2747 (arguments were reversed) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 8ea9124b0e98e..b886b7e305ed0 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -44,7 +44,12 @@ from pandas.core.frame import DataFrame from pandas.core.generic import ABCDataFrame, ABCSeries, NDFrame, _shared_docs from pandas.core.groupby import base -from pandas.core.groupby.groupby import GroupBy, _apply_docs, _transform_template +from pandas.core.groupby.groupby import ( + GroupBy, + 
_apply_docs, + _transform_template, + groupby, +) from pandas.core.index import Index, MultiIndex, _all_indexes_same import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block @@ -53,8 +58,6 @@ from pandas.plotting import boxplot_frame_groupby -from .groupby import groupby - NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"]) # TODO(typing) the return value on this callable should be any *scalar*. AggScalar = Union[str, Callable[..., Any]]
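
A recurring theme in patches 3 and 8 is swapping runtime `pd.Series`/`pd.DataFrame` isinstance checks for the `ABCSeries`/`ABCDataFrame` dummies and hoisting function-local imports (such as `groupby` above) to module level. The ABC classes answer `isinstance` the same way without requiring the concrete classes at import time, which is how these modules sidestep circular imports. A small illustration, not taken from the patch itself:

    import pandas as pd
    from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

    df = pd.DataFrame({"a": [1, 2, 3]})
    assert isinstance(df, ABCDataFrame)    # same answer as isinstance(df, pd.DataFrame)
    assert isinstance(df["a"], ABCSeries)  # no need to import Series at the call site
    assert not isinstance(df, ABCSeries)
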