From ac48f0145bfaf0a80c0080b3e527ae8392239281 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 14:41:10 -0700 Subject: [PATCH 1/3] CLN: collected cleanups from other branches --- pandas/_libs/index.pyx | 4 -- pandas/core/arrays/numpy_.py | 6 ++- pandas/core/arrays/timedeltas.py | 4 +- pandas/core/generic.py | 6 +-- pandas/core/indexes/base.py | 2 +- pandas/core/internals/blocks.py | 54 +++++++++++++++------- pandas/core/internals/managers.py | 2 +- pandas/core/resample.py | 9 ++-- pandas/core/window.py | 2 + pandas/tests/arithmetic/test_datetime64.py | 1 + pandas/tests/groupby/test_categorical.py | 2 +- pandas/tests/io/test_parquet.py | 4 ++ 12 files changed, 61 insertions(+), 35 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index f704ceffa662e..7424c4ddc3d92 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -47,10 +47,6 @@ cpdef get_value_at(ndarray arr, object loc, object tz=None): return util.get_value_at(arr, loc) -def get_value_box(arr: ndarray, loc: object) -> object: - return get_value_at(arr, loc, tz=None) - - # Don't populate hash tables in monotonic indexes larger than this _SIZE_CUTOFF = 1000000 diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 39529177b9e35..667fb4501ed95 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -125,7 +125,11 @@ def __init__(self, values, copy=False): if isinstance(values, type(self)): values = values._ndarray if not isinstance(values, np.ndarray): - raise ValueError("'values' must be a NumPy array.") + raise ValueError( + "'values' must be a NumPy array, not {typ}".format( + typ=type(values).__name__ + ) + ) if values.ndim != 1: raise ValueError("PandasArray must be 1-dimensional.") diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index dd0b9a79c6dca..60918bca0187a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -173,8 +173,8 @@ class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): "ceil", ] - # Needed so that NaT.__richcmp__(DateTimeArray) operates pointwise - ndim = 1 + # Note: ndim must be defined to ensure NaT.__richcmp(TimedeltaArray) + # operates pointwise. @property def _box_func(self): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1d87a6937ca34..e4a9fb69f524d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3560,7 +3560,7 @@ def _iget_item_cache(self, item): def _box_item_values(self, key, values): raise AbstractMethodError(self) - def _slice(self, slobj, axis=0, kind=None): + def _slice(self, slobj: slice, axis=0, kind=None): """ Construct a slice of this container. @@ -6187,8 +6187,6 @@ def fillna( axis = 0 axis = self._get_axis_number(axis) - from pandas import DataFrame - if value is None: if self._is_mixed_type and axis == 1: @@ -6251,7 +6249,7 @@ def fillna( new_data = self._data.fillna( value=value, limit=limit, inplace=inplace, downcast=downcast ) - elif isinstance(value, DataFrame) and self.ndim == 2: + elif isinstance(value, ABCDataFrame) and self.ndim == 2: new_data = self.where(self.notna(), value) else: raise ValueError("invalid fill value with a %s" % type(value)) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ce7b73a92b18a..19ef4b51b23cd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4698,7 +4698,7 @@ def get_value(self, series, key): raise try: - return libindex.get_value_box(s, key) + return libindex.get_value_at(s, key) except IndexError: raise except TypeError: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6d70fcfb62d52..eba319115820d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -437,7 +437,7 @@ def f(m, v, i): return self.split_and_operate(mask, f, inplace) - def split_and_operate(self, mask, f, inplace): + def split_and_operate(self, mask, f, inplace: bool): """ split the block per-column, and apply the callable f per-column, return a new block for each. Handle @@ -496,17 +496,15 @@ def make_a_block(nv, ref_loc): return new_blocks - def _maybe_downcast(self, blocks, downcast=None): + def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]: # no need to downcast our float # unless indicated - if downcast is None and self.is_float: - return blocks - elif downcast is None and (self.is_timedelta or self.is_datetime): + if downcast is None and ( + self.is_float or self.is_timedelta or self.is_datetime + ): return blocks - if not isinstance(blocks, list): - blocks = [blocks] return _extend_blocks([b.downcast(downcast) for b in blocks]) def downcast(self, dtypes=None): @@ -1351,7 +1349,15 @@ def shift(self, periods, axis=0, fill_value=None): return [self.make_block(new_values)] - def where(self, other, cond, align=True, errors="raise", try_cast=False, axis=0): + def where( + self, + other, + cond, + align=True, + errors="raise", + try_cast: bool = False, + axis: int = 0, + ) -> List["Block"]: """ evaluate the block; return result block(s) from the result @@ -1450,7 +1456,7 @@ def func(cond, values, other): if try_cast: result = self._try_cast_result(result) - return self.make_block(result) + return [self.make_block(result)] # might need to separate out blocks axis = cond.ndim - 1 @@ -1482,9 +1488,9 @@ def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): new_columns : Index All columns of the unstacked BlockManager. n_rows : int - Only used in ExtensionBlock.unstack + Only used in ExtensionBlock._unstack fill_value : int - Only used in ExtensionBlock.unstack + Only used in ExtensionBlock._unstack Returns ------- @@ -1558,7 +1564,7 @@ def quantile(self, qs, interpolation="linear", axis=0): result = result[..., 0] result = lib.item_from_zerodim(result) - ndim = getattr(result, "ndim", None) or 0 + ndim = np.ndim(result) return make_block(result, placement=np.arange(len(result)), ndim=ndim) def _replace_coerce( @@ -1946,7 +1952,15 @@ def shift( ) ] - def where(self, other, cond, align=True, errors="raise", try_cast=False, axis=0): + def where( + self, + other, + cond, + align=True, + errors="raise", + try_cast: bool = False, + axis: int = 0, + ) -> List["Block"]: if isinstance(other, ABCDataFrame): # ExtensionArrays are 1-D, so if we get here then # `other` should be a DataFrame with a single column. @@ -1991,7 +2005,7 @@ def where(self, other, cond, align=True, errors="raise", try_cast=False, axis=0) np.where(cond, self.values, other), dtype=dtype ) - return self.make_block_same_class(result, placement=self.mgr_locs) + return [self.make_block_same_class(result, placement=self.mgr_locs)] @property def _ftype(self): @@ -2743,7 +2757,7 @@ def f(m, v, i): return blocks - def _maybe_downcast(self, blocks, downcast=None): + def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]: if downcast is not None: return blocks @@ -3078,7 +3092,15 @@ def concat_same_type(self, to_concat, placement=None): values, placement=placement or slice(0, len(values), 1), ndim=self.ndim ) - def where(self, other, cond, align=True, errors="raise", try_cast=False, axis=0): + def where( + self, + other, + cond, + align=True, + errors="raise", + try_cast: bool = False, + axis: int = 0, + ) -> List["Block"]: # TODO(CategoricalBlock.where): # This can all be deleted in favor of ExtensionBlock.where once # we enforce the deprecation. diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8956821740bf3..1205a1d62c9d7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1822,7 +1822,7 @@ def _simple_blockify(tuples, dtype): """ values, placement = _stack_arrays(tuples, dtype) - # CHECK DTYPE? + # TODO: CHECK DTYPE? if dtype is not None and values.dtype != dtype: # pragma: no cover values = values.astype(dtype) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 66878c3b1026c..a5d0e2cb3b58f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1630,15 +1630,14 @@ def _get_period_bins(self, ax): def _take_new_index(obj, indexer, new_index, axis=0): - from pandas.core.api import Series, DataFrame - if isinstance(obj, Series): + if isinstance(obj, ABCSeries): new_values = algos.take_1d(obj.values, indexer) - return Series(new_values, index=new_index, name=obj.name) - elif isinstance(obj, DataFrame): + return obj._constructor(new_values, index=new_index, name=obj.name) + elif isinstance(obj, ABCDataFrame): if axis == 1: raise NotImplementedError("axis 1 is not supported") - return DataFrame( + return obj._constructor( obj._data.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) ) else: diff --git a/pandas/core/window.py b/pandas/core/window.py index 4b6a1cf2e9a04..a7425bc1466c3 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -265,6 +265,8 @@ def _wrap_result(self, result, block=None, obj=None): # coerce if necessary if block is not None: if is_timedelta64_dtype(block.values.dtype): + # TODO: do we know what result.dtype is at this point? + # i.e. can we just do an astype? from pandas import to_timedelta result = to_timedelta(result.ravel(), unit="ns").values.reshape( diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index cca6836acf626..0b17ece14cbd1 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -666,6 +666,7 @@ def test_comparison_tzawareness_compat_scalars(self, op, box_with_array): # Raising in __eq__ will fallback to NumPy, which warns, fails, # then re-raises the original exception. So we just need to ignore. @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") + @pytest.mark.filterwarnings("ignore:Converting timezone-aware:FutureWarning") def test_scalar_comparison_tzawareness( self, op, other, tz_aware_fixture, box_with_array ): diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 486b3b28b29a3..c2c0af78aa518 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -508,7 +508,7 @@ def test_datetime(): desc_result = grouped.describe() idx = cats.codes.argsort() - ord_labels = cats.take_nd(idx) + ord_labels = cats.take(idx) # TODO: deprecate take<->take_nd alias ord_data = data.take(idx) expected = ord_data.groupby(ord_labels, observed=False).describe() assert_frame_equal(desc_result, expected) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index a04fb9fd50257..1c4e41af4b023 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -33,6 +33,10 @@ except ImportError: _HAVE_FASTPARQUET = False +pytestmark = pytest.mark.filterwarnings( + "ignore:RangeIndex.* is deprecated:DeprecationWarning" +) + # setup engines & skips @pytest.fixture( From f0984955d18e7828ef3650b7040aba2975e46e81 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Aug 2019 16:16:39 -0700 Subject: [PATCH 2/3] remove comment --- pandas/tests/groupby/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index c2c0af78aa518..bd26e65ab89db 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -508,7 +508,7 @@ def test_datetime(): desc_result = grouped.describe() idx = cats.codes.argsort() - ord_labels = cats.take(idx) # TODO: deprecate take<->take_nd alias + ord_labels = cats.take(idx) ord_data = data.take(idx) expected = ord_data.groupby(ord_labels, observed=False).describe() assert_frame_equal(desc_result, expected) From 0392e3766202744f018058c7614dc28d4f3e1668 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 5 Aug 2019 08:01:34 -0700 Subject: [PATCH 3/3] dummy to force travis