From 0be171340088e2a1c74050041b8682377c709aee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 20:25:41 -0800 Subject: [PATCH 01/13] CLN: avoid _ndarray_values --- pandas/core/dtypes/cast.py | 4 +++- pandas/core/indexes/multi.py | 4 ++-- pandas/core/indexes/numeric.py | 2 +- pandas/core/reshape/merge.py | 3 --- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c2b600b5d8c5b..b6fc27042240c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1181,9 +1181,11 @@ def try_timedelta(v): from pandas import to_timedelta try: - return to_timedelta(v)._ndarray_values.reshape(shape) + td_values = to_timedelta(v) except ValueError: return v.reshape(shape) + else: + return np.asarray(td_values).reshape(shape) inferred_type = lib.infer_datetimelike_array(ensure_object(v)) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4bd462e83a5bc..dc77bd8d7f700 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2998,7 +2998,7 @@ def _update_indexer(idxr, indexer=indexer): indexer = _update_indexer(indexers, indexer=indexer) else: # no matches we are done - return Int64Index([])._ndarray_values + return np.array([], dtype=np.int64) elif com.is_null_slice(k): # empty slice @@ -3024,7 +3024,7 @@ def _update_indexer(idxr, indexer=indexer): # empty indexer if indexer is None: - return Int64Index([])._ndarray_values + return np.array([], dtype=np.int64) indexer = self._reorder_indexer(seq, indexer) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 06a26cc90555e..69d01e98f2374 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -439,7 +439,7 @@ def equals(self, other) -> bool: other = self._constructor(other) if not is_dtype_equal(self.dtype, other.dtype) or self.shape != other.shape: return False - left, right = self._ndarray_values, other._ndarray_values + left, right = self._values, other._values return ((left == right) | (self._isnan & other._isnan)).all() except (TypeError, ValueError): return False diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 49ac1b6cfa52b..6bd7e9c0b4945 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1923,9 +1923,6 @@ def _factorize_keys(lk, rk, sort=True): def _sort_labels(uniques: np.ndarray, left, right): - if not isinstance(uniques, np.ndarray): - # tuplesafe - uniques = Index(uniques).values llength = len(left) labels = np.concatenate([left, right]) From 3a4e2a28530173e0febc79a2e5e1336db41f9915 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 18:14:28 -0800 Subject: [PATCH 02/13] more _values --- pandas/_testing.py | 14 +++++++------- pandas/core/indexes/base.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index fce06e216dfd7..b1488745622cc 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -706,11 +706,11 @@ def _get_ilevel_values(index, level): if isinstance(left, pd.PeriodIndex) or isinstance(right, pd.PeriodIndex): assert_attr_equal("freq", left, right, obj=obj) if isinstance(left, pd.IntervalIndex) or isinstance(right, pd.IntervalIndex): - assert_interval_array_equal(left.values, right.values) + assert_interval_array_equal(left._values, right._values) if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): - assert_categorical_equal(left.values, right.values, obj=f"{obj} category") + assert_categorical_equal(left._values, right._values, obj=f"{obj} category") def assert_class_equal(left, right, exact: Union[bool, str] = True, obj="Input"): @@ -1170,10 +1170,10 @@ def assert_series_equal( # datetimelike may have different objects (e.g. datetime.datetime # vs Timestamp) but will compare equal - if not Index(left.values).equals(Index(right.values)): + if not Index(left._values).equals(Index(right._values)): msg = ( - f"[datetimelike_compat=True] {left.values} " - f"is not equal to {right.values}." + f"[datetimelike_compat=True] {left._values} " + f"is not equal to {right._values}." ) raise AssertionError(msg) else: @@ -1212,8 +1212,8 @@ def assert_series_equal( if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): assert_categorical_equal( - left.values, - right.values, + left._values, + right._values, obj=f"{obj} category", check_category_order=check_category_order, ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ae2387f0fd7b4..b3641b5a59c14 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -118,12 +118,12 @@ def cmp_method(self, other): elif is_object_dtype(self) and isinstance(other, ExtensionArray): # e.g. PeriodArray with np.errstate(all="ignore"): - result = op(self.values, other) + result = op(self._values, other) elif is_object_dtype(self) and not isinstance(self, ABCMultiIndex): # don't pass MultiIndex with np.errstate(all="ignore"): - result = ops.comp_method_OBJECT_ARRAY(op, self.values, other) + result = ops.comp_method_OBJECT_ARRAY(op, self._values, other) else: with np.errstate(all="ignore"): @@ -1074,7 +1074,7 @@ def to_series(self, index=None, name=None): if name is None: name = self.name - return Series(self.values.copy(), index=index, name=name) + return Series(self._values.copy(), index=index, name=name) def to_frame(self, index: bool = True, name=None): """ @@ -4233,7 +4233,7 @@ def putmask(self, mask, value): -------- numpy.ndarray.putmask """ - values = self.values.copy() + values = self._values.copy() try: np.putmask(values, mask, self._convert_for_op(value)) return self._shallow_copy(values) From ebaec34f8edf22fafbf0d563ce1a2b8e3244e583 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 19:10:34 -0800 Subject: [PATCH 03/13] _ndarray_values->asi8 --- pandas/plotting/_matplotlib/converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index c399e5b9b7017..8260684c02ea6 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -218,13 +218,13 @@ def _convert_1d(values, units, axis): if isinstance(values, valid_types) or is_integer(values) or is_float(values): return get_datevalue(values, axis.freq) elif isinstance(values, PeriodIndex): - return values.asfreq(axis.freq)._ndarray_values + return values.asfreq(axis.freq).asi8 elif isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) elif lib.infer_dtype(values, skipna=False) == "period": # https://github.com/pandas-dev/pandas/issues/24304 # convert ndarray[period] -> PeriodIndex - return PeriodIndex(values, freq=axis.freq)._ndarray_values + return PeriodIndex(values, freq=axis.freq).asi8 elif isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values @@ -607,7 +607,7 @@ def _daily_finder(vmin, vmax, freq): info = np.zeros( span, dtype=[("val", np.int64), ("maj", bool), ("min", bool), ("fmt", "|S20")] ) - info["val"][:] = dates_._ndarray_values + info["val"][:] = dates_.asi8 info["fmt"][:] = "" info["maj"][[0, -1]] = True # .. and set some shortcuts From 12ea11cb409d17a43eba28bc31b0c0ccb3b8a5ce Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 19:17:13 -0800 Subject: [PATCH 04/13] typo --- pandas/_testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index f7d553694b90c..33ec4e4886aa6 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -883,7 +883,7 @@ def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray") def assert_period_array_equal(left, right, obj="PeriodArray"): _check_isinstance(left, right, PeriodArray) - assert_numpy_array_equal(left._data, right._data, obj=f"{obj}.values") + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") assert_attr_equal("freq", left, right, obj=obj) From 130c3e66c31eeb8ba4768f34dd937067dc895b56 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Mar 2020 08:00:14 -0800 Subject: [PATCH 05/13] _ndarray_values->np.asarray --- pandas/core/frame.py | 2 +- pandas/io/stata.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 106128004f549..f74ce22ce071e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4582,7 +4582,7 @@ def drop_duplicates( duplicated = self.duplicated(subset, keep=keep) if inplace: - (inds,) = (-duplicated)._ndarray_values.nonzero() + (inds,) = np.asarray(-duplicated).nonzero() new_data = self._data.take(inds) if ignore_index: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 0397dfa923afb..6e79f5890f76d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1672,7 +1672,7 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra continue if convert_missing: # Replacement follows Stata notation - missing_loc = np.nonzero(missing._ndarray_values)[0] + missing_loc = np.nonzero(np.asarray(missing))[0] umissing, umissing_loc = np.unique(series[missing], return_inverse=True) replacement = Series(series, dtype=np.object) for j, um in enumerate(umissing): From 63b05c6211405bffee4d83067ef72156cd6a449e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Mar 2020 11:31:01 -0800 Subject: [PATCH 06/13] unnecessary values_from_object calls --- pandas/core/indexes/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e50f3130689d6..445e198b3e6b6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4089,7 +4089,6 @@ def __getitem__(self, key): if com.is_bool_indexer(key): key = np.asarray(key, dtype=bool) - key = com.values_from_object(key) result = getitem(key) if not is_scalar(result): if np.ndim(result) > 1: @@ -4224,9 +4223,11 @@ def equals(self, other) -> bool: if self.nlevels != other.nlevels: return False - return array_equivalent( - com.values_from_object(self), com.values_from_object(other) - ) + if is_extension_array_dtype(self.dtype): + # All extant EA-backed Indexes override equals; any future ones + # will need to do so as well. + raise NotImplementedError + return array_equivalent(np.asarray(self), np.asarray(other)) def identical(self, other) -> bool: """ From 92983f03212454e07745a255180528a594276644 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Mar 2020 07:30:31 -0800 Subject: [PATCH 07/13] comments --- pandas/core/arrays/categorical.py | 1 + pandas/core/indexes/datetimelike.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 40a169d03f39c..19704cf5bfc8e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1479,6 +1479,7 @@ def _internal_get_values(self): """ # if we are a datetime and period index, return Index to keep metadata if needs_i8_conversion(self.categories): + # TODO: should fill_value=NaT here? return self.categories.take(self._codes, fill_value=np.nan) elif is_integer_dtype(self.categories) and -1 in self._codes: return self.categories.astype("object").take(self._codes, fill_value=np.nan) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 894e1d95a17bc..c601fa88467e1 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -486,6 +486,7 @@ def where(self, cond, other=None): # e.g. we have a Categorical holding self.dtype if needs_i8_conversion(other.categories): other = other._internal_get_values() + # TODO: other.astype(other.categories.dtype)? if not is_dtype_equal(self.dtype, other.dtype): raise TypeError(f"Where requires matching dtype, not {other.dtype}") From ef05d7d98411d3126320125f4a098b2166c5b2ab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Mar 2020 14:29:36 -0800 Subject: [PATCH 08/13] move comments to collect branch --- pandas/core/arrays/categorical.py | 1 - pandas/core/indexes/datetimelike.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 19704cf5bfc8e..40a169d03f39c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1479,7 +1479,6 @@ def _internal_get_values(self): """ # if we are a datetime and period index, return Index to keep metadata if needs_i8_conversion(self.categories): - # TODO: should fill_value=NaT here? return self.categories.take(self._codes, fill_value=np.nan) elif is_integer_dtype(self.categories) and -1 in self._codes: return self.categories.astype("object").take(self._codes, fill_value=np.nan) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c601fa88467e1..894e1d95a17bc 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -486,7 +486,6 @@ def where(self, cond, other=None): # e.g. we have a Categorical holding self.dtype if needs_i8_conversion(other.categories): other = other._internal_get_values() - # TODO: other.astype(other.categories.dtype)? if not is_dtype_equal(self.dtype, other.dtype): raise TypeError(f"Where requires matching dtype, not {other.dtype}") From d5b5730e320e33c86e0194682ab0372d772d6dec Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Mar 2020 14:51:58 -0800 Subject: [PATCH 09/13] values->_values --- pandas/core/indexes/accessors.py | 2 +- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/numeric.py | 8 ++++---- pandas/core/indexes/period.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 8cfe1f4ac469c..feb9881ffdb81 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -321,7 +321,7 @@ def __new__(cls, data: "Series"): orig.array, name=orig.name, copy=False, - dtype=orig.values.categories.dtype, + dtype=orig._values.categories.dtype, ) if is_datetime64_dtype(data.dtype): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5997843f7ac6d..0537915e6b823 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -245,7 +245,7 @@ def _shallow_copy(self, values=None, name: Label = no_default): name = self.name if name is no_default else name if values is None: - values = self.values + values = self._values cat = Categorical(values, dtype=self.dtype) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 6c250ccd09a51..e17e57b0a1ccb 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -111,7 +111,7 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): return Float64Index._simple_new(values, name=name) if values is None: - values = self.values + values = self._values return type(self)._simple_new(values, name=name) def _convert_for_op(self, value): @@ -252,7 +252,7 @@ def inferred_type(self) -> str: @property def asi8(self) -> np.ndarray: # do not cache or you'll create a memory leak - return self.values.view(self._default_dtype) + return self._values.view(self._default_dtype) class Int64Index(IntegerIndex): @@ -372,7 +372,7 @@ def astype(self, dtype, copy=True): elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype): # TODO(jreback); this can change once we have an EA Index type # GH 13149 - arr = astype_nansafe(self.values, dtype=dtype) + arr = astype_nansafe(self._values, dtype=dtype) return Int64Index(arr) return super().astype(dtype, copy=copy) @@ -399,7 +399,7 @@ def _format_native_types( from pandas.io.formats.format import FloatArrayFormatter formatter = FloatArrayFormatter( - self.values, + self._values, na_rep=na_rep, float_format=float_format, decimal=decimal, diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 6a7595a6686bb..ac9e656d07030 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -300,7 +300,7 @@ def _maybe_convert_timedelta(self, other): def _mpl_repr(self): # how to represent ourselves to matplotlib - return self.astype(object).values + return self.astype(object)._values @property def _formatter_func(self): From be0d40ae217796cec959233349d4496248d74da6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Mar 2020 17:23:23 -0800 Subject: [PATCH 10/13] values->_values --- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/interval.py | 6 +++--- pandas/core/indexes/multi.py | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3ad693af6b0df..689d5df46981f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4678,7 +4678,7 @@ def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]: # TODO: if we are a MultiIndex, we can do better # that converting to tuples if isinstance(values, ABCMultiIndex): - values = values.values + values = values._values values = ensure_categorical(values) result = values._reverse_indexer() diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 6968837fb13e6..d1e42b1237656 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -404,7 +404,7 @@ def __reduce__(self): @Appender(Index.astype.__doc__) def astype(self, dtype, copy=True): with rewrite_exception("IntervalArray", type(self).__name__): - new_values = self.values.astype(dtype, copy=copy) + new_values = self._values.astype(dtype, copy=copy) if is_interval_dtype(new_values): return self._shallow_copy(new_values) return Index.astype(self, dtype, copy=copy) @@ -1090,9 +1090,9 @@ def func(self, other, sort=sort): # GH 19101: ensure empty results have correct dtype if result.empty: - result = result.values.astype(self.dtype.subtype) + result = result._values.astype(self.dtype.subtype) else: - result = result.values + result = result._values return type(self).from_tuples(result, closed=self.closed, name=result_name) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c1efa512f326a..4984564165d77 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1180,7 +1180,7 @@ def _format_native_types(self, na_rep="nan", **kwargs): sortorder=self.sortorder, verify_integrity=False, ) - return mi.values + return mi._values def format( self, @@ -1419,7 +1419,7 @@ def is_monotonic_increasing(self) -> bool: except TypeError: # we have mixed types and np.lexsort is not happy - return Index(self.values).is_monotonic + return Index(self._values).is_monotonic @cache_readonly def is_monotonic_decreasing(self) -> bool: @@ -1612,7 +1612,7 @@ def to_flat_index(self): ('bar', 'baz'), ('bar', 'qux')], dtype='object') """ - return Index(self.values, tupleize_cols=False) + return Index(self._values, tupleize_cols=False) @property def is_all_dates(self) -> bool: @@ -1914,7 +1914,7 @@ def append(self, other): arrays.append(label.append(appended)) return MultiIndex.from_arrays(arrays, names=self.names) - to_concat = (self.values,) + tuple(k._values for k in other) + to_concat = (self._values,) + tuple(k._values for k in other) new_tuples = np.concatenate(to_concat) # if all(isinstance(x, MultiIndex) for x in other): @@ -1924,7 +1924,7 @@ def append(self, other): return Index(new_tuples) def argsort(self, *args, **kwargs) -> np.ndarray: - return self.values.argsort(*args, **kwargs) + return self._values.argsort(*args, **kwargs) @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) def repeat(self, repeats, axis=None): @@ -2368,7 +2368,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): # let's instead try with a straight Index if method is None: - return Index(self.values).get_indexer( + return Index(self._values).get_indexer( target, method=method, limit=limit, tolerance=tolerance ) @@ -3456,8 +3456,8 @@ def _wrap_joined_index(self, joined, other): @Appender(Index.isin.__doc__) def isin(self, values, level=None): if level is None: - values = MultiIndex.from_tuples(values, names=self.names).values - return algos.isin(self.values, values) + values = MultiIndex.from_tuples(values, names=self.names)._values + return algos.isin(self._values, values) else: num = self._get_level_number(level) levs = self.get_level_values(num) From a49a3a6da72c3901bd66e53912f5427568588f7b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 16:39:10 -0700 Subject: [PATCH 11/13] checkpoint passing --- pandas/core/algorithms.py | 6 +++--- pandas/core/common.py | 2 +- pandas/core/generic.py | 4 ++-- pandas/core/indexes/base.py | 10 +++++----- pandas/core/indexes/category.py | 4 ++-- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/resample.py | 2 +- pandas/core/reshape/melt.py | 6 +++--- pandas/core/reshape/merge.py | 6 +++--- pandas/core/reshape/pivot.py | 4 ++-- pandas/core/series.py | 11 ++++++++--- pandas/core/strings.py | 8 ++++---- 14 files changed, 37 insertions(+), 32 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f9059054ba59f..1818f24618df5 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -696,7 +696,7 @@ def value_counts( result = result.sort_index() # if we are dropna and we have NO values - if dropna and (result.values == 0).all(): + if dropna and (result._values == 0).all(): result = result.iloc[0:0] # normalizing is by len of all (regardless of dropna) @@ -709,7 +709,7 @@ def value_counts( # handle Categorical and sparse, result = Series(values)._values.value_counts(dropna=dropna) result.name = name - counts = result.values + counts = result._values else: keys, counts = _value_counts_arraylike(values, dropna) @@ -819,7 +819,7 @@ def mode(values, dropna: bool = True) -> "Series": # categorical is a fast-path if is_categorical_dtype(values): if isinstance(values, Series): - return Series(values.values.mode(dropna=dropna), name=values.name) + return Series(values._values.mode(dropna=dropna), name=values.name) return values.mode(dropna=dropna) if dropna and needs_i8_conversion(values.dtype): diff --git a/pandas/core/common.py b/pandas/core/common.py index 6230ee34bcd50..d87dcdd012a1d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -215,7 +215,7 @@ def asarray_tuplesafe(values, dtype=None): if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")): values = list(values) elif isinstance(values, ABCIndexClass): - return values.values + return values._values # TODO: extract_array? if isinstance(values, list) and dtype in [np.object_, object]: return construct_1d_object_array_from_listlike(values) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6f743d7388574..06ac62f19defc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7083,7 +7083,7 @@ def asof(self, where, subset=None): return Series(np.nan, index=self.columns, name=where[0]) - locs = self.index.asof_locs(where, ~(nulls.values)) + locs = self.index.asof_locs(where, ~(nulls._values)) # mask the missing missing = locs == -1 @@ -7242,7 +7242,7 @@ def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): raise ValueError("Cannot use an NA value as a clip threshold") result = self - mask = isna(self.values) + mask = isna(self._values) with np.errstate(all="ignore"): if upper is not None: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6184c75d4fe04..98e3b3ad258ea 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -117,12 +117,12 @@ def cmp_method(self, other): elif is_object_dtype(self) and isinstance(other, ExtensionArray): # e.g. PeriodArray with np.errstate(all="ignore"): - result = op(self._values, other) + result = op(self.values, other) elif is_object_dtype(self) and not isinstance(self, ABCMultiIndex): # don't pass MultiIndex with np.errstate(all="ignore"): - result = ops.comp_method_OBJECT_ARRAY(op, self._values, other) + result = ops.comp_method_OBJECT_ARRAY(op, self.values, other) else: with np.errstate(all="ignore"): @@ -1077,7 +1077,7 @@ def to_series(self, index=None, name=None): if name is None: name = self.name - return Series(self._values.copy(), index=index, name=name) + return Series(self.values.copy(), index=index, name=name) def to_frame(self, index: bool = True, name=None): """ @@ -4218,7 +4218,7 @@ def putmask(self, mask, value): -------- numpy.ndarray.putmask """ - values = self._values.copy() + values = self.values.copy() try: np.putmask(values, mask, self._convert_for_op(value)) if is_period_dtype(self.dtype): @@ -4706,7 +4706,7 @@ def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]: # TODO: if we are a MultiIndex, we can do better # that converting to tuples if isinstance(values, ABCMultiIndex): - values = values._values + values = values.values values = ensure_categorical(values) result = values._reverse_indexer() diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 0537915e6b823..18f5725f1af44 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -564,7 +564,7 @@ def get_indexer_non_unique(self, target): target = target.codes indexer, missing = self._engine.get_indexer_non_unique(target) return ensure_platform_int(indexer), missing - target = target.values + target = target._values codes = self.categories.get_indexer(target) indexer, missing = self._engine.get_indexer_non_unique(codes) @@ -683,7 +683,7 @@ def map(self, mapper): >>> idx.map({'a': 'first', 'b': 'second'}) Index(['first', 'second', nan], dtype='object') """ - return self._shallow_copy_with_infer(self.values.map(mapper)) + return self._shallow_copy_with_infer(self._values.map(mapper)) def delete(self, loc): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 185ad8e4c365a..79cb06dd96105 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -287,7 +287,7 @@ def _is_dates_only(self) -> bool: """ from pandas.io.formats.format import _is_dates_only - return _is_dates_only(self.values) and self.tz is None + return self.tz is None and _is_dates_only(self._values) def __reduce__(self): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 6226a42d601a6..396fcd3bdbe55 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -883,7 +883,7 @@ def _convert_slice_indexer(self, key: slice, kind: str): def where(self, cond, other=None): if other is None: other = self._na_value - values = np.where(cond, self.values, other) + values = np.where(cond, self._values, other) result = IntervalArray(values) return self._shallow_copy(result) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 616d0dfe545ef..6613894d9aee5 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -377,7 +377,7 @@ def asof_locs(self, where, mask: np.ndarray) -> np.ndarray: """ where_idx = where if isinstance(where_idx, DatetimeIndex): - where_idx = PeriodIndex(where_idx.values, freq=self.freq) + where_idx = PeriodIndex(where_idx._values, freq=self.freq) elif not isinstance(where_idx, PeriodIndex): raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex") elif where_idx.freq != self.freq: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f19a82ab6f86a..a9b46a9fdd95d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1596,7 +1596,7 @@ def _get_period_bins(self, ax): def _take_new_index(obj, indexer, new_index, axis=0): if isinstance(obj, ABCSeries): - new_values = algos.take_1d(obj.values, indexer) + new_values = algos.take_1d(obj._values, indexer) return obj._constructor(new_values, index=new_index, name=obj.name) elif isinstance(obj, ABCDataFrame): if axis == 1: diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 782b8043430e1..c5557cb80651a 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -105,7 +105,7 @@ def melt( if is_extension_array_dtype(id_data): id_data = concat([id_data] * K, ignore_index=True) else: - id_data = np.tile(id_data.values, K) + id_data = np.tile(id_data._values, K) mdata[col] = id_data mcolumns = id_vars + var_name + [value_name] @@ -170,13 +170,13 @@ def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFr pivot_cols = [] for target, names in zip(keys, values): - to_concat = [data[col].values for col in names] + to_concat = [data[col]._values for col in names] mdata[target] = concat_compat(to_concat) pivot_cols.append(target) for col in id_cols: - mdata[col] = np.tile(data[col].values, K) + mdata[col] = np.tile(data[col]._values, K) if dropna: mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e75dced21f488..85db5586a3b36 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1346,7 +1346,7 @@ def _convert_to_mulitindex(index) -> MultiIndex: if isinstance(index, MultiIndex): return index else: - return MultiIndex.from_arrays([index.values], names=[index.name]) + return MultiIndex.from_arrays([index._values], names=[index.name]) # For multi-multi joins with one overlapping level, # the returned index if of type Index @@ -1668,10 +1668,10 @@ def flip(xs): # values to compare left_values = ( - self.left.index.values if self.left_index else self.left_join_keys[-1] + self.left.index._values if self.left_index else self.left_join_keys[-1] ) right_values = ( - self.right.index.values if self.right_index else self.right_join_keys[-1] + self.right.index._values if self.right_index else self.right_join_keys[-1] ) tolerance = self.tolerance diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index a8801d8ab3f6e..b3b0166334413 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -456,10 +456,10 @@ def pivot(data: "DataFrame", index=None, columns=None, values=None) -> "DataFram if is_list_like(values) and not isinstance(values, tuple): # Exclude tuple because it is seen as a single column name indexed = data._constructor( - data[values].values, index=index, columns=values + data[values]._values, index=index, columns=values ) else: - indexed = data._constructor_sliced(data[values].values, index=index) + indexed = data._constructor_sliced(data[values]._values, index=index) return indexed.unstack(columns) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2d8eb9b29498a..e6a26a4761f75 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -33,6 +33,7 @@ ensure_platform_int, is_bool, is_categorical_dtype, + is_datetime64tz_dtype, is_dict_like, is_extension_array_dtype, is_integer, @@ -1676,7 +1677,7 @@ def count(self, level=None): level_codes[mask] = cnt = len(lev) lev = lev.insert(cnt, lev._na_value) - obs = level_codes[notna(self.values)] + obs = level_codes[notna(self._values)] out = np.bincount(obs, minlength=len(lev) or None) return self._constructor(out, index=lev, dtype="int64").__finalize__(self) @@ -2666,9 +2667,13 @@ def combine(self, other, func, fill_value=None) -> "Series": new_values = [func(lv, other) for lv in self._values] new_name = self.name - if is_categorical_dtype(self.values): + new_values = np.asarray(new_values) + if is_categorical_dtype(self.dtype) or is_datetime64tz_dtype(self.dtype): + # if we let dt64tz through, try_cast_to_ea would incorrectly + # allow bool through pass - elif is_extension_array_dtype(self.values): + elif is_extension_array_dtype(self.dtype): + # TODO: can we do this for only SparseDtype? # The function can return something of any type, so check # if the type is compatible with the calling EA. new_values = try_cast_to_ea(self._values, new_values) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 71d9e8e7a577c..15d6a0635b10f 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -205,7 +205,7 @@ def _map_object(f, arr, na_mask=False, na_value=np.nan, dtype=object): return np.ndarray(0, dtype=dtype) if isinstance(arr, ABCSeries): - arr = arr.values + arr = arr._values # TODO: extract_array? if not isinstance(arr, np.ndarray): arr = np.asarray(arr, dtype=object) if na_mask: @@ -2034,8 +2034,8 @@ def __init__(self, data): self._is_categorical = is_categorical_dtype(data) self._is_string = data.dtype.name == "string" - # .values.categories works for both Series/Index - self._parent = data.values.categories if self._is_categorical else data + # ._values.categories works for both Series/Index + self._parent = data._values.categories if self._is_categorical else data # save orig to blow up categoricals to the right type self._orig = data self._freeze() @@ -2236,7 +2236,7 @@ def _get_series_list(self, others): if isinstance(others, ABCSeries): return [others] elif isinstance(others, ABCIndexClass): - return [Series(others.values, index=others)] + return [Series(others._values, index=others)] elif isinstance(others, ABCDataFrame): return [others[x] for x in others] elif isinstance(others, np.ndarray) and others.ndim == 2: From f3a4d38061c8408ae76e4e5293b4807964971823 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 16 Mar 2020 18:54:44 -0700 Subject: [PATCH 12/13] values-> _values --- pandas/core/dtypes/missing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 682a0722de3b7..581067b65b3bf 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -229,7 +229,7 @@ def _isna_ndarraylike(obj): if not is_extension: # Avoid accessing `.values` on things like # PeriodIndex, which may be expensive. - values = getattr(obj, "values", obj) + values = getattr(obj, "_values", obj) else: values = obj @@ -270,7 +270,7 @@ def _isna_ndarraylike(obj): def _isna_ndarraylike_old(obj): - values = getattr(obj, "values", obj) + values = getattr(obj, "_values", obj) dtype = values.dtype if is_string_dtype(dtype): From dc588130518b491fdaf0bbee71a6f86dea0acd13 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 17 Mar 2020 09:28:13 -0700 Subject: [PATCH 13/13] .values->._values --- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/interval.py | 2 +- pandas/core/arrays/masked.py | 4 ++-- pandas/core/ops/array_ops.py | 2 +- pandas/core/reshape/melt.py | 2 +- pandas/core/reshape/reshape.py | 8 ++++---- pandas/core/window/common.py | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 105d9581b1a25..cd1ff592ba983 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -899,7 +899,7 @@ def value_counts(self, dropna=False): index = Index( cls(result.index.view("i8"), dtype=self.dtype), name=result.index.name ) - return Series(result.values, index=index, name=result.name) + return Series(result._values, index=index, name=result.name) def map(self, mapper): # TODO(GH-23179): Add ExtensionArray.map diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 51c94d5059f8b..45a36c6c108e1 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -153,7 +153,7 @@ class IntervalArray(IntervalMixin, ExtensionArray): def __new__(cls, data, closed=None, dtype=None, copy=False, verify_integrity=True): if isinstance(data, ABCSeries) and is_interval_dtype(data): - data = data.values + data = data._values if isinstance(data, (cls, ABCIntervalIndex)): left = data.left diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 47892b55b3ce8..cf6c16d4cad5d 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -244,11 +244,11 @@ def value_counts(self, dropna: bool = True) -> "Series": # TODO(extension) # if we have allow Index to hold an ExtensionArray # this is easier - index = value_counts.index.values.astype(object) + index = value_counts.index._values.astype(object) # if we want nans, count the mask if dropna: - counts = value_counts.values + counts = value_counts._values else: counts = np.empty(len(value_counts) + 1, dtype="int64") counts[:-1] = value_counts diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index e285c53d9813e..6e5c3369c3289 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -50,7 +50,7 @@ def comp_method_OBJECT_ARRAY(op, x, y): y = y.astype(np.object_) if isinstance(y, (ABCSeries, ABCIndex)): - y = y.values + y = y._values if x.shape != y.shape: raise ValueError("Shapes must match", x.shape, y.shape) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index c5557cb80651a..c3e170b0e39c4 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -110,7 +110,7 @@ def melt( mcolumns = id_vars + var_name + [value_name] - mdata[value_name] = frame.values.ravel("F") + mdata[value_name] = frame._values.ravel("F") for i, col in enumerate(var_name): # asanyarray will keep the columns as an Index mdata[col] = np.asanyarray(frame.columns._get_level_values(i)).repeat(N) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 145cf43112be3..14c2a05e5db2c 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -541,9 +541,9 @@ def factorize(index): ) if frame._is_homogeneous_type: - # For homogeneous EAs, frame.values will coerce to object. So + # For homogeneous EAs, frame._values will coerce to object. So # we concatenate instead. - dtypes = list(frame.dtypes.values) + dtypes = list(frame.dtypes._values) dtype = dtypes[0] if is_extension_array_dtype(dtype): @@ -554,11 +554,11 @@ def factorize(index): new_values = _reorder_for_extension_array_stack(new_values, N, K) else: # homogeneous, non-EA - new_values = frame.values.ravel() + new_values = frame._values.ravel() else: # non-homogeneous - new_values = frame.values.ravel() + new_values = frame._values.ravel() if dropna: mask = notna(new_values) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index ed0b816f64800..fcde494f7f751 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -296,7 +296,7 @@ def zsqrt(x): mask = x < 0 if isinstance(x, ABCDataFrame): - if mask.values.any(): + if mask._values.any(): result[mask] = 0 else: if mask.any():