From 971ee8af298ed3112e9805762853adcc44324fdd Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 16 Mar 2020 19:11:23 -0700
Subject: [PATCH 1/4] CLN: .values->._values outside of core

---
 pandas/io/formats/format.py         | 14 +++-----------
 pandas/io/json/_json.py             |  6 +++---
 pandas/io/pytables.py               |  6 ++++--
 pandas/io/stata.py                  | 14 +++++++-------
 pandas/plotting/_matplotlib/misc.py |  1 +
 pandas/tseries/frequencies.py       | 16 ++++++++--------
 pandas/util/_doctools.py            |  2 +-
 7 files changed, 27 insertions(+), 32 deletions(-)

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index f011293273c5b..17cc897136aad 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -58,11 +58,8 @@
 )
 from pandas.core.dtypes.generic import (
     ABCDatetimeIndex,
-    ABCIndexClass,
     ABCMultiIndex,
     ABCPeriodIndex,
-    ABCSeries,
-    ABCSparseArray,
     ABCTimedeltaIndex,
 )
 from pandas.core.dtypes.missing import isna, notna
@@ -71,6 +68,7 @@
 from pandas.core.arrays.timedeltas import TimedeltaArray
 from pandas.core.base import PandasObject
 import pandas.core.common as com
+from pandas.core.construction import extract_array
 from pandas.core.indexes.api import Index, ensure_index
 from pandas.core.indexes.datetimes import DatetimeIndex
 from pandas.core.indexes.timedeltas import TimedeltaIndex
@@ -1228,11 +1226,7 @@ def _format(x):
                 # object dtype
                 return str(formatter(x))
 
-        vals = self.values
-        if isinstance(vals, Index):
-            vals = vals._values
-        elif isinstance(vals, ABCSparseArray):
-            vals = vals.values
+        vals = extract_array(self.values, extract_numpy=True)
 
         is_float_type = lib.map_infer(vals, is_float) & notna(vals)
         leading_space = self.leading_space
@@ -1457,9 +1451,7 @@ def _format_strings(self) -> List[str]:
 
 class ExtensionArrayFormatter(GenericArrayFormatter):
     def _format_strings(self) -> List[str]:
-        values = self.values
-        if isinstance(values, (ABCIndexClass, ABCSeries)):
-            values = values._values
+        values = extract_array(self.values, extract_numpy=True)
 
         formatter = values._formatter(boxed=True)
 
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 77a0c2f99496b..97751e9b8f71a 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -186,7 +186,7 @@ def _write(
         indent: int,
     ):
         if not self.index and orient == "split":
-            obj = {"name": obj.name, "data": obj.values}
+            obj = {"name": obj.name, "data": obj._values}
         return super()._write(
             obj,
             orient,
@@ -973,9 +973,9 @@ def _try_convert_to_date(self, data):
         # ignore numbers that are out of range
         if issubclass(new_data.dtype.type, np.number):
             in_range = (
-                isna(new_data.values)
+                isna(new_data._values)
                 | (new_data > self.min_stamp)
-                | (new_data.values == iNaT)
+                | (new_data._values == iNaT)
             )
             if not in_range.all():
                 return data, False
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 7aeed5c316d7f..d5a90732caa91 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -4826,7 +4826,9 @@ def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.nd
     # encode if needed
     if len(data):
         data = (
-            Series(data.ravel()).str.encode(encoding, errors).values.reshape(data.shape)
+            Series(data.ravel())
+            .str.encode(encoding, errors)
+            ._values.reshape(data.shape)
         )
 
     # create the sized dtype
@@ -4865,7 +4867,7 @@ def _unconvert_string_array(
         dtype = f"U{itemsize}"
 
         if isinstance(data[0], bytes):
-            data = Series(data).str.decode(encoding, errors=errors).values
+            data = Series(data).str.decode(encoding, errors=errors)._values
         else:
             data = data.astype(dtype, copy=False).astype(object, copy=False)
 
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 6e79f5890f76d..7b6965449f033 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -351,10 +351,10 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series:
 
     def parse_dates_safe(dates, delta=False, year=False, days=False):
         d = {}
-        if is_datetime64_dtype(dates.values):
+        if is_datetime64_dtype(dates.dtype):
             if delta:
                 time_delta = dates - stata_epoch
-                d["delta"] = time_delta.values.astype(np.int64) // 1000  # microseconds
+                d["delta"] = time_delta._values.astype(np.int64) // 1000  # microseconds
             if days or year:
                 # ignore since mypy reports that DatetimeIndex has no year/month
                 date_index = DatetimeIndex(dates)
@@ -368,7 +368,7 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
 
         elif infer_dtype(dates, skipna=False) == "datetime":
             if delta:
-                delta = dates.values - stata_epoch
+                delta = dates._values - stata_epoch
 
                 def f(x: datetime.timedelta) -> float:
                     return US_PER_DAY * x.days + 1000000 * x.seconds + x.microseconds
@@ -377,8 +377,8 @@ def f(x: datetime.timedelta) -> float:
                 d["delta"] = v(delta)
             if year:
                 year_month = dates.apply(lambda x: 100 * x.year + x.month)
-                d["year"] = year_month.values // 100
-                d["month"] = year_month.values - d["year"] * 100
+                d["year"] = year_month._values // 100
+                d["month"] = year_month._values - d["year"] * 100
             if days:
 
                 def g(x: datetime.datetime) -> int:
@@ -2151,7 +2151,7 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame:
                         "It is not possible to export "
                         "int64-based categorical data to Stata."
                     )
-                values = data[col].cat.codes.values.copy()
+                values = data[col].cat.codes._values.copy()
 
                 # Upcast if needed so that correct missing values can be set
                 if values.max() >= get_base_missing_value(dtype):
@@ -2384,7 +2384,7 @@ def _encode_strings(self) -> None:
                 encoded = self.data[col].str.encode(self._encoding)
                 # If larger than _max_string_length do nothing
                 if (
-                    max_len_string_array(ensure_object(encoded.values))
+                    max_len_string_array(ensure_object(encoded._values))
                     <= self._max_string_length
                 ):
                     self.data[col] = encoded
diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py
index 0720f544203f7..7319e8de3ec6e 100644
--- a/pandas/plotting/_matplotlib/misc.py
+++ b/pandas/plotting/_matplotlib/misc.py
@@ -260,6 +260,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
 
     import matplotlib.pyplot as plt
 
+    # TODO: is the failure mentioned below still relevant?
     # random.sample(ndarray, int) fails on python 3.3, sigh
     data = list(series.values)
     samplings = [random.sample(data, size) for _ in range(samples)]
diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py
index 2477ff29fbfd5..4a3eb4e7168f1 100644
--- a/pandas/tseries/frequencies.py
+++ b/pandas/tseries/frequencies.py
@@ -289,7 +289,7 @@ def infer_freq(index, warn: bool = True) -> Optional[str]:
             raise TypeError(
                 f"cannot infer freq from a non-convertible index type {type(index)}"
             )
-        index = index.values
+        index = index._values
 
     if not isinstance(index, pd.DatetimeIndex):
         index = pd.DatetimeIndex(index)
@@ -305,13 +305,13 @@ class _FrequencyInferer:
 
     def __init__(self, index, warn: bool = True):
         self.index = index
-        self.values = index.asi8
+        self.i8values = index.asi8
 
         # This moves the values, which are implicitly in UTC, to the
         # the timezone so they are in local time
         if hasattr(index, "tz"):
             if index.tz is not None:
-                self.values = tz_convert(self.values, UTC, index.tz)
+                self.i8values = tz_convert(self.i8values, UTC, index.tz)
 
         self.warn = warn
 
@@ -324,11 +324,11 @@ def __init__(self, index, warn: bool = True):
 
     @cache_readonly
     def deltas(self):
-        return unique_deltas(self.values)
+        return unique_deltas(self.i8values)
 
     @cache_readonly
     def deltas_asi8(self):
-        return unique_deltas(self.index.asi8)
+        return unique_deltas(self.i8values)
 
     @cache_readonly
     def is_unique(self) -> bool:
@@ -341,7 +341,7 @@ def is_unique_asi8(self) -> bool:
     def get_freq(self) -> Optional[str]:
         """
         Find the appropriate frequency string to describe the inferred
-        frequency of self.values
+        frequency of self.i8values
 
         Returns
         -------
@@ -393,11 +393,11 @@ def hour_deltas(self):
 
     @cache_readonly
     def fields(self):
-        return build_field_sarray(self.values)
+        return build_field_sarray(self.i8values)
 
     @cache_readonly
     def rep_stamp(self):
-        return Timestamp(self.values[0])
+        return Timestamp(self.i8values[0])
 
     def month_position_check(self):
         return libresolution.month_position_check(self.fields, self.index.dayofweek)
diff --git a/pandas/util/_doctools.py b/pandas/util/_doctools.py
index 8fd4566d7763b..71965b8e7dd9d 100644
--- a/pandas/util/_doctools.py
+++ b/pandas/util/_doctools.py
@@ -126,7 +126,7 @@ def _insert_index(self, data):
         if col_nlevels > 1:
             col = data.columns._get_level_values(0)
             values = [
-                data.columns._get_level_values(i).values for i in range(1, col_nlevels)
+                data.columns._get_level_values(i)._values for i in range(1, col_nlevels)
             ]
             col_df = pd.DataFrame(values)
             data.columns = col_df.columns

From 5117c46801e5d4e5d2978fe5f9ba6fb14e03d8a7 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 16 Mar 2020 19:24:22 -0700
Subject: [PATCH 2/4] .values->._values

---
 pandas/io/pytables.py | 2 +-
 pandas/io/stata.py    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index d5a90732caa91..d876523940e15 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2382,7 +2382,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
             mask = isna(categories)
             if mask.any():
                 categories = categories[~mask]
-                codes[codes != -1] -= mask.astype(int).cumsum().values
+                codes[codes != -1] -= mask.astype(int).cumsum()._values
 
             converted = Categorical.from_codes(
                 codes, categories=categories, ordered=ordered
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 7b6965449f033..8f3aa60b7a9cc 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -1956,7 +1956,7 @@ def _dtype_to_stata_type(dtype: np.dtype, column: Series) -> int:
     if dtype.type == np.object_:  # try to coerce it to the biggest string
         # not memory efficient, what else could we
         # do?
-        itemsize = max_len_string_array(ensure_object(column.values))
+        itemsize = max_len_string_array(ensure_object(column._values))
         return max(itemsize, 1)
     elif dtype == np.float64:
         return 255
@@ -1998,7 +1998,7 @@ def _dtype_to_default_stata_fmt(
     if force_strl:
         return "%9s"
     if dtype.type == np.object_:
-        itemsize = max_len_string_array(ensure_object(column.values))
+        itemsize = max_len_string_array(ensure_object(column._values))
         if itemsize > max_str_len:
             if dta_version >= 117:
                 return "%9s"
@@ -2650,7 +2650,7 @@ def _dtype_to_stata_type_117(dtype: np.dtype, column: Series, force_strl: bool)
     if dtype.type == np.object_:  # try to coerce it to the biggest string
         # not memory efficient, what else could we
         # do?
-        itemsize = max_len_string_array(ensure_object(column.values))
+        itemsize = max_len_string_array(ensure_object(column._values))
         itemsize = max(itemsize, 1)
         if itemsize <= 2045:
             return itemsize

From 6f2b216c8e10a895c724bb7ec8950cc7f867dc1d Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 17 Mar 2020 11:54:43 -0700
Subject: [PATCH 3/4] comment

---
 pandas/tseries/frequencies.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py
index 4a3eb4e7168f1..03a9f2e879dd8 100644
--- a/pandas/tseries/frequencies.py
+++ b/pandas/tseries/frequencies.py
@@ -328,7 +328,9 @@ def deltas(self):
 
     @cache_readonly
     def deltas_asi8(self):
-        return unique_deltas(self.i8values)
+        # NB: we cannot use self.i8values here because we may have converted
+        # the tz in __init__
+        return unique_deltas(self.index.asi8)
 
     @cache_readonly
     def is_unique(self) -> bool:

From 2bb0b0aeec10ec4724f2ca16c88fed59ab6df6f7 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Thu, 19 Mar 2020 15:36:56 -0700
Subject: [PATCH 4/4] revert possibly-behavior-changing

---
 pandas/io/json/_json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 97751e9b8f71a..d6b90ae99973e 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -186,7 +186,7 @@ def _write(
         indent: int,
     ):
         if not self.index and orient == "split":
-            obj = {"name": obj.name, "data": obj._values}
+            obj = {"name": obj.name, "data": obj.values}
         return super()._write(
             obj,
             orient,
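
A minimal sketch (not part of the patch series) of the .values vs ._values distinction this cleanup leans on; the exact array types shown are an assumption based on pandas 1.x behavior around the time of these commits:

    import pandas as pd

    ser = pd.Series(pd.date_range("2020-01-01", periods=3, tz="US/Eastern"))

    # Series.values materializes a plain NumPy datetime64[ns] array,
    # converting to UTC and dropping the timezone along the way.
    print(type(ser.values))   # expected: <class 'numpy.ndarray'>

    # Series._values hands back the backing DatetimeArray unchanged, keeping
    # the timezone and skipping the conversion, which is why internal code
    # prefers it over the public .values accessor.
    print(type(ser._values))  # expected: <class 'pandas.core.arrays.datetimes.DatetimeArray'>

For user-facing code, the rough public counterparts are Series.array (a reference to the backing array) and Series.to_numpy(); the private ._values accessor is reserved for pandas internals such as the modules touched above.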