From 365ab7f9e3beb51c45e25818e30313682e98ce92 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 2 Jan 2019 14:23:42 -0800 Subject: [PATCH 1/3] parametrize --- pandas/tests/test_base.py | 162 +++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index dbdbb0bc238a9..f60d73ea1b05b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -400,98 +400,98 @@ def test_value_counts_unique_nunique(self): assert o.nunique() == len(np.unique(o.values)) - def test_value_counts_unique_nunique_null(self): + @pytest.mark.parametrize('null_obj', [np.nan, None]) + def test_value_counts_unique_nunique_null(self, null_obj): - for null_obj in [np.nan, None]: - for orig in self.objs: - o = orig.copy() - klass = type(o) - values = o._ndarray_values - - if not self._allow_na_ops(o): - continue + for orig in self.objs: + o = orig.copy() + klass = type(o) + values = o._ndarray_values - # special assign to the numpy array - if is_datetime64tz_dtype(o): - if isinstance(o, DatetimeIndex): - v = o.asi8 - v[0:2] = iNaT - values = o._shallow_copy(v) - else: - o = o.copy() - o[0:2] = iNaT - values = o._values + if not self._allow_na_ops(o): + continue - elif needs_i8_conversion(o): - values[0:2] = iNaT - values = o._shallow_copy(values) + # special assign to the numpy array + if is_datetime64tz_dtype(o): + if isinstance(o, DatetimeIndex): + v = o.asi8 + v[0:2] = iNaT + values = o._shallow_copy(v) else: - values[0:2] = null_obj - # check values has the same dtype as the original + o = o.copy() + o[0:2] = iNaT + values = o._values - assert values.dtype == o.dtype + elif needs_i8_conversion(o): + values[0:2] = iNaT + values = o._shallow_copy(values) + else: + values[0:2] = null_obj + # check values has the same dtype as the original - # create repeated values, 'n'th element is repeated by n+1 - # times - if isinstance(o, (DatetimeIndex, PeriodIndex)): - expected_index = o.copy() - expected_index.name = None + assert values.dtype == o.dtype - # attach name to klass - o = klass(values.repeat(range(1, len(o) + 1))) - o.name = 'a' - else: - if isinstance(o, DatetimeIndex): - expected_index = orig._values._shallow_copy(values) - else: - expected_index = Index(values) - expected_index.name = None - o = o.repeat(range(1, len(o) + 1)) - o.name = 'a' + # create repeated values, 'n'th element is repeated by n+1 + # times + if isinstance(o, (DatetimeIndex, PeriodIndex)): + expected_index = o.copy() + expected_index.name = None - # check values has the same dtype as the original - assert o.dtype == orig.dtype - # check values correctly have NaN - nanloc = np.zeros(len(o), dtype=np.bool) - nanloc[:3] = True - if isinstance(o, Index): - tm.assert_numpy_array_equal(pd.isna(o), nanloc) - else: - exp = Series(nanloc, o.index, name='a') - tm.assert_series_equal(pd.isna(o), exp) - - expected_s_na = Series(list(range(10, 2, -1)) + [3], - index=expected_index[9:0:-1], - dtype='int64', name='a') - expected_s = Series(list(range(10, 2, -1)), - index=expected_index[9:1:-1], - dtype='int64', name='a') - - result_s_na = o.value_counts(dropna=False) - tm.assert_series_equal(result_s_na, expected_s_na) - assert result_s_na.index.name is None - assert result_s_na.name == 'a' - result_s = o.value_counts() - tm.assert_series_equal(o.value_counts(), expected_s) - assert result_s.index.name is None - assert result_s.name == 'a' - - result = o.unique() - if isinstance(o, Index): - tm.assert_index_equal(result, - Index(values[1:], name='a')) - elif is_datetime64tz_dtype(o): - # unable to compare NaT / nan - tm.assert_extension_array_equal(result[1:], values[2:]) - assert result[0] is pd.NaT + # attach name to klass + o = klass(values.repeat(range(1, len(o) + 1))) + o.name = 'a' + else: + if isinstance(o, DatetimeIndex): + expected_index = orig._values._shallow_copy(values) else: - tm.assert_numpy_array_equal(result[1:], values[2:]) + expected_index = Index(values) + expected_index.name = None + o = o.repeat(range(1, len(o) + 1)) + o.name = 'a' + + # check values has the same dtype as the original + assert o.dtype == orig.dtype + # check values correctly have NaN + nanloc = np.zeros(len(o), dtype=np.bool) + nanloc[:3] = True + if isinstance(o, Index): + tm.assert_numpy_array_equal(pd.isna(o), nanloc) + else: + exp = Series(nanloc, o.index, name='a') + tm.assert_series_equal(pd.isna(o), exp) + + expected_s_na = Series(list(range(10, 2, -1)) + [3], + index=expected_index[9:0:-1], + dtype='int64', name='a') + expected_s = Series(list(range(10, 2, -1)), + index=expected_index[9:1:-1], + dtype='int64', name='a') + + result_s_na = o.value_counts(dropna=False) + tm.assert_series_equal(result_s_na, expected_s_na) + assert result_s_na.index.name is None + assert result_s_na.name == 'a' + result_s = o.value_counts() + tm.assert_series_equal(o.value_counts(), expected_s) + assert result_s.index.name is None + assert result_s.name == 'a' + + result = o.unique() + if isinstance(o, Index): + tm.assert_index_equal(result, + Index(values[1:], name='a')) + elif is_datetime64tz_dtype(o): + # unable to compare NaT / nan + tm.assert_extension_array_equal(result[1:], values[2:]) + assert result[0] is pd.NaT + else: + tm.assert_numpy_array_equal(result[1:], values[2:]) - assert pd.isna(result[0]) - assert result.dtype == orig.dtype + assert pd.isna(result[0]) + assert result.dtype == orig.dtype - assert o.nunique() == 8 - assert o.nunique(dropna=False) == 9 + assert o.nunique() == 8 + assert o.nunique(dropna=False) == 9 @pytest.mark.parametrize('klass', [Index, Series]) def test_value_counts_inferred(self, klass): From b5468679f817a745874c205961631e47427474cb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 2 Jan 2019 16:19:14 -0800 Subject: [PATCH 2/3] remove unused --- pandas/util/testing.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a6ba62bbdea1e..ebdfde2da24f8 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1861,10 +1861,6 @@ def getCols(k): return string.ascii_uppercase[:k] -def getArangeMat(): - return np.arange(N * K).reshape((N, K)) - - # make index def makeStringIndex(k=10, name=None): return Index(rands_array(nchars=10, size=k), name=name) @@ -2322,13 +2318,6 @@ def add_nans(panel): return panel -def add_nans_panel4d(panel4d): - for l, label in enumerate(panel4d.labels): - panel = panel4d[label] - add_nans(panel) - return panel4d - - class TestSubDict(dict): def __init__(self, *args, **kwargs): From 1793bbc380b143c3d81eae829b8125cace22ae69 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 2 Jan 2019 17:21:53 -0800 Subject: [PATCH 3/3] remove redundant methods, fix ambiguous to_m8 name for datetime --- pandas/core/arrays/datetimelike.py | 75 ++------------------ pandas/core/arrays/datetimes.py | 7 +- pandas/core/arrays/timedeltas.py | 20 +----- pandas/core/indexes/datetimelike.py | 14 +--- pandas/core/indexes/datetimes.py | 8 +-- pandas/core/indexes/period.py | 11 ++- pandas/core/indexes/timedeltas.py | 6 +- pandas/tests/arithmetic/test_datetime64.py | 4 +- pandas/tests/tseries/offsets/test_offsets.py | 6 +- 9 files changed, 29 insertions(+), 122 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 517c80619baea..3ca660b906f73 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -19,12 +19,11 @@ from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.common import ( - is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, - is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype, - is_list_like, is_object_dtype, is_offsetlike, is_period_dtype, - is_string_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype, - needs_i8_conversion, pandas_dtype) + is_categorical_dtype, is_datetime64_any_dtype, is_datetime64_dtype, + is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, + is_extension_array_dtype, is_float_dtype, is_integer_dtype, is_list_like, + is_object_dtype, is_offsetlike, is_period_dtype, is_string_dtype, + is_timedelta64_dtype, is_unsigned_integer_dtype, pandas_dtype) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.inference import is_array_like from pandas.core.dtypes.missing import isna @@ -40,32 +39,6 @@ from .base import ExtensionArray, ExtensionOpsMixin -def _make_comparison_op(cls, op): - # TODO: share code with indexes.base version? Main difference is that - # the block for MultiIndex was removed here. - def cmp_method(self, other): - if isinstance(other, ABCDataFrame): - return NotImplemented - - if needs_i8_conversion(self) and needs_i8_conversion(other): - # we may need to directly compare underlying - # representations - return self._evaluate_compare(other, op) - - # numpy will show a DeprecationWarning on invalid elementwise - # comparisons, this will raise in the future - with warnings.catch_warnings(record=True): - warnings.filterwarnings("ignore", "elementwise", FutureWarning) - with np.errstate(all='ignore'): - result = op(self._data, np.asarray(other)) - - return result - - name = '__{name}__'.format(name=op.__name__) - # TODO: docstring? - return compat.set_function_name(cmp_method, name, cls) - - class AttributesMixin(object): @property @@ -1358,41 +1331,6 @@ def __isub__(self, other): # -------------------------------------------------------------- # Comparison Methods - # Called by _add_comparison_methods defined in ExtensionOpsMixin - _create_comparison_method = classmethod(_make_comparison_op) - - def _evaluate_compare(self, other, op): - """ - We have been called because a comparison between - 8 aware arrays. numpy will warn about NaT comparisons - """ - # Called by comparison methods when comparing datetimelike - # with datetimelike - - if not isinstance(other, type(self)): - # coerce to a similar object - if not is_list_like(other): - # scalar - other = [other] - elif lib.is_scalar(lib.item_from_zerodim(other)): - # ndarray scalar - other = [other.item()] - other = type(self)._from_sequence(other) - - # compare - result = op(self.asi8, other.asi8) - - # technically we could support bool dtyped Index - # for now just return the indexing array directly - mask = (self._isnan) | (other._isnan) - - filler = iNaT - if is_bool_dtype(result): - filler = False - - result[mask] = filler - return result - def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise', from_utc=False): """ @@ -1493,9 +1431,6 @@ def max(self, axis=None, skipna=True, *args, **kwargs): return self._box_func(result) -DatetimeLikeArrayMixin._add_comparison_ops() - - # ------------------------------------------------------------------- # Shared Constructor Helpers diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ea2742c5808a3..f5903e19d2c45 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -34,7 +34,7 @@ _midnight = time(0, 0) -def _to_m8(key, tz=None): +def _to_M8(key, tz=None): """ Timestamp-like => dt64 """ @@ -96,7 +96,6 @@ def _dt_array_cmp(cls, op): nat_result = True if opname == '__ne__' else False def wrapper(self, other): - meth = getattr(dtl.DatetimeLikeArrayMixin, opname) # TODO: return NotImplemented for Series / Index and let pandas unbox # Right now, returning NotImplemented for Index fails because we # go into the index implementation, which may be a bug? @@ -109,7 +108,7 @@ def wrapper(self, other): self._assert_tzawareness_compat(other) try: - other = _to_m8(other, tz=self.tz) + other = _to_M8(other, tz=self.tz) except ValueError: # string that cannot be parsed to Timestamp return ops.invalid_comparison(self, other, op) @@ -158,7 +157,7 @@ def wrapper(self, other): # or an object-dtype ndarray other = type(self)._from_sequence(other) - result = meth(self, other) + result = op(self.view('i8'), other.view('i8')) o_mask = other._isnan result = com.values_from_object(result) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index b747e2b6b096b..6a7225acfefbf 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -36,18 +36,6 @@ _BAD_DTYPE = "dtype {dtype} cannot be converted to timedelta64[ns]" -def _to_m8(key): - """ - Timedelta-like => dt64 - """ - if not isinstance(key, Timedelta): - # this also converts strings - key = Timedelta(key) - - # return an type that can be compared - return np.int64(key.value).view(_TD_DTYPE) - - def _is_convertible_to_td(key): return isinstance(key, (Tick, timedelta, np.timedelta64, compat.string_types)) @@ -75,17 +63,15 @@ def _td_array_cmp(cls, op): opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False - meth = getattr(dtl.DatetimeLikeArrayMixin, opname) - def wrapper(self, other): if _is_convertible_to_td(other) or other is NaT: try: - other = _to_m8(other) + other = Timedelta(other) except ValueError: # failed to parse as timedelta return ops.invalid_comparison(self, other, op) - result = meth(self, other) + result = op(self.view('i8'), other.value) if isna(other): result.fill(nat_result) @@ -101,7 +87,7 @@ def wrapper(self, other): except (ValueError, TypeError): return ops.invalid_comparison(self, other, op) - result = meth(self, other) + result = op(self.view('i8'), other.view('i8')) result = com.values_from_object(result) o_mask = np.array(isna(other)) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 5547266ea6bab..cfca5d1b7d2cc 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -13,8 +13,8 @@ from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg from pandas.core.dtypes.common import ( - ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer, - is_list_like, is_period_dtype, is_scalar) + ensure_int64, is_dtype_equal, is_float, is_integer, is_list_like, + is_period_dtype, is_scalar) from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core import algorithms, ops @@ -191,16 +191,6 @@ def wrapper(left, right): return wrapper - @Appender(DatetimeLikeArrayMixin._evaluate_compare.__doc__) - def _evaluate_compare(self, other, op): - result = self._eadata._evaluate_compare(other, op) - if is_bool_dtype(result): - return result - try: - return Index(result) - except TypeError: - return result - def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise', from_utc=False): # See DatetimeLikeArrayMixin._ensure_localized.__doc__ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 6d9829d4ef659..7d901f4656731 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -22,7 +22,7 @@ from pandas.core.accessor import delegate_names from pandas.core.arrays.datetimes import ( - DatetimeArrayMixin as DatetimeArray, _to_m8, validate_tz_from_dtype) + DatetimeArrayMixin as DatetimeArray, _to_M8, validate_tz_from_dtype) from pandas.core.base import _shared_docs import pandas.core.common as com from pandas.core.indexes.base import Index @@ -405,7 +405,7 @@ def __setstate__(self, state): def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ if self._has_same_tz(value): - return _to_m8(value) + return _to_M8(value) raise ValueError('Passed item and index have different timezone') def _maybe_update_attributes(self, attrs): @@ -1161,7 +1161,7 @@ def searchsorted(self, value, side='left', sorter=None): if isinstance(value, (np.ndarray, Index)): value = np.array(value, dtype=_NS_DTYPE, copy=False) else: - value = _to_m8(value, tz=self.tz) + value = _to_M8(value, tz=self.tz) return self.values.searchsorted(value, side=side) @@ -1211,7 +1211,7 @@ def insert(self, loc, item): freq = self.freq elif (loc == len(self)) and item - self.freq == self[-1]: freq = self.freq - item = _to_m8(item, tz=self.tz) + item = _to_M8(item, tz=self.tz) try: new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)], diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0eeb7551db26f..b59c32bb8a9d4 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -276,9 +276,6 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): result._reset_identity() return result - # ------------------------------------------------------------------------ - # Wrapping PeriodArray - # ------------------------------------------------------------------------ # Data @@ -416,6 +413,10 @@ def _mpl_repr(self): # how to represent ourselves to matplotlib return self.astype(object).values + @property + def _formatter_func(self): + return self.array._formatter(boxed=False) + # ------------------------------------------------------------------------ # Indexing @@ -496,10 +497,6 @@ def __array_wrap__(self, result, context=None): # cannot pass _simple_new as it is return type(self)(result, freq=self.freq, name=self.name) - @property - def _formatter_func(self): - return self.array._formatter(boxed=False) - def asof_locs(self, where, mask): """ where : array of timestamps diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 241d12dd06159..5e8e6a423ab3f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -18,7 +18,7 @@ from pandas.core.accessor import delegate_names from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.timedeltas import ( - TimedeltaArrayMixin as TimedeltaArray, _is_convertible_to_td, _to_m8) + TimedeltaArrayMixin as TimedeltaArray, _is_convertible_to_td) from pandas.core.base import _shared_docs import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs @@ -614,7 +614,7 @@ def searchsorted(self, value, side='left', sorter=None): if isinstance(value, (np.ndarray, Index)): value = np.array(value, dtype=_TD_DTYPE, copy=False) else: - value = _to_m8(value) + value = Timedelta(value).asm8.view(_TD_DTYPE) return self.values.searchsorted(value, side=side, sorter=sorter) @@ -664,7 +664,7 @@ def insert(self, loc, item): freq = self.freq elif (loc == len(self)) and item - self.freq == self[-1]: freq = self.freq - item = _to_m8(item) + item = Timedelta(item).asm8.view(_TD_DTYPE) try: new_tds = np.concatenate((self[:loc].asi8, [item.view(np.int64)], diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index d4e82fe2659a0..f5c4808a09123 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -20,7 +20,7 @@ from pandas._libs.tslibs.conversion import localize_pydatetime from pandas._libs.tslibs.offsets import shift_months -from pandas.core.indexes.datetimes import _to_m8 +from pandas.core.indexes.datetimes import _to_M8 from pandas import ( Timestamp, Timedelta, Period, Series, date_range, NaT, @@ -349,7 +349,7 @@ class TestDatetimeIndexComparisons(object): def test_comparators(self, op): index = tm.makeDateIndex(100) element = index[len(index) // 2] - element = _to_m8(element) + element = _to_M8(element) arr = np.array(index) arr_result = op(arr, element) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index a938c1fe9a8fe..ac3955970587f 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -13,7 +13,7 @@ from pandas.compat import range from pandas.compat.numpy import np_datetime64_compat -from pandas.core.indexes.datetimes import DatetimeIndex, _to_m8, date_range +from pandas.core.indexes.datetimes import DatetimeIndex, _to_M8, date_range from pandas.core.series import Series import pandas.util.testing as tm @@ -47,9 +47,9 @@ class WeekDay(object): #### -def test_to_m8(): +def test_to_M8(): valb = datetime(2007, 10, 1) - valu = _to_m8(valb) + valu = _to_M8(valb) assert isinstance(valu, np.datetime64)