From 24bb1a93481c78c6d2c3657c5429f7f49735131e Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 7 Aug 2014 08:00:47 -0400 Subject: [PATCH 1/2] DOC: release notes corrections TST: argmin/max impl, tests, and doc-strings --- doc/source/api.rst | 47 ++++++++++++++++++++++++++++++--------- doc/source/io.rst | 3 ++- doc/source/v0.15.0.txt | 4 ++-- pandas/core/base.py | 30 +++++++++++++++++++++++-- pandas/core/index.py | 8 +++++++ pandas/tests/test_base.py | 21 +++++++++++++++++ pandas/tseries/index.py | 7 ------ 7 files changed, 98 insertions(+), 22 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 9d443254ae25a..62518bf0d9ffd 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -235,8 +235,8 @@ Constructor Series -Attributes and underlying data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Attributes +~~~~~~~~~~ **Axes** * **index**: axis labels @@ -246,6 +246,14 @@ Attributes and underlying data Series.values Series.dtype Series.ftype + Series.shape + Series.size + Series.nbytes + Series.ndim + Series.strides + Series.itemsize + Series.base + Series.T Conversion ~~~~~~~~~~ @@ -1087,11 +1095,36 @@ used before calling these methods directly.** Index +Attributes +~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + Index.values + Index.is_monotonic + Index.is_unique + Index.dtype + Index.inferred_type + Index.is_all_dates + Index.shape + Index.size + Index.nbytes + Index.ndim + Index.strides + Index.itemsize + Index.base + Index.T + Modifying and Computations ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ + Index.all + Index.any + Index.argmin + Index.argmax Index.copy Index.delete Index.diff @@ -1101,6 +1134,8 @@ Modifying and Computations Index.factorize Index.identical Index.insert + Index.min + Index.max Index.order Index.reindex Index.repeat @@ -1161,14 +1196,6 @@ Selecting Index.slice_indexer Index.slice_locs -Properties -~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Index.is_monotonic - Index.is_numeric - .. 
_api.datetimeindex: DatetimeIndex diff --git a/doc/source/io.rst b/doc/source/io.rst index 32af1924aee70..40977aee44cdd 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2101,7 +2101,8 @@ any pickled pandas object (or any other pickled object) from file: .. warning:: - In 0.13, pickle preserves compatibility with pickles created prior to 0.13. These must + Several internal refactorings, 0.13 (:ref:`Series Refactoring `), and 0.15 (:ref:`Index Refactoring `), + preserve compatibility with pickles created prior to these versions. However, these must be read with ``pd.read_pickle``, rather than the default python ``pickle.load``. See `this question `__ for a detailed explanation. diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index bb039b4484c7d..d8e5587e80da1 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -169,10 +169,10 @@ Internal Refactoring In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray`` but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This change allows very easy sub-classing and creation of new index types. This should be -a transparent change with only very limited API implications (:issue:`5080`,:issue:`7439`,:issue:`7796`) +a transparent change with only very limited API implications (:issue:`5080`, :issue:`7439`, :issue:`7796`) - you may need to unpickle pandas version < 0.15.0 pickles using ``pd.read_pickle`` rather than ``pickle.load``. See :ref:`pickle docs ` -- when plotting with a ``PeriodIndex``. The ``matplotlib`` internal axes will now be arrays of ``Period`` rather than a ``PeriodIndex``. (this is similar to how a ``DatetimeIndex`` passess arrays of ``datetimes`` now) +- when plotting with a ``PeriodIndex``. The ``matplotlib`` internal axes will now be arrays of ``Period`` rather than a ``PeriodIndex``. (this is similar to how a ``DatetimeIndex`` passes arrays of ``datetimes`` now) .. 
_whatsnew_0150.cat: diff --git a/pandas/core/base.py b/pandas/core/base.py index f685edd477b8c..c04872ab74bb0 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -313,10 +313,34 @@ def max(self): """ The maximum value of the object """ return nanops.nanmax(self.values) + def argmax(self, axis=None): + """ + return a ndarray of the maximum argument indexer + + See also + -------- + numpy.ndarray.argmax + """ + return nanops.nanargmax(self.values) + def min(self): """ The minimum value of the object """ return nanops.nanmin(self.values) + def argmin(self, axis=None): + """ + return a ndarray of the minimum argument indexer + + See also + -------- + numpy.ndarray.argmin + """ + return nanops.nanargmin(self.values) + + def hasnans(self): + """ return if I have any nans; enables various perf speedups """ + return com.isnull(self).any() + def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): """ @@ -554,10 +578,11 @@ def argmin(self, axis=None): numpy.ndarray.argmin """ - ##### FIXME: need some tests (what do do if all NaT?) i8 = self.asi8 if self.hasnans: mask = i8 == tslib.iNaT + if mask.all(): + return -1 i8 = i8.copy() i8[mask] = np.iinfo('int64').max return i8.argmin() @@ -596,10 +621,11 @@ def argmax(self, axis=None): numpy.ndarray.argmax """ - #### FIXME: need some tests (what do do if all NaT?) 
i8 = self.asi8 if self.hasnans: mask = i8 == tslib.iNaT + if mask.all(): + return -1 i8 = i8.copy() i8[mask] = 0 return i8.argmax() diff --git a/pandas/core/index.py b/pandas/core/index.py index c7b1c60a9ddc4..53da7eb8c4277 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -560,6 +560,7 @@ def _mpl_repr(self): @property def is_monotonic(self): + """ return if the index has monotonic (only equaly or increasing) values """ return self._engine.is_monotonic def is_lexsorted_for_tuple(self, tup): @@ -567,6 +568,7 @@ def is_lexsorted_for_tuple(self, tup): @cache_readonly(allow_setting=True) def is_unique(self): + """ return if the index has unique values """ return self._engine.is_unique def is_integer(self): @@ -788,6 +790,7 @@ def _get_level_number(self, level): @cache_readonly def inferred_type(self): + """ return a string of the type inferred from the values """ return lib.infer_dtype(self) def is_type_compatible(self, typ): @@ -2143,6 +2146,11 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, * def inferred_type(self): return 'integer' + @cache_readonly + def hasnans(self): + # by definition + return False + @property def asi8(self): # do not cache or you'll create a memory leak diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 90a36228e816a..356984ea88f43 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -261,6 +261,27 @@ def test_nanops(self): # check DatetimeIndex non-monotonic path self.assertEqual(getattr(obj, op)(), datetime(2011, 11, 1)) + # argmin/max + obj = Index(np.arange(5,dtype='int64')) + self.assertEqual(obj.argmin(),0) + self.assertEqual(obj.argmax(),4) + + obj = Index([np.nan, 1, np.nan, 2]) + self.assertEqual(obj.argmin(),1) + self.assertEqual(obj.argmax(),3) + + obj = Index([np.nan]) + self.assertEqual(obj.argmin(),-1) + self.assertEqual(obj.argmax(),-1) + + obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011,11,2),pd.NaT]) + 
 self.assertEqual(obj.argmin(),1) + self.assertEqual(obj.argmax(),2) + + obj = Index([pd.NaT]) + self.assertEqual(obj.argmin(),-1) + self.assertEqual(obj.argmax(),-1) + def test_value_counts_unique_nunique(self): for o in self.objs: klass = type(o) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 3ada26a7e5779..ce5a2a319a336 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1490,13 +1490,6 @@ def searchsorted(self, key, side='left'): def is_type_compatible(self, typ): return typ == self.inferred_type or typ == 'datetime' - def argmin(self): - # hack to workaround argmin failure - try: - return self.values.argmin() - except Exception: # pragma: no cover - return self.asi8.argmin() - @property def inferred_type(self): # b/c datetime is represented as microseconds since the epoch, make From 1f64bc811f23465d7366a8ae546c8ce0e9292b47 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 7 Aug 2014 09:25:12 -0400 Subject: [PATCH 2/2] API: raise on __nonzero__ for Index (GH7897) --- doc/source/v0.15.0.txt | 1 + pandas/core/index.py | 7 +++++++ pandas/tests/test_index.py | 22 ++++++++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index d8e5587e80da1..2520015581cc8 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -173,6 +173,7 @@ a transparent change with only very limited API implications (:issue:`5080`, :is - you may need to unpickle pandas version < 0.15.0 pickles using ``pd.read_pickle`` rather than ``pickle.load``. See :ref:`pickle docs ` - when plotting with a ``PeriodIndex``. The ``matplotlib`` internal axes will now be arrays of ``Period`` rather than a ``PeriodIndex``. (this is similar to how a ``DatetimeIndex`` passes arrays of ``datetimes`` now) +- MultiIndexes will now raise similarly to other pandas objects w.r.t. truth testing, see :ref:`here ` (:issue:`7897`). .. 
_whatsnew_0150.cat: diff --git a/pandas/core/index.py b/pandas/core/index.py index 53da7eb8c4277..4f4fe092a3606 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -838,6 +838,13 @@ def __setstate__(self, state): def __deepcopy__(self, memo={}): return self.copy(deep=True) + def __nonzero__(self): + raise ValueError("The truth value of a {0} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." + .format(self.__class__.__name__)) + + __bool__ = __nonzero__ + def __contains__(self, key): hash(key) # work around some kind of odd cython bug diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 5affdbe1c99aa..4162413554d49 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -74,6 +74,15 @@ def test_numeric_compat(self): "cannot perform floor division", lambda : 1 // idx) + def test_boolean_context_compat(self): + + # boolean context compat + idx = self.create_index() + def f(): + if idx: + pass + tm.assertRaisesRegexp(ValueError,'The truth value of a',f) + class TestIndex(Base, tm.TestCase): _holder = Index _multiprocess_can_split_ = True @@ -1656,6 +1665,19 @@ def setUp(self): def create_index(self): return self.index + def test_boolean_context_compat2(self): + + # boolean context compat + # GH7897 + i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) + common = i1.intersection(i2) + + def f(): + if common: + pass + tm.assertRaisesRegexp(ValueError,'The truth value of a',f) + def test_hash_error(self): with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" %