diff --git a/doc/source/api.rst b/doc/source/api.rst index 90a12d449839b..94d7eb5ec8e3b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -424,10 +424,25 @@ Time series-related Series.shift Series.first_valid_index Series.last_valid_index - Series.weekday Series.resample Series.tz_convert Series.tz_localize + Series.year + Series.month + Series.day + Series.hour + Series.minute + Series.second + Series.microsecond + Series.nanosecond + Series.date + Series.time + Series.dayofyear + Series.weekofyear + Series.week + Series.dayofweek + Series.weekday + Series.quarter String handling ~~~~~~~~~~~~~~~~~~~ @@ -1129,7 +1144,9 @@ Time/Date Components DatetimeIndex.dayofweek DatetimeIndex.weekday DatetimeIndex.quarter - + DatetimeIndex.tz + DatetimeIndex.freq + DatetimeIndex.freqstr Selecting ~~~~~~~~~ diff --git a/doc/source/release.rst b/doc/source/release.rst index 187f1a97c8f0e..b58a990a98a1d 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -69,6 +69,14 @@ API Changes - ``dtypes`` and ``ftypes`` now return a series with ``dtype=object`` on empty containers (:issue:`5740`) - The ``interpolate`` ``downcast`` keyword default has been changed from ``infer`` to ``None``. This is to preseve the original dtype unless explicitly requested otherwise (:issue:`6290`). +- allow a Series to utilize index methods for its index type, e.g. ``Series.year`` is now defined + for a Series with a ``DatetimeIndex`` or a ``PeriodIndex``; trying this on a non-supported Index type will + now raise a ``TypeError``. 
(:issue:`4551`, :issue:`4056`, :issue:`5519`) + + The following are affected: + - ``date,time,year,month,day,hour,minute,second,weekofyear`` + - ``week,dayofweek,dayofyear,quarter,microsecond,nanosecond,qyear`` + - ``min(),max()`` Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index a3839542dafcc..f74f6fc2290e1 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -31,6 +31,21 @@ API changes - The ``DataFrame.interpolate()`` ``downcast`` keyword default has been changed from ``infer`` to ``None``. This is to preseve the original dtype unless explicitly requested otherwise (:issue:`6290`). +- allow a Series to utilize index methods for its index type, e.g. ``Series.year`` is now defined + for a Series with a ``DatetimeIndex`` or a ``PeriodIndex``; trying this on a non-supported Index type will + now raise a ``TypeError``. (:issue:`4551`, :issue:`4056`, :issue:`5519`) + + The following are affected: + - ``date,time,year,month,day,hour,minute,second,weekofyear`` + - ``week,dayofweek,dayofyear,quarter,microsecond,nanosecond,qyear`` + - ``min(),max()`` + + .. 
ipython:: python + + s = Series(np.random.randn(5),index=tm.makeDateIndex(5)) + s + s.year + s.index.year MultiIndexing Using Slicers ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/base.py b/pandas/core/base.py index 36c5a65163fad..f9bf4ca4ce91d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -5,7 +5,6 @@ import numpy as np from pandas.core import common as com - class StringMixin(object): """implements string methods so long as object defines a `__unicode__` @@ -200,3 +199,90 @@ def __unicode__(self): prepr = com.pprint_thing(self, escape_chars=('\t', '\r', '\n'), quote_strings=True) return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) + + +# facilitate the properties on the wrapped ops +def _field_accessor(name, docstring=None): + op_accessor = '_{0}'.format(name) + def f(self): + return self._ops_compat(name,op_accessor) + + f.__name__ = name + f.__doc__ = docstring + return property(f) + +class IndexOpsMixin(object): + """ common ops mixin to support a unified inteface / docs for Series / Index """ + + def _is_allowed_index_op(self, name): + if not self._allow_index_ops: + raise TypeError("cannot perform an {name} operations on this type {typ}".format( + name=name,typ=type(self._get_access_object()))) + + def _is_allowed_datetime_index_op(self, name): + if not self._allow_datetime_index_ops: + raise TypeError("cannot perform an {name} operations on this type {typ}".format( + name=name,typ=type(self._get_access_object()))) + + def _is_allowed_period_index_op(self, name): + if not self._allow_period_index_ops: + raise TypeError("cannot perform an {name} operations on this type {typ}".format( + name=name,typ=type(self._get_access_object()))) + + def _ops_compat(self, name, op_accessor): + from pandas.tseries.index import DatetimeIndex + from pandas.tseries.period import PeriodIndex + obj = self._get_access_object() + if isinstance(obj, DatetimeIndex): + self._is_allowed_datetime_index_op(name) + elif isinstance(obj, 
PeriodIndex): + self._is_allowed_period_index_op(name) + try: + return self._wrap_access_object(getattr(obj,op_accessor)) + except AttributeError: + raise TypeError("cannot perform an {name} operations on this type {typ}".format( + name=name,typ=type(obj))) + + def _get_access_object(self): + if isinstance(self, com.ABCSeries): + return self.index + return self + + def _wrap_access_object(self, obj): + # we may need to coerce the input as we don't want non int64 if + # we have an integer result + if hasattr(obj,'dtype') and com.is_integer_dtype(obj): + obj = obj.astype(np.int64) + + if isinstance(self, com.ABCSeries): + return self._constructor(obj,index=self.index).__finalize__(self) + + return obj + + def max(self): + """ The maximum value of the object """ + self._is_allowed_index_op('max') + return self.values.max() + + def min(self): + """ The minimum value of the object """ + self._is_allowed_index_op('min') + return self.values.min() + + date = _field_accessor('date','Returns numpy array of datetime.date. The date part of the Timestamps') + time = _field_accessor('time','Returns numpy array of datetime.time. 
The time part of the Timestamps') + year = _field_accessor('year', "The year of the datetime") + month = _field_accessor('month', "The month as January=1, December=12") + day = _field_accessor('day', "The days of the datetime") + hour = _field_accessor('hour', "The hours of the datetime") + minute = _field_accessor('minute', "The minutes of the datetime") + second = _field_accessor('second', "The seconds of the datetime") + microsecond = _field_accessor('microsecond', "The microseconds of the datetime") + nanosecond = _field_accessor('nanosecond', "The nanoseconds of the datetime") + weekofyear = _field_accessor('weekofyear', "The week ordinal of the year") + week = weekofyear + dayofweek = _field_accessor('dayofweek', "The day of the week with Monday=0, Sunday=6") + weekday = dayofweek + dayofyear = _field_accessor('dayofyear', "The ordinal day of the year") + quarter = _field_accessor('quarter', "The quarter of the date") + qyear = _field_accessor('qyear') diff --git a/pandas/core/index.py b/pandas/core/index.py index 6cc525fee0344..405e584454c06 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -10,7 +10,7 @@ import pandas.algos as _algos import pandas.index as _index from pandas.lib import Timestamp, is_datetime_array -from pandas.core.base import FrozenList, FrozenNDArray +from pandas.core.base import FrozenList, FrozenNDArray, IndexOpsMixin from pandas.util.decorators import cache_readonly, deprecate from pandas.core.common import isnull @@ -57,7 +57,7 @@ def _shouldbe_timestamp(obj): _Identity = object -class Index(FrozenNDArray): +class Index(IndexOpsMixin, FrozenNDArray): """ Immutable ndarray implementing an ordered, sliceable set. 
The basic object @@ -92,6 +92,9 @@ class Index(FrozenNDArray): name = None asi8 = None _comparables = ['name'] + _allow_index_ops = True + _allow_datetime_index_ops = False + _allow_period_index_ops = False _engine_type = _index.ObjectEngine diff --git a/pandas/core/series.py b/pandas/core/series.py index 67238d813b3fa..50b22ae8dd785 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -30,7 +30,7 @@ from pandas.core.indexing import ( _check_bool_indexer, _check_slice_bounds, _is_index_slice, _maybe_convert_indices) -from pandas.core import generic +from pandas.core import generic, base from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical from pandas.tseries.index import DatetimeIndex @@ -91,7 +91,7 @@ def f(self, *args, **kwargs): # Series class -class Series(generic.NDFrame): +class Series(base.IndexOpsMixin, generic.NDFrame): """ One-dimensional ndarray with axis labels (including time series). @@ -122,6 +122,15 @@ class Series(generic.NDFrame): Copy input data """ _metadata = ['name'] + _allow_index_ops = True + + @property + def _allow_datetime_index_ops(self): + return self.index.is_all_dates and isinstance(self.index, DatetimeIndex) + + @property + def _allow_period_index_ops(self): + return self.index.is_all_dates and isinstance(self.index, PeriodIndex) def __init__(self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False): @@ -2297,11 +2306,6 @@ def asof(self, where): new_values = com.take_1d(values, locs) return self._constructor(new_values, index=where).__finalize__(self) - @property - def weekday(self): - return self._constructor([d.weekday() for d in self.index], - index=self.index).__finalize__(self) - @cache_readonly def str(self): from pandas.core.strings import StringMethods diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 071d609c6e44e..32416dc975e64 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1,9 +1,14 @@ 
import re import numpy as np import pandas.compat as compat +import pandas as pd from pandas.compat import u from pandas.core.base import FrozenList, FrozenNDArray from pandas.util.testing import assertRaisesRegexp, assert_isinstance +from pandas import Series, Index, DatetimeIndex, PeriodIndex +from pandas import _np_version_under1p7 +import nose + import pandas.util.testing as tm class CheckStringMixin(object): @@ -120,6 +125,101 @@ def test_values(self): self.assert_numpy_array_equal(self.container, original) self.assertEqual(vals[0], n) +class Ops(tm.TestCase): + def setUp(self): + self.int_index = tm.makeIntIndex(10) + self.float_index = tm.makeFloatIndex(10) + self.dt_index = tm.makeDateIndex(10) + self.period_index = tm.makePeriodIndex(10) + self.string_index = tm.makeStringIndex(10) + + arr = np.random.randn(10) + self.int_series = Series(arr, index=self.int_index) + self.float_series = Series(arr, index=self.int_index) + self.dt_series = Series(arr, index=self.dt_index) + self.period_series = Series(arr, index=self.period_index) + self.string_series = Series(arr, index=self.string_index) + + self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in ['int','float','dt','period','string'] for f in ['index','series'] ] + + def check_ops_properties(self, props, filter=None, ignore_failures=False): + for op in props: + for o in self.is_valid_objs: + + # if a filter, skip if it doesn't match + if filter is not None: + filt = o.index if isinstance(o, Series) else o + if not filter(filt): + continue + + try: + if isinstance(o, Series): + expected = Series(getattr(o.index,op),index=o.index) + else: + expected = getattr(o,op) + except (AttributeError): + if ignore_failures: + continue + + result = getattr(o,op) + + # these couuld be series, arrays or scalars + if isinstance(result,Series) and isinstance(expected,Series): + tm.assert_series_equal(result,expected) + elif isinstance(result,Index) and isinstance(expected,Index): + tm.assert_index_equal(result,expected) 
+ elif isinstance(result,np.ndarray) and isinstance(expected,np.ndarray): + self.assert_numpy_array_equal(result,expected) + else: + self.assertEqual(result, expected) + + # freq raises AttributeError on an Int64Index because its not defined + # we mostly care about Series hwere anyhow + if not ignore_failures: + for o in self.not_valid_objs: + self.assertRaises(TypeError, lambda : getattr(o,op)) + +class TestIndexOps(Ops): + + def setUp(self): + super(TestIndexOps, self).setUp() + self.is_valid_objs = [ o for o in self.objs if o._allow_index_ops ] + self.not_valid_objs = [ o for o in self.objs if not o._allow_index_ops ] + + def test_ops(self): + if _np_version_under1p7: + raise nose.SkipTest("test only valid in numpy >= 1.7") + for op in ['max','min']: + for o in self.objs: + result = getattr(o,op)() + expected = getattr(o.values,op)() + self.assertEqual(result, expected) + +class TestDatetimeIndexOps(Ops): + _allowed = '_allow_datetime_index_ops' + + def setUp(self): + super(TestDatetimeIndexOps, self).setUp() + mask = lambda x: x._allow_datetime_index_ops or x._allow_period_index_ops + self.is_valid_objs = [ o for o in self.objs if mask(o) ] + self.not_valid_objs = [ o for o in self.objs if not mask(o) ] + + def test_ops_properties(self): + self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter']) + self.check_ops_properties(['date','time','microsecond','nanosecond'], lambda x: isinstance(x,DatetimeIndex)) + +class TestPeriodIndexOps(Ops): + _allowed = '_allow_period_index_ops' + + def setUp(self): + super(TestPeriodIndexOps, self).setUp() + mask = lambda x: x._allow_datetime_index_ops or x._allow_period_index_ops + self.is_valid_objs = [ o for o in self.objs if mask(o) ] + self.not_valid_objs = [ o for o in self.objs if not mask(o) ] + + def test_ops_properties(self): + 
self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter']) + self.check_ops_properties(['qyear'], lambda x: isinstance(x,PeriodIndex)) if __name__ == '__main__': import nose diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 23181485d3bbb..5831d0ce13c9d 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -146,6 +146,7 @@ class DatetimeIndex(Int64Index): offset = None _comparables = ['name','freqstr','tz'] + _allow_datetime_index_ops = True def __new__(cls, data=None, freq=None, start=None, end=None, periods=None, @@ -1382,6 +1383,7 @@ def map(self, f): # alias to offset @property def freq(self): + """ return the frequency object if its set, otherwise None """ return self.offset @cache_readonly @@ -1393,26 +1395,27 @@ def inferred_freq(self): @property def freqstr(self): + """ return the frequency object as a string if its set, otherwise None """ return self.offset.freqstr - year = _field_accessor('year', 'Y') - month = _field_accessor('month', 'M', "The month as January=1, December=12") - day = _field_accessor('day', 'D') - hour = _field_accessor('hour', 'h') - minute = _field_accessor('minute', 'm') - second = _field_accessor('second', 's') - microsecond = _field_accessor('microsecond', 'us') - nanosecond = _field_accessor('nanosecond', 'ns') - weekofyear = _field_accessor('weekofyear', 'woy') - week = weekofyear - dayofweek = _field_accessor('dayofweek', 'dow', - "The day of the week with Monday=0, Sunday=6") - weekday = dayofweek - dayofyear = _field_accessor('dayofyear', 'doy') - quarter = _field_accessor('quarter', 'q') + _year = _field_accessor('year', 'Y') + _month = _field_accessor('month', 'M', "The month as January=1, December=12") + _day = _field_accessor('day', 'D') + _hour = _field_accessor('hour', 'h') + _minute = _field_accessor('minute', 'm') + _second = _field_accessor('second', 's') + _microsecond = _field_accessor('microsecond', 'us') + 
_nanosecond = _field_accessor('nanosecond', 'ns') + _weekofyear = _field_accessor('weekofyear', 'woy') + _week = _weekofyear + _dayofweek = _field_accessor('dayofweek', 'dow', + "The day of the week with Monday=0, Sunday=6") + _weekday = _dayofweek + _dayofyear = _field_accessor('dayofyear', 'doy') + _quarter = _field_accessor('quarter', 'q') @property - def time(self): + def _time(self): """ Returns numpy array of datetime.time. The time part of the Timestamps. """ @@ -1421,7 +1424,7 @@ def time(self): return _algos.arrmap_object(self.asobject, lambda x: x.time()) @property - def date(self): + def _date(self): """ Returns numpy array of datetime.date. The date part of the Timestamps. """ diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 974c0a52a35de..337533ad29f4f 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -551,6 +551,7 @@ class PeriodIndex(Int64Index): >>> idx2 = PeriodIndex(start='2000', end='2010', freq='A') """ _box_scalars = True + _allow_period_index_ops = True __eq__ = _period_index_cmp('__eq__') __ne__ = _period_index_cmp('__ne__') @@ -773,19 +774,19 @@ def asfreq(self, freq=None, how='E'): def to_datetime(self, dayfirst=False): return self.to_timestamp() - year = _field_accessor('year', 0) - month = _field_accessor('month', 3) - day = _field_accessor('day', 4) - hour = _field_accessor('hour', 5) - minute = _field_accessor('minute', 6) - second = _field_accessor('second', 7) - weekofyear = _field_accessor('week', 8) - week = weekofyear - dayofweek = _field_accessor('dayofweek', 10) - weekday = dayofweek - dayofyear = day_of_year = _field_accessor('dayofyear', 9) - quarter = _field_accessor('quarter', 2) - qyear = _field_accessor('qyear', 1) + _year = _field_accessor('year', 0) + _month = _field_accessor('month', 3) + _day = _field_accessor('day', 4) + _hour = _field_accessor('hour', 5) + _minute = _field_accessor('minute', 6) + _second = _field_accessor('second', 7) + _weekofyear = _field_accessor('week', 
8) + _week = _weekofyear + _dayofweek = _field_accessor('dayofweek', 10) + _weekday = _dayofweek + _dayofyear = day_of_year = _field_accessor('dayofyear', 9) + _quarter = _field_accessor('quarter', 2) + _qyear = _field_accessor('qyear', 1) # Try to run function on index first, and then on elements of index # Especially important for group-by functionality