From 41008c789868e61416f570b6bb7fdb7ae920c683 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Mar 2017 10:32:57 +0100 Subject: [PATCH 1/8] API: return Index instead of array from datetime field accessors (GH15022) --- pandas/tests/indexes/datetimes/test_misc.py | 4 +- .../tests/indexes/period/test_construction.py | 4 +- pandas/tests/indexes/period/test_period.py | 8 +-- pandas/tests/tools/test_util.py | 8 +-- pandas/tests/tseries/test_timezones.py | 70 +++++++++---------- pandas/tseries/common.py | 2 + pandas/tseries/converter.py | 2 +- pandas/tseries/index.py | 15 ++-- pandas/tseries/period.py | 5 +- pandas/tseries/util.py | 4 +- 10 files changed, 61 insertions(+), 61 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index e99f1d46637c2..4ecff13481054 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -313,5 +313,5 @@ def test_datetimeindex_accessors(self): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) - self.assert_numpy_array_equal(dti.nanosecond, - np.arange(10, dtype=np.int32)) + self.assert_index_equal(dti.nanosecond, + pd.Index(np.arange(10, dtype=np.int64))) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index f13a84f4f0e92..ab70ad59846e8 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -91,8 +91,8 @@ def test_constructor_arrays_negative_year(self): pindex = PeriodIndex(year=years, quarter=quarters) - self.assert_numpy_array_equal(pindex.year, years) - self.assert_numpy_array_equal(pindex.quarter, quarters) + self.assert_index_equal(pindex.year, pd.Index(years)) + self.assert_index_equal(pindex.quarter, pd.Index(quarters)) def test_constructor_invalid_quarters(self): self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), diff --git 
a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 4fbadfca06ede..f1051eb828b4e 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -660,10 +660,10 @@ def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2012-03', '2012-04'], freq='D') - exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) - self.assert_numpy_array_equal(idx.year, exp) - exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_numpy_array_equal(idx.month, exp) + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64) + self.assert_index_equal(idx.year, exp) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64) + self.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') diff --git a/pandas/tests/tools/test_util.py b/pandas/tests/tools/test_util.py index 2672db13a959f..ed64e8f42d84b 100644 --- a/pandas/tests/tools/test_util.py +++ b/pandas/tests/tools/test_util.py @@ -31,10 +31,10 @@ def test_datetimeindex(self): # make sure that the ordering on datetimeindex is consistent x = date_range('2000-01-01', periods=2) result1, result2 = [Index(y).day for y in cartesian_product([x, x])] - expected1 = np.array([1, 1, 2, 2], dtype=np.int32) - expected2 = np.array([1, 2, 1, 2], dtype=np.int32) - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) + expected1 = Index([1, 1, 2, 2]) + expected2 = Index([1, 2, 1, 2]) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) def test_empty(self): # product of empty factors diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 1ccc1652d2719..28eddc3e5841a 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -358,8 +358,8 @@ def test_field_access_localize(self): dr = 
date_range('2011-10-02 00:00', freq='h', periods=10, tz=self.tzstr('America/Atikokan')) - expected = np.arange(10, dtype=np.int32) - self.assert_numpy_array_equal(dr.hour, expected) + expected = pd.Index(np.arange(10, dtype=np.int64)) + self.assert_index_equal(dr.hour, expected) def test_with_tz(self): tz = self.tz('US/Central') @@ -947,8 +947,8 @@ def test_tz_convert_hour_overflow_dst(self): '2009-05-12 09:50:32'] tt = to_datetime(ts).tz_localize('US/Eastern') ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern ts = ['2008-05-12 13:50:00', @@ -956,8 +956,8 @@ def test_tz_convert_hour_overflow_dst(self): '2009-05-12 13:50:32'] tt = to_datetime(ts).tz_localize('UTC') ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC ts = ['2008-05-12 09:50:00', @@ -965,8 +965,8 @@ def test_tz_convert_hour_overflow_dst(self): '2008-05-12 09:50:32'] tt = to_datetime(ts).tz_localize('US/Eastern') ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern ts = ['2008-05-12 13:50:00', @@ -974,8 +974,8 @@ def test_tz_convert_hour_overflow_dst(self): '2008-05-12 13:50:32'] tt = to_datetime(ts).tz_localize('UTC') ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) def test_tz_convert_hour_overflow_dst_timestamps(self): # Regression test for: @@ -989,8 +989,8 @@ def 
test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2009-05-12 09:50:32', tz=tz)] tt = to_datetime(ts) ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), @@ -998,8 +998,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2009-05-12 13:50:32', tz='UTC')] tt = to_datetime(ts) ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC ts = [Timestamp('2008-05-12 09:50:00', tz=tz), @@ -1007,8 +1007,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2008-05-12 09:50:32', tz=tz)] tt = to_datetime(ts) ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), @@ -1016,8 +1016,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2008-05-12 13:50:32', tz='UTC')] tt = to_datetime(ts) ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) def test_tslib_tz_convert_trans_pos_plus_1__bug(self): # Regression test for tslib.tz_convert(vals, tz1, tz2). 
@@ -1028,9 +1028,8 @@ def test_tslib_tz_convert_trans_pos_plus_1__bug(self): idx = idx.tz_localize('UTC') idx = idx.tz_convert('Europe/Moscow') - expected = np.repeat(np.array([3, 4, 5], dtype=np.int32), - np.array([n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + self.assert_index_equal(idx.hour, Index(expected)) def test_tslib_tz_convert_dst(self): for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: @@ -1039,62 +1038,57 @@ def test_tslib_tz_convert_dst(self): tz='UTC') idx = idx.tz_convert('US/Eastern') expected = np.repeat(np.array([18, 19, 20, 21, 22, 23, - 0, 1, 3, 4, 5], dtype=np.int32), + 0, 1, 3, 4, 5]), np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + self.assert_index_equal(idx.hour, Index(expected)) idx = date_range('2014-03-08 18:00', '2014-03-09 05:00', freq=freq, tz='US/Eastern') idx = idx.tz_convert('UTC') - expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], - dtype=np.int32), + expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + self.assert_index_equal(idx.hour, Index(expected)) # End DST idx = date_range('2014-11-01 23:00', '2014-11-02 09:00', freq=freq, tz='UTC') idx = idx.tz_convert('US/Eastern') expected = np.repeat(np.array([19, 20, 21, 22, 23, - 0, 1, 1, 2, 3, 4], dtype=np.int32), + 0, 1, 1, 2, 3, 4]), np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + self.assert_index_equal(idx.hour, Index(expected)) idx = date_range('2014-11-01 18:00', '2014-11-02 05:00', freq=freq, tz='US/Eastern') idx = idx.tz_convert('UTC') expected = np.repeat(np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10], dtype=np.int32), + 7, 8, 9, 10]), np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + 
self.assert_index_equal(idx.hour, Index(expected)) # daily # Start DST idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='UTC') idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, - np.array([19, 19], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([19, 19])) idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='US/Eastern') idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, - np.array([5, 5], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([5, 5])) # End DST idx = date_range('2014-11-01 00:00', '2014-11-02 00:00', freq='D', tz='UTC') idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, - np.array([20, 20], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([20, 20])) idx = date_range('2014-11-01 00:00', '2014-11-02 000:00', freq='D', tz='US/Eastern') idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, - np.array([4, 4], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([4, 4])) def test_tzlocal(self): # GH 13583 diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 82fcdbcd0d367..f9fd27176487c 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -105,6 +105,8 @@ def _delegate_property_get(self, name): elif not is_list_like(result): return result + result = np.asarray(result) + # blow up if we operate on categories if self.orig is not None: result = take_1d(result, self.orig.cat.codes) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 8aea14a2688d1..bc768a8bc5b58 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -455,7 +455,7 @@ def period_break(dates, period): """ current = getattr(dates, period) previous = getattr(dates - 1, period) - return (current - previous).nonzero()[0] + return np.nonzero(current - previous)[0] def has_level_label(label_flags, vmin): diff --git a/pandas/tseries/index.py 
b/pandas/tseries/index.py index 983c1a4cd9de9..0e590a482431d 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -73,16 +73,19 @@ def f(self): result = libts.get_start_end_field(values, field, self.freqstr, month_kw) + result = self._maybe_mask_results(result, convert='float64') + elif field in ['weekday_name']: result = libts.get_date_name_field(values, field) - return self._maybe_mask_results(result) + result = self._maybe_mask_results(result) elif field in ['is_leap_year']: # no need to mask NaT - return libts.get_date_field(values, field) + result = libts.get_date_field(values, field) else: result = libts.get_date_field(values, field) + result = self._maybe_mask_results(result, convert='float64') - return self._maybe_mask_results(result, convert='float64') + return Index(result) f.__name__ = name f.__doc__ = docstring @@ -1909,9 +1912,9 @@ def to_julian_date(self): """ # http://mysite.verizon.net/aesir_research/date/jdalg2.htm - year = self.year - month = self.month - day = self.day + year = np.asarray(self.year) + month = np.asarray(self.month) + day = np.asarray(self.day) testarr = month < 3 year[testarr] -= 1 month[testarr] += 12 diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index f7e9ba9eaa9b1..8dd48aff29c28 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -52,7 +52,8 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - return get_period_field_arr(alias, self._values, base) + result = get_period_field_arr(alias, self._values, base) + return Index(result) f.__name__ = name f.__doc__ = docstring return property(f) @@ -585,7 +586,7 @@ def to_datetime(self, dayfirst=False): @property def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ - return tslib._isleapyear_arr(self.year) + return tslib._isleapyear_arr(np.asarray(self.year)) @property def start_time(self): diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index 
dc460dee8415b..da3bb075dd02c 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -54,7 +54,7 @@ def pivot_annual(series, freq=None): if freq == 'D': width = 366 - offset = index.dayofyear - 1 + offset = np.asarray(index.dayofyear) - 1 # adjust for leap year offset[(~isleapyear(year)) & (offset >= 59)] += 1 @@ -63,7 +63,7 @@ def pivot_annual(series, freq=None): # todo: strings like 1/1, 1/25, etc.? elif freq in ('M', 'BM'): width = 12 - offset = index.month - 1 + offset = np.asarray(index.month) - 1 columns = lrange(1, 13) elif freq == 'H': width = 8784 From 52f9008bebb0a4f42357ccb3a4bfa6b02ae330b3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Mar 2017 12:16:37 +0100 Subject: [PATCH 2/8] Fix tests --- pandas/tests/scalar/test_timestamp.py | 2 +- pandas/tests/tools/test_pivot.py | 2 +- pandas/tests/tseries/test_timezones.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 082f0fa9c40d5..48e0b96a95209 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -604,7 +604,7 @@ def test_nat_vector_field_access(self): for field in fields: result = getattr(idx, field) expected = [getattr(x, field) for x in idx] - self.assert_numpy_array_equal(result, np.array(expected)) + self.assert_index_equal(result, pd.Index(expected)) s = pd.Series(idx) diff --git a/pandas/tests/tools/test_pivot.py b/pandas/tests/tools/test_pivot.py index 62863372dbd02..4502f232c6d9c 100644 --- a/pandas/tests/tools/test_pivot.py +++ b/pandas/tests/tools/test_pivot.py @@ -1367,7 +1367,7 @@ def test_daily(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): annual = pivot_annual(ts, 'D') - doy = ts.index.dayofyear + doy = np.asarray(ts.index.dayofyear) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 diff --git 
a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 28eddc3e5841a..1fc0e1b73df6b 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -358,7 +358,7 @@ def test_field_access_localize(self): dr = date_range('2011-10-02 00:00', freq='h', periods=10, tz=self.tzstr('America/Atikokan')) - expected = pd.Index(np.arange(10, dtype=np.int64)) + expected = Index(np.arange(10, dtype=np.int64)) self.assert_index_equal(dr.hour, expected) def test_with_tz(self): From f2831e2a2074e27e5cd3cfc0728d989742ee4680 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 7 Mar 2017 15:06:28 +0100 Subject: [PATCH 3/8] Update timedelta accessors --- .../indexes/timedeltas/test_timedelta.py | 24 +++++++++++-------- pandas/tseries/tdi.py | 5 ++-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 4c8571e4f08f9..3abc2d8422fd3 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -424,7 +424,7 @@ def test_total_seconds(self): freq='s') expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9, 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. 
/ 1e9] - tm.assert_almost_equal(rng.total_seconds(), np.array(expt)) + tm.assert_almost_equal(rng.total_seconds(), Index(expt)) # test Series s = Series(rng) @@ -486,16 +486,16 @@ def test_append_numpy_bug_1681(self): def test_fields(self): rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, freq='s') - self.assert_numpy_array_equal(rng.days, np.array( - [1, 1], dtype='int64')) - self.assert_numpy_array_equal( + self.assert_index_equal(rng.days, Index([1, 1], dtype='int64')) + self.assert_index_equal( rng.seconds, - np.array([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], - dtype='int64')) - self.assert_numpy_array_equal(rng.microseconds, np.array( - [100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) - self.assert_numpy_array_equal(rng.nanoseconds, np.array( - [456, 456], dtype='int64')) + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], + dtype='int64')) + self.assert_index_equal( + rng.microseconds, + Index([100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) + self.assert_index_equal(rng.nanoseconds, + Index([456, 456], dtype='int64')) self.assertRaises(AttributeError, lambda: rng.hours) self.assertRaises(AttributeError, lambda: rng.minutes) @@ -509,6 +509,10 @@ def test_fields(self): tm.assert_series_equal(s.dt.seconds, Series( [10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1])) + # preserve name (GH15589) + rng.name = 'name' + assert rng.days.name == 'name' + def test_freq_conversion(self): # doc example diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 13d844bb6a399..55333890640c1 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -374,7 +374,7 @@ def _get_field(self, m): else: result = np.array([getattr(Timedelta(val), m) for val in values], dtype='int64') - return result + return Index(result, name=self.name) @property def days(self): @@ -437,7 +437,8 @@ def total_seconds(self): .. 
versionadded:: 0.17.0 """ - return self._maybe_mask_results(1e-9 * self.asi8) + return Index(self._maybe_mask_results(1e-9 * self.asi8), + name=self.name) def to_pytimedelta(self): """ From cdf6caecd59394bd84b6b5aeb74a1b2b42ea7ddb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 7 Mar 2017 15:20:00 +0100 Subject: [PATCH 4/8] Preserve name for DatetimeIndex field accessors --- pandas/tests/indexes/datetimes/test_misc.py | 14 ++++++++++++++ pandas/tseries/index.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 4ecff13481054..bbb231bd58ad4 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -172,6 +172,7 @@ def test_normalize(self): class TestDatetime64(tm.TestCase): def test_datetimeindex_accessors(self): +<<<<<<< f2831e2a2074e27e5cd3cfc0728d989742ee4680 dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) # GH 13303 @@ -255,6 +256,19 @@ def test_datetimeindex_accessors(self): self.assertEqual(len(dti.is_year_end), 365) self.assertEqual(len(dti.weekday_name), 365) + dti.name = 'name' + + for accessor in ['year', 'month', 'day', 'hour', 'minute', 'second', + 'microsecond', 'nanosecond', 'dayofweek', 'dayofyear', + 'weekofyear', 'quarter', + 'is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', + 'is_year_start', 'is_year_end', 'weekday_name']: + res = getattr(dti, accessor) + self.assertEqual(len(res), 365) + self.assertIsInstance(res, Index) + self.assertEqual(res.name, 'name') + dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 0e590a482431d..c3608d847e867 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -85,7 +85,7 @@ def f(self): result = libts.get_date_field(values, field) result = self._maybe_mask_results(result, 
convert='float64') - return Index(result) + return Index(result, name=self.name) f.__name__ = name f.__doc__ = docstring From 96ed06928a96bb85f034d12ac8c1acb56b3e817b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 7 Mar 2017 15:26:40 +0100 Subject: [PATCH 5/8] Preserve name for PeriodIndex field accessors --- pandas/tests/indexes/period/test_period.py | 6 +++--- pandas/tseries/period.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index f1051eb828b4e..6a6c0ab49b15d 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -658,11 +658,11 @@ def test_negative_ordinals(self): def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2012-03', '2012-04'], freq='D') + '2012-03', '2012-04'], freq='D', name='name') - exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64) + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name='name') self.assert_index_equal(idx.year, exp) - exp = Index([1, 2, -1, 3, 4], dtype=np.int64) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name='name') self.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 8dd48aff29c28..c279d5a9342e8 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -53,7 +53,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) result = get_period_field_arr(alias, self._values, base) - return Index(result) + return Index(result, name=self.name) f.__name__ = name f.__doc__ = docstring return property(f) From 6317b6bcb9422664f53d9ee7c8002e7838539ed0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 7 Mar 2017 15:38:46 +0100 Subject: [PATCH 6/8] Add whatsnew --- doc/source/whatsnew/v0.20.0.txt | 30 ++++++++++++++++++++++++++++++ 1 file changed, 
30 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 5ac7624856040..402ccb83042cb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -471,6 +471,36 @@ New Behavior: s.map(lambda x: x.hour) + +Accessing datetime fields of Index now return Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The several datetime-related attributes (see :ref:`here ` +for an overview) of DatetimeIndex, PeriodIndex and TimedeltaIndex previously +returned numpy arrays, now they will return a new Index object (:issue:`15022`). + +Previous behaviour: + +.. code-block:: ipython + + In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H') + + In [2]: idx.hour + Out[2]: array([ 0, 10, 20, 6, 16], dtype=int32) + +New Behavior: + +.. ipython:: python + + idx = pd.date_range("2015-01-01", periods=5, freq='10H') + idx.hour + +This has the advantage that specific Index methods are still available on the +result. On the other hand, this might have backward incompatibilities: e.g. +compared to numpy arrays, Index objects are not mutable (values cannot be set +by indexing). To get the original result, you can convert to a nunpy array +explicitly using ``np.asarray(idx.hour)``. + .. 
_whatsnew_0200.api_breaking.s3: S3 File Handling From 41728a96f71829e956de40ac83c4340a325ed9a5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 22 Mar 2017 14:24:32 +0100 Subject: [PATCH 7/8] FIX: boolean fields should still return array --- doc/source/whatsnew/v0.20.0.txt | 4 ++- pandas/tests/indexes/datetimes/test_misc.py | 33 +++++++++++++++------ pandas/tests/scalar/test_timestamp.py | 13 +++++++- pandas/tseries/index.py | 10 ++++--- 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 402ccb83042cb..3421fe4811cdf 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -478,6 +478,8 @@ Accessing datetime fields of Index now return Index The several datetime-related attributes (see :ref:`here ` for an overview) of DatetimeIndex, PeriodIndex and TimedeltaIndex previously returned numpy arrays, now they will return a new Index object (:issue:`15022`). +Only in case of a boolean field, still a boolean array is returned to support +boolean indexing. Previous behaviour: @@ -498,7 +500,7 @@ New Behavior: This has the advantage that specific Index methods are still available on the result. On the other hand, this might have backward incompatibilities: e.g. compared to numpy arrays, Index objects are not mutable (values cannot be set -by indexing). To get the original result, you can convert to a nunpy array +by indexing). To get the original result, you can convert to a numpy array explicitly using ``np.asarray(idx.hour)``. .. 
_whatsnew_0200.api_breaking.s3: diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index bbb231bd58ad4..1a065ac475752 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -172,7 +172,7 @@ def test_normalize(self): class TestDatetime64(tm.TestCase): def test_datetimeindex_accessors(self): -<<<<<<< f2831e2a2074e27e5cd3cfc0728d989742ee4680 + dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) # GH 13303 @@ -258,16 +258,31 @@ def test_datetimeindex_accessors(self): dti.name = 'name' - for accessor in ['year', 'month', 'day', 'hour', 'minute', 'second', - 'microsecond', 'nanosecond', 'dayofweek', 'dayofyear', - 'weekofyear', 'quarter', - 'is_month_start', 'is_month_end', + # non boolean accessors -> return Index + for accessor in ['year', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', + 'dayofweek', 'dayofyear', 'weekofyear', + 'quarter', 'weekday_name']: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, Index) + assert res.name == 'name' + + # boolean accessors -> return array + for accessor in ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', - 'is_year_start', 'is_year_end', 'weekday_name']: + 'is_year_start', 'is_year_end']: res = getattr(dti, accessor) - self.assertEqual(len(res), 365) - self.assertIsInstance(res, Index) - self.assertEqual(res.name, 'name') + assert len(res) == 365 + assert isinstance(res, np.ndarray) + + # test boolean indexing + res = dti[dti.is_quarter_start] + exp = dti[[0, 90, 181, 273]] + tm.assert_index_equal(res, exp) + res = dti[dti.is_leap_year] + exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name') + tm.assert_index_equal(res, exp) dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 
48e0b96a95209..bbf33c4db5ad7 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -597,15 +597,26 @@ def test_nat_fields(self): def test_nat_vector_field_access(self): idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + # non boolean fields fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'is_leap_year'] + 'days_in_month'] for field in fields: result = getattr(idx, field) expected = [getattr(x, field) for x in idx] self.assert_index_equal(result, pd.Index(expected)) + # boolean fields + fields = ['is_leap_year'] + # other boolean fields like 'is_month_start' and 'is_month_end' + # not yet supported by NaT + + for field in fields: + result = getattr(idx, field) + expected = [getattr(x, field) for x in idx] + self.assert_numpy_array_equal(result, np.array(expected)) + s = pd.Series(idx) for field in fields: diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index c3608d847e867..11d2d29597fc0 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -64,6 +64,7 @@ def f(self): if self.tz is not utc: values = self._local_timestamps() + # boolean accessors -> return array if field in ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end']: @@ -73,14 +74,15 @@ def f(self): result = libts.get_start_end_field(values, field, self.freqstr, month_kw) - result = self._maybe_mask_results(result, convert='float64') + return self._maybe_mask_results(result, convert='float64') + elif field in ['is_leap_year']: + # no need to mask NaT + return libts.get_date_field(values, field) + # non-boolean accessors -> return Index elif field in ['weekday_name']: result = libts.get_date_name_field(values, field) result = self._maybe_mask_results(result) - elif field in ['is_leap_year']: - # no need to mask NaT - result = libts.get_date_field(values, field) else: result = 
libts.get_date_field(values, field) result = self._maybe_mask_results(result, convert='float64') From ffacd384b04edf5280da74d3bdce2f624a48c24d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 22 Mar 2017 16:33:16 +0100 Subject: [PATCH 8/8] doc fixes --- doc/source/whatsnew/v0.20.0.txt | 8 +++++--- pandas/tests/indexes/datetimes/test_misc.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3421fe4811cdf..61ec609cd57b2 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -472,14 +472,16 @@ New Behavior: s.map(lambda x: x.hour) +.. _whatsnew_0200.api_breaking.index_dt_field: + Accessing datetime fields of Index now return Index ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The several datetime-related attributes (see :ref:`here ` -for an overview) of DatetimeIndex, PeriodIndex and TimedeltaIndex previously +for an overview) of ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` previously returned numpy arrays, now they will return a new Index object (:issue:`15022`). -Only in case of a boolean field, still a boolean array is returned to support -boolean indexing. +Only in the case of a boolean field, the return value is still a boolean array +instead of an Index (to support boolean indexing). Previous behaviour: diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 1a065ac475752..ef24c493f5090 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -260,7 +260,7 @@ def test_datetimeindex_accessors(self): # non boolean accessors -> return Index for accessor in ['year', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', + 'second', 'microsecond', 'nanosecond', 'dayofweek', 'dayofyear', 'weekofyear', 'quarter', 'weekday_name']: res = getattr(dti, accessor)