Skip to content

Commit 1a266ee

Browse files
jorisvandenbossche authored and jreback committed
API: return Index instead of array from DatetimeIndex field accessors (GH15022)
closes #15022

Author: Joris Van den Bossche <[email protected]>

Closes #15589 from jorisvandenbossche/api-dt-fields-index and squashes the following commits:

ffacd38 [Joris Van den Bossche] doc fixes
41728a9 [Joris Van den Bossche] FIX: boolean fields should still return array
6317b6b [Joris Van den Bossche] Add whatsnew
96ed069 [Joris Van den Bossche] Preserve name for PeriodIndex field accessors
cdf6cae [Joris Van den Bossche] Preserve name for DatetimeIndex field accessors
f2831e2 [Joris Van den Bossche] Update timedelta accessors
52f9008 [Joris Van den Bossche] Fix tests
41008c7 [Joris Van den Bossche] API: return Index instead of array from datetime field accessors (GH15022)
1 parent 79581ff commit 1a266ee

File tree

15 files changed

+156
-78
lines changed

15 files changed

+156
-78
lines changed

doc/source/whatsnew/v0.20.0.txt

+32-1
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,38 @@ New Behavior:
471471

472472
s.map(lambda x: x.hour)
473473

474+
475+
.. _whatsnew_0200.api_breaking.index_dt_field:
476+
477+
Accessing datetime fields of Index now returns Index
478+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
479+
480+
The datetime-related attributes (see :ref:`here <timeseries.components>`
481+
for an overview) of ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` previously
482+
returned numpy arrays. They will now return a new ``Index`` object, except
483+
in the case of a boolean field, where the result will still be a boolean ndarray. (:issue:`15022`)
484+
485+
Previous Behavior:
486+
487+
.. code-block:: ipython
488+
489+
In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H')
490+
491+
In [2]: idx.hour
492+
Out[2]: array([ 0, 10, 20, 6, 16], dtype=int32)
493+
494+
New Behavior:
495+
496+
.. ipython:: python
497+
498+
idx = pd.date_range("2015-01-01", periods=5, freq='10H')
499+
idx.hour
500+
501+
This has the advantage that specific ``Index`` methods are still available on the
502+
result. On the other hand, this might introduce backward incompatibilities: e.g.
503+
compared to numpy arrays, ``Index`` objects are not mutable. To get the original
504+
ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``.
505+
474506
.. _whatsnew_0200.api_breaking.s3:
475507

476508
S3 File Handling
@@ -936,4 +968,3 @@ Bug Fixes
936968
- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
937969
- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
938970
- Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`)
939-

pandas/tests/indexes/datetimes/test_misc.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def test_normalize(self):
172172
class TestDatetime64(tm.TestCase):
173173

174174
def test_datetimeindex_accessors(self):
175+
175176
dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1),
176177
periods=365)
177178
# GH 13303
@@ -255,6 +256,34 @@ def test_datetimeindex_accessors(self):
255256
self.assertEqual(len(dti.is_year_end), 365)
256257
self.assertEqual(len(dti.weekday_name), 365)
257258

259+
dti.name = 'name'
260+
261+
# non boolean accessors -> return Index
262+
for accessor in ['year', 'month', 'day', 'hour', 'minute',
263+
'second', 'microsecond', 'nanosecond',
264+
'dayofweek', 'dayofyear', 'weekofyear',
265+
'quarter', 'weekday_name']:
266+
res = getattr(dti, accessor)
267+
assert len(res) == 365
268+
assert isinstance(res, Index)
269+
assert res.name == 'name'
270+
271+
# boolean accessors -> return array
272+
for accessor in ['is_month_start', 'is_month_end',
273+
'is_quarter_start', 'is_quarter_end',
274+
'is_year_start', 'is_year_end']:
275+
res = getattr(dti, accessor)
276+
assert len(res) == 365
277+
assert isinstance(res, np.ndarray)
278+
279+
# test boolean indexing
280+
res = dti[dti.is_quarter_start]
281+
exp = dti[[0, 90, 181, 273]]
282+
tm.assert_index_equal(res, exp)
283+
res = dti[dti.is_leap_year]
284+
exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name')
285+
tm.assert_index_equal(res, exp)
286+
258287
dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1),
259288
periods=4)
260289

@@ -313,5 +342,5 @@ def test_datetimeindex_accessors(self):
313342
def test_nanosecond_field(self):
314343
dti = DatetimeIndex(np.arange(10))
315344

316-
self.assert_numpy_array_equal(dti.nanosecond,
317-
np.arange(10, dtype=np.int32))
345+
self.assert_index_equal(dti.nanosecond,
346+
pd.Index(np.arange(10, dtype=np.int64)))

pandas/tests/indexes/period/test_construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ def test_constructor_arrays_negative_year(self):
9191

9292
pindex = PeriodIndex(year=years, quarter=quarters)
9393

94-
self.assert_numpy_array_equal(pindex.year, years)
95-
self.assert_numpy_array_equal(pindex.quarter, quarters)
94+
self.assert_index_equal(pindex.year, pd.Index(years))
95+
self.assert_index_equal(pindex.quarter, pd.Index(quarters))
9696

9797
def test_constructor_invalid_quarters(self):
9898
self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004),

pandas/tests/indexes/period/test_period.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -658,12 +658,12 @@ def test_negative_ordinals(self):
658658

659659
def test_pindex_fieldaccessor_nat(self):
660660
idx = PeriodIndex(['2011-01', '2011-02', 'NaT',
661-
'2012-03', '2012-04'], freq='D')
661+
'2012-03', '2012-04'], freq='D', name='name')
662662

663-
exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64)
664-
self.assert_numpy_array_equal(idx.year, exp)
665-
exp = np.array([1, 2, -1, 3, 4], dtype=np.int64)
666-
self.assert_numpy_array_equal(idx.month, exp)
663+
exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name='name')
664+
self.assert_index_equal(idx.year, exp)
665+
exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name='name')
666+
self.assert_index_equal(idx.month, exp)
667667

668668
def test_pindex_qaccess(self):
669669
pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q')

pandas/tests/indexes/timedeltas/test_timedelta.py

+14-10
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ def test_total_seconds(self):
424424
freq='s')
425425
expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9,
426426
1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. / 1e9]
427-
tm.assert_almost_equal(rng.total_seconds(), np.array(expt))
427+
tm.assert_almost_equal(rng.total_seconds(), Index(expt))
428428

429429
# test Series
430430
s = Series(rng)
@@ -486,16 +486,16 @@ def test_append_numpy_bug_1681(self):
486486
def test_fields(self):
487487
rng = timedelta_range('1 days, 10:11:12.100123456', periods=2,
488488
freq='s')
489-
self.assert_numpy_array_equal(rng.days, np.array(
490-
[1, 1], dtype='int64'))
491-
self.assert_numpy_array_equal(
489+
self.assert_index_equal(rng.days, Index([1, 1], dtype='int64'))
490+
self.assert_index_equal(
492491
rng.seconds,
493-
np.array([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13],
494-
dtype='int64'))
495-
self.assert_numpy_array_equal(rng.microseconds, np.array(
496-
[100 * 1000 + 123, 100 * 1000 + 123], dtype='int64'))
497-
self.assert_numpy_array_equal(rng.nanoseconds, np.array(
498-
[456, 456], dtype='int64'))
492+
Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13],
493+
dtype='int64'))
494+
self.assert_index_equal(
495+
rng.microseconds,
496+
Index([100 * 1000 + 123, 100 * 1000 + 123], dtype='int64'))
497+
self.assert_index_equal(rng.nanoseconds,
498+
Index([456, 456], dtype='int64'))
499499

500500
self.assertRaises(AttributeError, lambda: rng.hours)
501501
self.assertRaises(AttributeError, lambda: rng.minutes)
@@ -509,6 +509,10 @@ def test_fields(self):
509509
tm.assert_series_equal(s.dt.seconds, Series(
510510
[10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1]))
511511

512+
# preserve name (GH15589)
513+
rng.name = 'name'
514+
assert rng.days.name == 'name'
515+
512516
def test_freq_conversion(self):
513517

514518
# doc example

pandas/tests/scalar/test_timestamp.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -597,9 +597,20 @@ def test_nat_fields(self):
597597
def test_nat_vector_field_access(self):
598598
idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000'])
599599

600+
# non boolean fields
600601
fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute',
601602
'second', 'microsecond', 'nanosecond', 'week', 'dayofyear',
602-
'days_in_month', 'is_leap_year']
603+
'days_in_month']
604+
605+
for field in fields:
606+
result = getattr(idx, field)
607+
expected = [getattr(x, field) for x in idx]
608+
self.assert_index_equal(result, pd.Index(expected))
609+
610+
# boolean fields
611+
fields = ['is_leap_year']
612+
# other boolean fields like 'is_month_start' and 'is_month_end'
613+
# not yet supported by NaT
603614

604615
for field in fields:
605616
result = getattr(idx, field)

pandas/tests/tools/test_pivot.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1367,7 +1367,7 @@ def test_daily(self):
13671367
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
13681368
annual = pivot_annual(ts, 'D')
13691369

1370-
doy = ts.index.dayofyear
1370+
doy = np.asarray(ts.index.dayofyear)
13711371

13721372
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
13731373
doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1

pandas/tests/tools/test_util.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ def test_datetimeindex(self):
3131
# make sure that the ordering on datetimeindex is consistent
3232
x = date_range('2000-01-01', periods=2)
3333
result1, result2 = [Index(y).day for y in cartesian_product([x, x])]
34-
expected1 = np.array([1, 1, 2, 2], dtype=np.int32)
35-
expected2 = np.array([1, 2, 1, 2], dtype=np.int32)
36-
tm.assert_numpy_array_equal(result1, expected1)
37-
tm.assert_numpy_array_equal(result2, expected2)
34+
expected1 = Index([1, 1, 2, 2])
35+
expected2 = Index([1, 2, 1, 2])
36+
tm.assert_index_equal(result1, expected1)
37+
tm.assert_index_equal(result2, expected2)
3838

3939
def test_empty(self):
4040
# product of empty factors

0 commit comments

Comments
 (0)