Skip to content

Commit e23e6f1

Browse files
sinhrks authored and jreback committed
API: PeriodIndex.values now return array of Period objects
split from #13941 (comment) Author: sinhrks <[email protected]> Closes #13988 from sinhrks/period_values and squashes the following commits: d7637c9 [sinhrks] API: PeriodIndex.values now return array of Period objects
1 parent 3923fcd commit e23e6f1

File tree

14 files changed

+219
-109
lines changed

14 files changed

+219
-109
lines changed

doc/source/whatsnew/v0.19.0.txt

+22-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Highlights include:
1616
- :func:`merge_asof` for asof-style time-series joining, see :ref:`here <whatsnew_0190.enhancements.asof_merge>`
1717
- ``.rolling()`` is now time-series aware, see :ref:`here <whatsnew_0190.enhancements.rolling_ts>`
1818
- pandas development api, see :ref:`here <whatsnew_0190.dev_api>`
19-
- ``PeriodIndex`` now has its own ``period`` dtype. see ref:`here <whatsnew_0190.api.perioddtype>`
19+
- ``PeriodIndex`` now has its own ``period`` dtype, and has been changed to be more consistent with other ``Index`` classes. See :ref:`here <whatsnew_0190.api.period>`
2020

2121
.. contents:: What's new in v0.19.0
2222
:local:
@@ -643,10 +643,13 @@ Furthermore:
643643
- Passing duplicated ``percentiles`` will now raise a ``ValueError``.
644644
- Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`)
645645

646-
.. _whatsnew_0190.api.perioddtype:
646+
.. _whatsnew_0190.api.period:
647+
648+
``Period`` changes
649+
^^^^^^^^^^^^^^^^^^
647650

648651
``PeriodIndex`` now has ``period`` dtype
649-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
652+
""""""""""""""""""""""""""""""""""""""""
650653

651654
``PeriodIndex`` now has its own ``period`` dtype. The ``period`` dtype is a
652655
pandas extension dtype like ``category`` or :ref:`timezone aware dtype <timeseries.timezone_series>` (``datetime64[ns, tz]``). (:issue:`13941`).
@@ -681,7 +684,7 @@ New Behavior:
681684
.. _whatsnew_0190.api.periodnat:
682685

683686
``Period('NaT')`` now returns ``pd.NaT``
684-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
687+
""""""""""""""""""""""""""""""""""""""""
685688

686689
Previously, ``Period`` had its own ``Period('NaT')`` representation different from ``pd.NaT``. Now ``Period('NaT')`` has been changed to return ``pd.NaT``. (:issue:`12759`, :issue:`13582`)
687690

@@ -719,6 +722,21 @@ New Behavior:
719722
pd.NaT + 1
720723
pd.NaT - 1
721724

725+
``PeriodIndex.values`` now returns an array of ``Period`` objects
726+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
727+
728+
``.values`` is changed to return an array of ``Period`` objects, rather than an array
729+
of ``int64`` (:issue:`13988`)
730+
731+
Previous Behavior:

.. code-block:: ipython
732+
In [6]: pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
733+
In [7]: pi.values
734+
array([492, 493])
735+
736+
New Behavior:

.. ipython:: python
737+
738+
pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
739+
pi.values
722740

723741
.. _whatsnew_0190.api.difference:
724742

pandas/indexes/base.py

+13-14
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ def _constructor(self):
12511251
@cache_readonly
12521252
def _engine(self):
12531253
# property, for now, slow to look up
1254-
return self._engine_type(lambda: self.values, len(self))
1254+
return self._engine_type(lambda: self._values, len(self))
12551255

12561256
def _validate_index_level(self, level):
12571257
"""
@@ -1823,13 +1823,13 @@ def union(self, other):
18231823

18241824
if self.is_monotonic and other.is_monotonic:
18251825
try:
1826-
result = self._outer_indexer(self.values, other._values)[0]
1826+
result = self._outer_indexer(self._values, other._values)[0]
18271827
except TypeError:
18281828
# incomparable objects
1829-
result = list(self.values)
1829+
result = list(self._values)
18301830

18311831
# worth making this faster? a very unusual case
1832-
value_set = set(self.values)
1832+
value_set = set(self._values)
18331833
result.extend([x for x in other._values if x not in value_set])
18341834
else:
18351835
indexer = self.get_indexer(other)
@@ -1838,10 +1838,10 @@ def union(self, other):
18381838
if len(indexer) > 0:
18391839
other_diff = algos.take_nd(other._values, indexer,
18401840
allow_fill=False)
1841-
result = _concat._concat_compat((self.values, other_diff))
1841+
result = _concat._concat_compat((self._values, other_diff))
18421842

18431843
try:
1844-
self.values[0] < other_diff[0]
1844+
self._values[0] < other_diff[0]
18451845
except TypeError as e:
18461846
warnings.warn("%s, sort order is undefined for "
18471847
"incomparable objects" % e, RuntimeWarning,
@@ -1853,7 +1853,7 @@ def union(self, other):
18531853
result.sort()
18541854

18551855
else:
1856-
result = self.values
1856+
result = self._values
18571857

18581858
try:
18591859
result = np.sort(result)
@@ -1906,17 +1906,17 @@ def intersection(self, other):
19061906

19071907
if self.is_monotonic and other.is_monotonic:
19081908
try:
1909-
result = self._inner_indexer(self.values, other._values)[0]
1909+
result = self._inner_indexer(self._values, other._values)[0]
19101910
return self._wrap_union_result(other, result)
19111911
except TypeError:
19121912
pass
19131913

19141914
try:
1915-
indexer = Index(self.values).get_indexer(other._values)
1915+
indexer = Index(self._values).get_indexer(other._values)
19161916
indexer = indexer.take((indexer != -1).nonzero()[0])
19171917
except:
19181918
# duplicates
1919-
indexer = Index(self.values).get_indexer_non_unique(
1919+
indexer = Index(self._values).get_indexer_non_unique(
19201920
other._values)[0].unique()
19211921
indexer = indexer[indexer != -1]
19221922

@@ -2536,7 +2536,7 @@ def _reindex_non_unique(self, target):
25362536
missing = _ensure_platform_int(missing)
25372537
missing_labels = target.take(missing)
25382538
missing_indexer = _ensure_int64(l[~check])
2539-
cur_labels = self.take(indexer[check])._values
2539+
cur_labels = self.take(indexer[check]).values
25402540
cur_indexer = _ensure_int64(l[check])
25412541

25422542
new_labels = np.empty(tuple([len(indexer)]), dtype=object)
@@ -2556,7 +2556,7 @@ def _reindex_non_unique(self, target):
25562556
else:
25572557

25582558
# need to retake to have the same size as the indexer
2559-
indexer = indexer._values
2559+
indexer = indexer.values
25602560
indexer[~check] = 0
25612561

25622562
# reset the new indexer to account for the new size
@@ -2879,7 +2879,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
28792879
else:
28802880
return ret_index
28812881

2882-
sv = self.values
2882+
sv = self._values
28832883
ov = other._values
28842884

28852885
if self.is_unique and other.is_unique:
@@ -3185,7 +3185,6 @@ def insert(self, loc, item):
31853185
"""
31863186
_self = np.asarray(self)
31873187
item = self._coerce_scalar_to_index(item)._values
3188-
31893188
idx = np.concatenate((_self[:loc], item, _self[loc:]))
31903189
return self._shallow_copy_with_infer(idx)
31913190

pandas/io/pytables.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -2349,6 +2349,11 @@ def f(values, freq=None, tz=None):
23492349
return DatetimeIndex._simple_new(values, None, freq=freq,
23502350
tz=tz)
23512351
return f
2352+
elif klass == PeriodIndex:
2353+
def f(values, freq=None, tz=None):
2354+
return PeriodIndex._simple_new(values, None, freq=freq)
2355+
return f
2356+
23522357
return klass
23532358

23542359
def validate_read(self, kwargs):
@@ -2450,7 +2455,9 @@ def write_index(self, key, index):
24502455
setattr(self.attrs, '%s_variety' % key, 'regular')
24512456
converted = _convert_index(index, self.encoding,
24522457
self.format_type).set_name('index')
2458+
24532459
self.write_array(key, converted.values)
2460+
24542461
node = getattr(self.group, key)
24552462
node._v_attrs.kind = converted.kind
24562463
node._v_attrs.name = index.name
@@ -2552,12 +2559,12 @@ def read_index_node(self, node, start=None, stop=None):
25522559
kwargs['tz'] = node._v_attrs['tz']
25532560

25542561
if kind in (u('date'), u('datetime')):
2555-
index = factory(
2556-
_unconvert_index(data, kind, encoding=self.encoding),
2557-
dtype=object, **kwargs)
2562+
index = factory(_unconvert_index(data, kind,
2563+
encoding=self.encoding),
2564+
dtype=object, **kwargs)
25582565
else:
2559-
index = factory(
2560-
_unconvert_index(data, kind, encoding=self.encoding), **kwargs)
2566+
index = factory(_unconvert_index(data, kind,
2567+
encoding=self.encoding), **kwargs)
25612568

25622569
index.name = name
25632570

@@ -4377,9 +4384,10 @@ def _convert_index(index, encoding=None, format_type=None):
43774384
index_name=index_name)
43784385
elif isinstance(index, (Int64Index, PeriodIndex)):
43794386
atom = _tables().Int64Col()
4380-
return IndexCol(
4381-
index.values, 'integer', atom, freq=getattr(index, 'freq', None),
4382-
index_name=index_name)
4387+
# avoid storing an ndarray of Period objects
4388+
return IndexCol(index._values, 'integer', atom,
4389+
freq=getattr(index, 'freq', None),
4390+
index_name=index_name)
43834391

43844392
if isinstance(index, MultiIndex):
43854393
raise TypeError('MultiIndex not supported here!')

pandas/tests/indexes/common.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,18 @@ def test_ensure_copied_data(self):
245245
tm.assert_numpy_array_equal(index.values, result.values,
246246
check_same='copy')
247247

248-
result = index_type(index.values, copy=False, **init_kwargs)
249-
tm.assert_numpy_array_equal(index.values, result.values,
250-
check_same='same')
248+
if not isinstance(index, PeriodIndex):
249+
result = index_type(index.values, copy=False, **init_kwargs)
250+
tm.assert_numpy_array_equal(index.values, result.values,
251+
check_same='same')
252+
tm.assert_numpy_array_equal(index._values, result._values,
253+
check_same='same')
254+
else:
255+
# .values is an object array of Period, thus copied
256+
result = index_type(ordinal=index.asi8, copy=False,
257+
**init_kwargs)
258+
tm.assert_numpy_array_equal(index._values, result._values,
259+
check_same='same')
251260

252261
def test_copy_and_deepcopy(self):
253262
from copy import copy, deepcopy

pandas/tests/indexes/test_datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,7 @@ def test_astype(self):
781781
idx = period_range('1990', '2009', freq='A')
782782
result = idx.astype('i8')
783783
self.assert_index_equal(result, Index(idx.asi8))
784-
self.assert_numpy_array_equal(result.values, idx.values)
784+
self.assert_numpy_array_equal(result.values, idx.asi8)
785785

786786
def test_astype_raises(self):
787787
# GH 13149, GH 13209

pandas/tests/indexing/test_coercion.py

+21-7
Original file line numberDiff line numberDiff line change
@@ -490,16 +490,30 @@ def test_insert_index_period(self):
490490
self._assert_insert_conversion(obj, pd.Period('2012-01', freq='M'),
491491
exp, 'period[M]')
492492

493-
# ToDo: must coerce to object?
494-
exp = pd.PeriodIndex(['2011-01', '2012-01', '2011-02',
495-
'2011-03', '2011-04'], freq='M')
493+
# period + datetime64 => object
494+
exp = pd.Index([pd.Period('2011-01', freq='M'),
495+
pd.Timestamp('2012-01-01'),
496+
pd.Period('2011-02', freq='M'),
497+
pd.Period('2011-03', freq='M'),
498+
pd.Period('2011-04', freq='M')], freq='M')
496499
self._assert_insert_conversion(obj, pd.Timestamp('2012-01-01'),
497-
exp, 'period[M]')
500+
exp, np.object)
498501

499502
# period + int => object
500-
msg = "Given date string not likely a datetime."
501-
with tm.assertRaisesRegexp(ValueError, msg):
502-
print(obj.insert(1, 1))
503+
exp = pd.Index([pd.Period('2011-01', freq='M'),
504+
1,
505+
pd.Period('2011-02', freq='M'),
506+
pd.Period('2011-03', freq='M'),
507+
pd.Period('2011-04', freq='M')], freq='M')
508+
self._assert_insert_conversion(obj, 1, exp, np.object)
509+
510+
# period + object => object
511+
exp = pd.Index([pd.Period('2011-01', freq='M'),
512+
'x',
513+
pd.Period('2011-02', freq='M'),
514+
pd.Period('2011-03', freq='M'),
515+
pd.Period('2011-04', freq='M')], freq='M')
516+
self._assert_insert_conversion(obj, 'x', exp, np.object)
503517

504518

505519
class TestWhereCoercion(CoercionBase, tm.TestCase):

pandas/tests/indexing/test_indexing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4137,8 +4137,8 @@ def test_series_partial_set_period(self):
41374137
idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx')
41384138
ser = Series([0.1, 0.2], index=idx, name='s')
41394139

4140-
result = ser.loc[[pd.Period('2011-01-01', freq='D'), pd.Period(
4141-
'2011-01-02', freq='D')]]
4140+
result = ser.loc[[pd.Period('2011-01-01', freq='D'),
4141+
pd.Period('2011-01-02', freq='D')]]
41424142
exp = Series([0.1, 0.2], index=idx, name='s')
41434143
tm.assert_series_equal(result, exp, check_index_type=True)
41444144

pandas/tests/test_base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ def test_ops(self):
393393
if not isinstance(o, PeriodIndex):
394394
expected = getattr(o.values, op)()
395395
else:
396-
expected = pd.Period(ordinal=getattr(o.values, op)(),
396+
expected = pd.Period(ordinal=getattr(o._values, op)(),
397397
freq=o.freq)
398398
try:
399399
self.assertEqual(result, expected)

pandas/tseries/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ def sort_values(self, return_indexer=False, ascending=True):
323323
sorted_index = self.take(_as)
324324
return sorted_index, _as
325325
else:
326-
sorted_values = np.sort(self.values)
326+
sorted_values = np.sort(self._values)
327327
attribs = self._get_attributes_dict()
328328
freq = attribs['freq']
329329

pandas/tseries/converter.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -141,11 +141,11 @@ def convert(values, units, axis):
141141
is_float(values)):
142142
return get_datevalue(values, axis.freq)
143143
if isinstance(values, PeriodIndex):
144-
return values.asfreq(axis.freq).values
144+
return values.asfreq(axis.freq)._values
145145
if isinstance(values, Index):
146146
return values.map(lambda x: get_datevalue(x, axis.freq))
147147
if is_period_arraylike(values):
148-
return PeriodIndex(values, freq=axis.freq).values
148+
return PeriodIndex(values, freq=axis.freq)._values
149149
if isinstance(values, (list, tuple, np.ndarray, Index)):
150150
return [get_datevalue(x, axis.freq) for x in values]
151151
return values
@@ -518,7 +518,7 @@ def _daily_finder(vmin, vmax, freq):
518518
info = np.zeros(span,
519519
dtype=[('val', np.int64), ('maj', bool),
520520
('min', bool), ('fmt', '|S20')])
521-
info['val'][:] = dates_.values
521+
info['val'][:] = dates_._values
522522
info['fmt'][:] = ''
523523
info['maj'][[0, -1]] = True
524524
# .. and set some shortcuts

0 commit comments

Comments
 (0)