Skip to content

Commit e4b67ca

Browse files
jbrockmendel authored and jreback committed
REF: Simplify Period/Datetime Array/Index constructors (#23093)
1 parent 12a0dc4 commit e4b67ca

File tree

8 files changed: +71 / -60 lines changed

8 files changed

+71
-60
lines changed

pandas/core/arrays/datetimelike.py

+32
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from pandas.tseries.offsets import Tick, DateOffset
1919

2020
from pandas.core.dtypes.common import (
21+
pandas_dtype,
2122
needs_i8_conversion,
2223
is_list_like,
2324
is_offsetlike,
@@ -901,3 +902,34 @@ def validate_tz_from_dtype(dtype, tz):
901902
except TypeError:
902903
pass
903904
return tz
905+
906+
907+
def validate_dtype_freq(dtype, freq):
    """
    Reconcile an optional period dtype with an optional freq.

    When both are supplied they must agree; when only the dtype is
    supplied, the freq it carries is used. When no dtype is supplied,
    ``freq`` is returned unchanged (including ``None``).

    Parameters
    ----------
    dtype : dtype or None
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset or None

    Raises
    ------
    ValueError : non-period dtype
    IncompatibleFrequency : mismatch between dtype and freq
    """
    if dtype is None:
        # Nothing to check against; hand freq back untouched.
        return freq

    dtype = pandas_dtype(dtype)
    if not is_period_dtype(dtype):
        raise ValueError('dtype must be PeriodDtype')

    if freq is None:
        # Only the dtype was given: use the freq it implies.
        return dtype.freq

    if freq != dtype.freq:
        raise IncompatibleFrequency('specified freq and dtype '
                                    'are different')
    return freq

pandas/core/arrays/datetimes.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,12 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
222222
@classmethod
223223
def _generate_range(cls, start, end, periods, freq, tz=None,
224224
normalize=False, ambiguous='raise', closed=None):
225+
226+
periods = dtl.validate_periods(periods)
227+
if freq is None and any(x is None for x in [periods, start, end]):
228+
raise ValueError('Must provide freq argument if no data is '
229+
'supplied')
230+
225231
if com.count_not_none(start, end, periods, freq) != 3:
226232
raise ValueError('Of the four parameters: start, end, periods, '
227233
'and freq, exactly three must be specified')
@@ -264,7 +270,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
264270
if freq is not None:
265271
if cls._use_cached_range(freq, _normalized, start, end):
266272
# Currently always False; never hit
267-
# Should be reimplemented as apart of GH 17914
273+
# Should be reimplemented as a part of GH#17914
268274
index = cls._cached_range(start, end, periods=periods,
269275
freq=freq)
270276
else:

pandas/core/arrays/period.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pandas.tseries import frequencies
2828
from pandas.tseries.offsets import Tick, DateOffset
2929

30+
from pandas.core.arrays import datetimelike as dtl
3031
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3132

3233

@@ -132,7 +133,7 @@ def __new__(cls, values, freq=None, **kwargs):
132133
# TODO: what if it has tz?
133134
values = dt64arr_to_periodarr(values, freq)
134135

135-
return cls._simple_new(values, freq, **kwargs)
136+
return cls._simple_new(values, freq=freq, **kwargs)
136137

137138
@classmethod
138139
def _simple_new(cls, values, freq=None, **kwargs):
@@ -141,21 +142,27 @@ def _simple_new(cls, values, freq=None, **kwargs):
141142
Ordinals in an ndarray are fastpath-ed to `_from_ordinals`
142143
"""
143144

145+
if is_period_dtype(values):
146+
freq = dtl.validate_dtype_freq(values.dtype, freq)
147+
values = values.asi8
148+
144149
if not is_integer_dtype(values):
145150
values = np.array(values, copy=False)
146151
if len(values) > 0 and is_float_dtype(values):
147152
raise TypeError("{cls} can't take floats"
148153
.format(cls=cls.__name__))
149-
return cls(values, freq=freq)
154+
return cls(values, freq=freq, **kwargs)
150155

151-
return cls._from_ordinals(values, freq)
156+
return cls._from_ordinals(values, freq=freq, **kwargs)
152157

153158
@classmethod
154-
def _from_ordinals(cls, values, freq=None):
159+
def _from_ordinals(cls, values, freq=None, **kwargs):
155160
"""
156161
Values should be int ordinals
157162
`__new__` & `_simple_new` cooerce to ordinals and call this method
158163
"""
164+
# **kwargs are included so that the signature matches PeriodIndex,
165+
# letting us share _simple_new
159166

160167
values = np.array(values, dtype='int64', copy=False)
161168

pandas/core/arrays/timedeltas.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -131,15 +131,6 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None,
131131

132132
freq, freq_infer = dtl.maybe_infer_freq(freq)
133133

134-
if values is None:
135-
# TODO: Remove this block and associated kwargs; GH#20535
136-
if freq is None and com._any_none(periods, start, end):
137-
raise ValueError('Must provide freq argument if no data is '
138-
'supplied')
139-
periods = dtl.validate_periods(periods)
140-
return cls._generate_range(start, end, periods, freq,
141-
closed=closed)
142-
143134
result = cls._simple_new(values, freq=freq)
144135
if freq_infer:
145136
inferred = result.inferred_freq
@@ -151,6 +142,12 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None,
151142
@classmethod
152143
def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):
153144
# **kwargs are for compat with TimedeltaIndex, which includes `name`
145+
146+
periods = dtl.validate_periods(periods)
147+
if freq is None and any(x is None for x in [periods, start, end]):
148+
raise ValueError('Must provide freq argument if no data is '
149+
'supplied')
150+
154151
if com.count_not_none(start, end, periods, freq) != 3:
155152
raise ValueError('Of the four parameters: start, end, periods, '
156153
'and freq, exactly three must be specified')

pandas/core/indexes/datetimes.py

-4
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,6 @@ def __new__(cls, data=None,
241241

242242
if data is None:
243243
# TODO: Remove this block and associated kwargs; GH#20535
244-
if freq is None and com._any_none(periods, start, end):
245-
raise ValueError('Must provide freq argument if no data is '
246-
'supplied')
247-
periods = dtl.validate_periods(periods)
248244
return cls._generate_range(start, end, periods, name, freq,
249245
tz=tz, normalize=normalize,
250246
closed=closed, ambiguous=ambiguous)

pandas/core/indexes/period.py

+9-36
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
is_integer,
99
is_float,
1010
is_integer_dtype,
11-
is_float_dtype,
1211
is_scalar,
1312
is_datetime64_dtype,
1413
is_datetime64_any_dtype,
@@ -171,15 +170,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
171170
if name is None and hasattr(data, 'name'):
172171
name = data.name
173172

174-
if dtype is not None:
175-
dtype = pandas_dtype(dtype)
176-
if not is_period_dtype(dtype):
177-
raise ValueError('dtype must be PeriodDtype')
178-
if freq is None:
179-
freq = dtype.freq
180-
elif freq != dtype.freq:
181-
msg = 'specified freq and dtype are different'
182-
raise IncompatibleFrequency(msg)
173+
freq = dtl.validate_dtype_freq(dtype, freq)
183174

184175
# coerce freq to freq object, otherwise it can be coerced elementwise
185176
# which is slow
@@ -192,7 +183,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
192183
else:
193184
data, freq = cls._generate_range(start, end, periods,
194185
freq, fields)
195-
return cls._from_ordinals(data, name=name, freq=freq)
186+
return cls._simple_new(data, name=name, freq=freq)
196187

197188
if isinstance(data, PeriodIndex):
198189
if freq is None or freq == data.freq: # no freq change
@@ -208,7 +199,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
208199
# not array / index
209200
if not isinstance(data, (np.ndarray, PeriodIndex,
210201
DatetimeIndex, Int64Index)):
211-
if is_scalar(data) or isinstance(data, Period):
202+
if is_scalar(data):
212203
cls._scalar_data_error(data)
213204

214205
# other iterable of some kind
@@ -220,7 +211,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
220211
# datetime other than period
221212
if is_datetime64_dtype(data.dtype):
222213
data = dt64arr_to_periodarr(data, freq, tz)
223-
return cls._from_ordinals(data, name=name, freq=freq)
214+
return cls._simple_new(data, name=name, freq=freq)
224215

225216
# check not floats
226217
if infer_dtype(data) == 'floating' and len(data) > 0:
@@ -231,33 +222,15 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
231222
data = ensure_object(data)
232223
freq = freq or period.extract_freq(data)
233224
data = period.extract_ordinals(data, freq)
234-
return cls._from_ordinals(data, name=name, freq=freq)
225+
return cls._simple_new(data, name=name, freq=freq)
235226

236227
@cache_readonly
237228
def _engine(self):
238229
return self._engine_type(lambda: self, len(self))
239230

240231
@classmethod
241-
def _simple_new(cls, values, name=None, freq=None, **kwargs):
242-
"""
243-
Values can be any type that can be coerced to Periods.
244-
Ordinals in an ndarray are fastpath-ed to `_from_ordinals`
245-
"""
246-
if not is_integer_dtype(values):
247-
values = np.array(values, copy=False)
248-
if len(values) > 0 and is_float_dtype(values):
249-
raise TypeError("PeriodIndex can't take floats")
250-
return cls(values, name=name, freq=freq, **kwargs)
251-
252-
return cls._from_ordinals(values, name, freq, **kwargs)
253-
254-
@classmethod
255-
def _from_ordinals(cls, values, name=None, freq=None, **kwargs):
256-
"""
257-
Values should be int ordinals
258-
`__new__` & `_simple_new` cooerce to ordinals and call this method
259-
"""
260-
result = super(PeriodIndex, cls)._from_ordinals(values, freq)
232+
def _simple_new(cls, values, freq=None, name=None, **kwargs):
233+
result = super(PeriodIndex, cls)._simple_new(values, freq)
261234

262235
result.name = name
263236
result._reset_identity()
@@ -702,8 +675,8 @@ def _wrap_union_result(self, other, result):
702675

703676
def _apply_meta(self, rawarr):
704677
if not isinstance(rawarr, PeriodIndex):
705-
rawarr = PeriodIndex._from_ordinals(rawarr, freq=self.freq,
706-
name=self.name)
678+
rawarr = PeriodIndex._simple_new(rawarr, freq=self.freq,
679+
name=self.name)
707680
return rawarr
708681

709682
def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):

pandas/io/pytables.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2476,15 +2476,15 @@ def _get_index_factory(self, klass):
24762476
if klass == DatetimeIndex:
24772477
def f(values, freq=None, tz=None):
24782478
# data are already in UTC, localize and convert if tz present
2479-
result = DatetimeIndex._simple_new(values.values, None,
2479+
result = DatetimeIndex._simple_new(values.values, name=None,
24802480
freq=freq)
24812481
if tz is not None:
24822482
result = result.tz_localize('UTC').tz_convert(tz)
24832483
return result
24842484
return f
24852485
elif klass == PeriodIndex:
24862486
def f(values, freq=None, tz=None):
2487-
return PeriodIndex._simple_new(values, None, freq=freq)
2487+
return PeriodIndex._simple_new(values, name=None, freq=freq)
24882488
return f
24892489

24902490
return klass

pandas/tests/indexes/period/test_construction.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -264,20 +264,20 @@ def test_constructor_mixed(self):
264264

265265
def test_constructor_simple_new(self):
266266
idx = period_range('2007-01', name='p', periods=2, freq='M')
267-
result = idx._simple_new(idx, 'p', freq=idx.freq)
267+
result = idx._simple_new(idx, name='p', freq=idx.freq)
268268
tm.assert_index_equal(result, idx)
269269

270-
result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq)
270+
result = idx._simple_new(idx.astype('i8'), name='p', freq=idx.freq)
271271
tm.assert_index_equal(result, idx)
272272

273273
result = idx._simple_new([pd.Period('2007-01', freq='M'),
274274
pd.Period('2007-02', freq='M')],
275-
'p', freq=idx.freq)
275+
name='p', freq=idx.freq)
276276
tm.assert_index_equal(result, idx)
277277

278278
result = idx._simple_new(np.array([pd.Period('2007-01', freq='M'),
279279
pd.Period('2007-02', freq='M')]),
280-
'p', freq=idx.freq)
280+
name='p', freq=idx.freq)
281281
tm.assert_index_equal(result, idx)
282282

283283
def test_constructor_simple_new_empty(self):

0 commit comments

Comments
 (0)