Skip to content

Commit 0e40ed4

Browse files
committed
Simplify Period/Datetime Array/Index constructors
1 parent 296c251 commit 0e40ed4

File tree

7 files changed

+63
-47
lines changed

7 files changed

+63
-47
lines changed

pandas/core/arrays/datetimelike.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from pandas.tseries.offsets import Tick, DateOffset
1919

2020
from pandas.core.dtypes.common import (
21+
pandas_dtype,
2122
needs_i8_conversion,
2223
is_list_like,
2324
is_offsetlike,
@@ -911,3 +912,34 @@ def validate_tz_from_dtype(dtype, tz):
911912
except TypeError:
912913
pass
913914
return tz
915+
916+
917+
def validate_dtype_freq(dtype, freq):
    """
    Reconcile a period dtype with an explicitly supplied frequency.

    When no dtype is given, `freq` is handed back unchanged.  Otherwise the
    dtype has to be a period dtype: its frequency is used when `freq` is
    None, and it has to compare equal to `freq` when both are supplied.

    Parameters
    ----------
    dtype : dtype or None
        Converted with ``pandas_dtype`` before being inspected.
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset or None
        The frequency to use; None when both `dtype` and `freq` are None.

    Raises
    ------
    ValueError
        If `dtype` is given but is not a period dtype.
    IncompatibleFrequency
        If `freq` is given and differs from the frequency of `dtype`.
    """
    if dtype is None:
        # Nothing to validate against; pass the caller's freq straight through.
        return freq

    dtype = pandas_dtype(dtype)
    if not is_period_dtype(dtype):
        raise ValueError('dtype must be PeriodDtype')

    implied = dtype.freq
    if freq is None:
        # Only the dtype carries frequency information.
        return implied
    if freq != implied:
        raise IncompatibleFrequency('specified freq and dtype are different')
    return freq

pandas/core/arrays/datetimes.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,12 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
222222
@classmethod
223223
def _generate_range(cls, start, end, periods, freq, tz=None,
224224
normalize=False, ambiguous='raise', closed=None):
225+
226+
periods = dtl.validate_periods(periods)
227+
if freq is None and any(x is None for x in [periods, start, end]):
228+
raise ValueError('Must provide freq argument if no data is '
229+
'supplied')
230+
225231
if com.count_not_none(start, end, periods, freq) != 3:
226232
raise ValueError('Of the four parameters: start, end, periods, '
227233
'and freq, exactly three must be specified')
@@ -264,7 +270,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
264270
if freq is not None:
265271
if cls._use_cached_range(freq, _normalized, start, end):
266272
# Currently always False; never hit
267-
# Should be reimplemented as apart of GH 17914
273+
# Should be reimplemented as a part of GH#17914
268274
index = cls._cached_range(start, end, periods=periods,
269275
freq=freq)
270276
else:

pandas/core/arrays/period.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pandas.tseries import frequencies
2828
from pandas.tseries.offsets import Tick, DateOffset
2929

30+
from pandas.core.arrays import datetimelike as dtl
3031
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3132

3233

@@ -132,7 +133,7 @@ def __new__(cls, values, freq=None, **kwargs):
132133
# TODO: what if it has tz?
133134
values = dt64arr_to_periodarr(values, freq)
134135

135-
return cls._simple_new(values, freq, **kwargs)
136+
return cls._simple_new(values, freq=freq, **kwargs)
136137

137138
@classmethod
138139
def _simple_new(cls, values, freq=None, **kwargs):
@@ -141,21 +142,27 @@ def _simple_new(cls, values, freq=None, **kwargs):
141142
Ordinals in an ndarray are fastpath-ed to `_from_ordinals`
142143
"""
143144

145+
if is_period_dtype(values):
146+
freq = dtl.validate_dtype_freq(values.dtype, freq)
147+
values = values.asi8
148+
144149
if not is_integer_dtype(values):
145150
values = np.array(values, copy=False)
146151
if len(values) > 0 and is_float_dtype(values):
147152
raise TypeError("{cls} can't take floats"
148153
.format(cls=cls.__name__))
149-
return cls(values, freq=freq)
154+
return cls(values, freq=freq, **kwargs)
150155

151-
return cls._from_ordinals(values, freq)
156+
return cls._from_ordinals(values, freq=freq, **kwargs)
152157

153158
@classmethod
154-
def _from_ordinals(cls, values, freq=None):
159+
def _from_ordinals(cls, values, freq=None, **kwargs):
155160
"""
156161
Values should be int ordinals
157162
`__new__` & `_simple_new` cooerce to ordinals and call this method
158163
"""
164+
# **kwargs are included so that the signature matches PeriodIndex,
165+
# letting us share _simple_new
159166

160167
values = np.array(values, dtype='int64', copy=False)
161168

pandas/core/arrays/timedeltas.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -131,15 +131,6 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None,
131131

132132
freq, freq_infer = dtl.maybe_infer_freq(freq)
133133

134-
if values is None:
135-
# TODO: Remove this block and associated kwargs; GH#20535
136-
if freq is None and com._any_none(periods, start, end):
137-
raise ValueError('Must provide freq argument if no data is '
138-
'supplied')
139-
periods = dtl.validate_periods(periods)
140-
return cls._generate_range(start, end, periods, freq,
141-
closed=closed)
142-
143134
result = cls._simple_new(values, freq=freq)
144135
if freq_infer:
145136
inferred = result.inferred_freq
@@ -151,6 +142,12 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None,
151142
@classmethod
152143
def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):
153144
# **kwargs are for compat with TimedeltaIndex, which includes `name`
145+
146+
periods = dtl.validate_periods(periods)
147+
if freq is None and any(x is None for x in [periods, start, end]):
148+
raise ValueError('Must provide freq argument if no data is '
149+
'supplied')
150+
154151
if com.count_not_none(start, end, periods, freq) != 3:
155152
raise ValueError('Of the four parameters: start, end, periods, '
156153
'and freq, exactly three must be specified')

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -294,10 +294,6 @@ def __new__(cls, data=None,
294294

295295
if data is None:
296296
# TODO: Remove this block and associated kwargs; GH#20535
297-
if freq is None and com._any_none(periods, start, end):
298-
raise ValueError('Must provide freq argument if no data is '
299-
'supplied')
300-
periods = dtl.validate_periods(periods)
301297
return cls._generate_range(start, end, periods, name, freq,
302298
tz=tz, normalize=normalize,
303299
closed=closed, ambiguous=ambiguous)

pandas/core/indexes/period.py

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -181,15 +181,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
181181
if name is None and hasattr(data, 'name'):
182182
name = data.name
183183

184-
if dtype is not None:
185-
dtype = pandas_dtype(dtype)
186-
if not is_period_dtype(dtype):
187-
raise ValueError('dtype must be PeriodDtype')
188-
if freq is None:
189-
freq = dtype.freq
190-
elif freq != dtype.freq:
191-
msg = 'specified freq and dtype are different'
192-
raise IncompatibleFrequency(msg)
184+
freq = dtl.validate_dtype_freq(dtype, freq)
193185

194186
# coerce freq to freq object, otherwise it can be coerced elementwise
195187
# which is slow
@@ -218,7 +210,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
218210
# not array / index
219211
if not isinstance(data, (np.ndarray, PeriodIndex,
220212
DatetimeIndex, Int64Index)):
221-
if is_scalar(data) or isinstance(data, Period):
213+
if is_scalar(data):
222214
cls._scalar_data_error(data)
223215

224216
# other iterable of some kind
@@ -248,21 +240,7 @@ def _engine(self):
248240
return self._engine_type(lambda: self, len(self))
249241

250242
@classmethod
251-
def _simple_new(cls, values, name=None, freq=None, **kwargs):
252-
"""
253-
Values can be any type that can be coerced to Periods.
254-
Ordinals in an ndarray are fastpath-ed to `_from_ordinals`
255-
"""
256-
if not is_integer_dtype(values):
257-
values = np.array(values, copy=False)
258-
if len(values) > 0 and is_float_dtype(values):
259-
raise TypeError("PeriodIndex can't take floats")
260-
return cls(values, name=name, freq=freq, **kwargs)
261-
262-
return cls._from_ordinals(values, name, freq, **kwargs)
263-
264-
@classmethod
265-
def _from_ordinals(cls, values, name=None, freq=None, **kwargs):
243+
def _from_ordinals(cls, values, freq=None, name=None):
266244
"""
267245
Values should be int ordinals
268246
`__new__` & `_simple_new` cooerce to ordinals and call this method

pandas/tests/indexes/period/test_construction.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,20 +264,20 @@ def test_constructor_mixed(self):
264264

265265
def test_constructor_simple_new(self):
266266
idx = period_range('2007-01', name='p', periods=2, freq='M')
267-
result = idx._simple_new(idx, 'p', freq=idx.freq)
267+
result = idx._simple_new(idx, name='p', freq=idx.freq)
268268
tm.assert_index_equal(result, idx)
269269

270-
result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq)
270+
result = idx._simple_new(idx.astype('i8'), name='p', freq=idx.freq)
271271
tm.assert_index_equal(result, idx)
272272

273273
result = idx._simple_new([pd.Period('2007-01', freq='M'),
274274
pd.Period('2007-02', freq='M')],
275-
'p', freq=idx.freq)
275+
name='p', freq=idx.freq)
276276
tm.assert_index_equal(result, idx)
277277

278278
result = idx._simple_new(np.array([pd.Period('2007-01', freq='M'),
279279
pd.Period('2007-02', freq='M')]),
280-
'p', freq=idx.freq)
280+
name='p', freq=idx.freq)
281281
tm.assert_index_equal(result, idx)
282282

283283
def test_constructor_simple_new_empty(self):

0 commit comments

Comments
 (0)