Skip to content

CLN: Simplify Period Construction / Resolution #7607

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 6, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
6 changes: 4 additions & 2 deletions pandas/io/tests/generate_legacy_pickles.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def create_data():
from pandas import (Series,TimeSeries,DataFrame,Panel,
SparseSeries,SparseTimeSeries,SparseDataFrame,SparsePanel,
Index,MultiIndex,PeriodIndex,
date_range,bdate_range,Timestamp)
date_range,period_range,bdate_range,Timestamp)
nan = np.nan

data = {
Expand All @@ -70,7 +70,9 @@ def create_data():
}

index = dict(int = Index(np.arange(10)),
date = date_range('20130101',periods=10))
date = date_range('20130101',periods=10),
period = period_range('2013-01-01', freq='M', periods=10))

mi = dict(reg2 = MultiIndex.from_tuples(tuple(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
names=['first', 'second']))
Expand Down
55 changes: 31 additions & 24 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,40 @@ class FreqGroup(object):

class Resolution(object):

RESO_US = 0
RESO_SEC = 1
RESO_MIN = 2
RESO_HR = 3
RESO_DAY = 4
RESO_US = tslib.US_RESO
RESO_MS = tslib.MS_RESO
RESO_SEC = tslib.S_RESO
RESO_MIN = tslib.T_RESO
RESO_HR = tslib.H_RESO
RESO_DAY = tslib.D_RESO

_reso_str_map = {
RESO_US: 'microsecond',
RESO_MS: 'millisecond',
RESO_SEC: 'second',
RESO_MIN: 'minute',
RESO_HR: 'hour',
RESO_DAY: 'day'}

_reso_period_map = {
'year': 'A',
'quarter': 'Q',
'month': 'M',
'day': 'D',
'hour': 'H',
'minute': 'T',
'second': 'S',
'millisecond': 'L',
'microsecond': 'U',
'nanosecond': 'N'}

@classmethod
def get_str(cls, reso):
return {cls.RESO_US: 'microsecond',
cls.RESO_SEC: 'second',
cls.RESO_MIN: 'minute',
cls.RESO_HR: 'hour',
cls.RESO_DAY: 'day'}.get(reso, 'day')
return cls._reso_str_map.get(reso, 'day')

@classmethod
def get_freq(cls, resostr):
return cls._reso_period_map[resostr]

def get_reso_string(reso):
return Resolution.get_str(reso)
Expand Down Expand Up @@ -571,22 +591,9 @@ def _period_alias_dictionary():

return alias_dict

_reso_period_map = {
"year": "A",
"quarter": "Q",
"month": "M",
"day": "D",
"hour": "H",
"minute": "T",
"second": "S",
"millisecond": "L",
"microsecond": "U",
"nanosecond": "N",
}


def _infer_period_group(freqstr):
return _period_group(_reso_period_map[freqstr])
return _period_group(Resolution._reso_period_map[freqstr])


def _period_group(freqstr):
Expand Down
38 changes: 7 additions & 31 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,14 @@ def __init__(self, value=None, freq=None, ordinal=None,
elif isinstance(value, compat.string_types) or com.is_integer(value):
if com.is_integer(value):
value = str(value)
value = value.upper()

dt, freq = _get_date_and_freq(value, freq)
dt, _, reso = parse_time_string(value, freq)
if freq is None:
try:
freq = _freq_mod.Resolution.get_freq(reso)
except KeyError:
raise ValueError("Invalid frequency or could not infer: %s" % reso)

elif isinstance(value, datetime):
dt = value
Expand Down Expand Up @@ -451,36 +457,6 @@ def strftime(self, fmt):
return tslib.period_format(self.ordinal, base, fmt)


def _get_date_and_freq(value, freq):
value = value.upper()
dt, _, reso = parse_time_string(value, freq)

if freq is None:
if reso == 'year':
freq = 'A'
elif reso == 'quarter':
freq = 'Q'
elif reso == 'month':
freq = 'M'
elif reso == 'day':
freq = 'D'
elif reso == 'hour':
freq = 'H'
elif reso == 'minute':
freq = 'T'
elif reso == 'second':
freq = 'S'
elif reso == 'microsecond':
if dt.microsecond % 1000 == 0:
freq = 'L'
else:
freq = 'U'
else:
raise ValueError("Invalid frequency or could not infer: %s" % reso)

return dt, freq


def _get_ordinals(data, freq):
f = lambda x: Period(x, freq=freq).ordinal
if isinstance(data[0], Period):
Expand Down
8 changes: 8 additions & 0 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,14 @@ def test_period_constructor(self):
i2 = Period(datetime(2007, 1, 1), freq='M')
self.assertEqual(i1, i2)

i1 = Period('2007-01-01 09:00:00.001')
expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L')
self.assertEqual(i1, expected)

i1 = Period('2007-01-01 09:00:00.00101')
expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U')
self.assertEqual(i1, expected)

self.assertRaises(ValueError, Period, ordinal=200701)

self.assertRaises(ValueError, Period, '2007-1-1', freq='X')
Expand Down
11 changes: 11 additions & 0 deletions pandas/tseries/tests/test_tslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,17 @@ def test_addition_subtraction_preserve_frequency(self):
self.assertEqual((timestamp_instance + timedelta64_instance).freq, original_freq)
self.assertEqual((timestamp_instance - timedelta64_instance).freq, original_freq)

def test_resolution(self):

for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'],
[tslib.D_RESO, tslib.D_RESO, tslib.D_RESO, tslib.D_RESO,
tslib.H_RESO, tslib.T_RESO,tslib.S_RESO, tslib.MS_RESO, tslib.US_RESO]):
for tz in [None, 'Asia/Tokyo', 'US/Eastern']:
idx = date_range(start='2013-04-01', periods=30, freq=freq, tz=tz)
result = tslib.resolution(idx.asi8, idx.tz)
self.assertEqual(result, expected)


if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
7 changes: 5 additions & 2 deletions pandas/tseries/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,8 +508,11 @@ def dateutil_parse(timestr, default,
if reso is None:
raise ValueError("Cannot parse date.")

if reso == 'microsecond' and repl['microsecond'] == 0:
reso = 'second'
if reso == 'microsecond':
if repl['microsecond'] == 0:
reso = 'second'
elif repl['microsecond'] % 1000 == 0:
reso = 'millisecond'

ret = default.replace(**repl)
if res.weekday is not None and not res.day:
Expand Down
11 changes: 7 additions & 4 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3372,13 +3372,16 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None):
return reso

US_RESO = 0
S_RESO = 1
T_RESO = 2
H_RESO = 3
D_RESO = 4
MS_RESO = 1
S_RESO = 2
T_RESO = 3
H_RESO = 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this is tested, but in theory an older pickle will break because these codes have changed.
Can you preserve the original codes (and just add the new one)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I thought this was not stored as a property. Modified.

D_RESO = 5

cdef inline int _reso_stamp(pandas_datetimestruct *dts):
if dts.us != 0:
if dts.us % 1000 == 0:
return MS_RESO
return US_RESO
elif dts.sec != 0:
return S_RESO
Expand Down