From 071c37e92c79dc1369ff46117e035328b670e8cc Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 29 Jun 2014 13:28:56 +0900 Subject: [PATCH] CLN: Simplify Period construction / Resolution --- .../0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle | Bin 0 -> 7445 bytes .../0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle | Bin 0 -> 8159 bytes pandas/io/tests/generate_legacy_pickles.py | 6 +- pandas/tseries/frequencies.py | 55 ++++++++++-------- pandas/tseries/period.py | 38 +++--------- pandas/tseries/tests/test_period.py | 8 +++ pandas/tseries/tests/test_tslib.py | 11 ++++ pandas/tseries/tools.py | 7 ++- pandas/tslib.pyx | 11 ++-- 9 files changed, 73 insertions(+), 63 deletions(-) create mode 100644 pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle create mode 100644 pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle b/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle new file mode 100644 index 0000000000000000000000000000000000000000..4f5bdada3c7da54457f163bf993b396cec8ab557 GIT binary patch literal 7445 zcmd5>3!GF{8K2pm*~eWL*kutEL_md$zy`}gv_9BH6meoi;9{@ac4y}U8Wlzh}9u}EtDwbs_YKB>9J}Qk;7kcRToqNuG472P~@~he3Z~phr z{mys4_xGK%+bbLzRke5~pEfsBxT#dNSlX~n8tcjzb3MJyT?J+~=ZaZ7V=!j)DxJ*i zD&#Gj6}xPO#=ClqeA=*@ZOddC(`x37lx4ud&g9G$$X20MiAbLoP3>4stJOp_q4c@D zm9*X4V=A;pt5~@>5lKXWpjJ)9wCFj>Wb)!y;KRQX5l7W3@)j(GKfB*f4QQ((k-MIL zwCcgeNM!t$f`*u`i(QX+^L5uALab<>+wlKS4;}1TKpSS>gh#1Xc^I5m+lw5m+a1l)!p{c2bM3?C(z%wJJCI=tPH}4AGp+ zY&O&M6zNcIVwXZ$!^2delfa*o9eOHIcY+%YIweF6PBK~E$Xd#pOn!aVT$(L(ZCq*O zjrAr|=xMNcs!peoIPv{79W{;j5ztf>wu=ns1a4eIr>BEidAcc(Ogzo#8J-Ex6pSGN zE|CE+g9pIYaGETE0MO}7b-8?fA}W=#t!yTxtc5VOY$MmBP<6%pmPIWK=C#fXe3WDo z;&zr+*RowdfHYo)U^ovwp-`R(#D ze3#NkYEMYRBiPj5n$YIvA_Ut4H@8VLMSZhL)mjEmk--*(B3+^Od6`F z)mlBBqPoA`p`9vOC~KP4V=&8XmW7?)$@2aV(9CdX3fr|+Ou?K%yR>?Dg=Ci&MGJzk z8x-^_`Fd1v43FK-ueE}TU|1(`l)!p{qXo)ZLj2?v`nvsL4Oh3r?G+g>e>b4$wS5GF z^CH1Xz|g_z5;V2DI8mS~@T_)w`L;p#nY4VTD~iIqqNpJo6JtVAl0`|NR^ZsuDu%n< zkY$fg9cYV^T+RzFxdE*PClt(A8wbe(^nD1Go{;0{|Bd6IQ7YY;Y_{`qBU>~n^GrzQ zXw@kZ&z2VJ#K^S0`K1V5bv41@z1*RjRMMq2ikQU-QwHv45xBiN-2(di<>CsVKiJ$G zsJD)ApoU1lf66@_FE&IXQ*P3CA?7*`9YnnCf&RHK0oJ7_ZABcM@W}d?0hiyHy9IG~ zTSa^j@X7tV*C4j)v-cuSYj|eoVZfff&SQxC7BtR20yz548+sA-b!Xpwq+#g6duh?i zwxw@bQ`%6H#AtOL*ZSUvMmwd?Y zkS=O9P{g|#4vz10==+Z<;rx-}mNeKp8W$p|LU)48uXgA)mZA_NwPc(5^8){m|X27VXfYOSS0vT6CGo>v6~lA4Ov5&B0{5 zc7(|Wee<++010NpC&JkP?e039x%_naBx+L4o5o^{PyaLWrr|dU z2!93G`;+^pkbSg|Kn;)KTu@TtKOK0Cq0h_v9tiu8jGsBNGVVq8(5w zg&hk08lj zo81O%r#d%g@@aaj7Ej@60sXc%&U+G=1-|3W0>zX?mN@jgT8x?NTj=*dPk$$N8@}G( ziG3ff;OEp2@TsU&`(W3SG8p|K!WMeFyM6zWR!NJS=^e0n53T$$u7pTO``sz_J9oMJ zou2?L82s+=tM0)CHGCRw_oKnRlG`eYayunaZUZEi$Z~HP){>Qc)kyC_blAb|hx457 z46|%{9}ZP;QkkYd#ciAgzw-w4Gl$->i`pa+t{wiSl z2g^4Je=G4268|TMLg0r3VLv>KtbkcR)|KNpCJ1@|5syi5gFT#$OWgLT#{~AJAjxxk z4)1D$d9JPH>cLf$?rKcH>m$mz9>8yAbF~MQTStkQ2F;vV4U%wFY(qB0-y1LvRd$`Py$tS&9de09d z4u#bETR^(Mhopn?E=3k{)n4SPy`SDi79S2RCT*bFk&tTt@KhUlcO2a2syti= z5{F{{45{|7fOP*3Nyk-N!c}_(RbzO-q_Ze|hZJNL_>1R0R*ARnaOjPNUfpIfLF|=o z78eY?>dmUc468Xqtl_K%E_oSewQyBZyO|=^!MPeF4Xbn4!HE?ch3n!rVW!FI@fr`X z*l4)o`NGB^Y0wc25AaZ_lHybvTtLM?62n={8p?B#At$-Xd!GvwYu1R1$$f+uXj5n7 z_UG($|@vM*s1Uw0(Ac%otlww^uGsV>T- zlo)9j&jrd8Yl&mU?=q<+9`Aa|N1j_rK5h|UGhIb|m#I<_*JZQtb_3dCgKq?^2v)KA ERs0kyq5uE@ literal 0 HcmV?d00001 diff --git a/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle b/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle new file mode 100644 index 0000000000000000000000000000000000000000..1b1652fc03c270068d6e58aad2e736b32a74e9ba GIT binary patch literal 8159 zcmd5>3!D{I8NYY;-hIrnz%GlRAOb23uN9W%A&~5@B;t+hDhxfRZSLM#=E8mKIWt#S zWJ06eZ8L;kWMxHUmOU*?dst+qsZ^Gws2OIZ`KUBXUF@OXcjlaVahF~A$**RAzx`(B zeDj^}`+tw~opY}$vq@A|;+cF}-%8-2TvlRf&C*G%GhfVg_cV4EsNR?>X041ysn#QP zP`$H|H!WK1v?LPm?AG#W&1|$xon~~ikqMGkfdng))7PL_iBu#ay-GCIe>tg?6VZg! z>&TYUR!_Gskt(HZ?b1Xf5e0^7IT2H$E0W3NC9lH8{v{%|td!+V_!R!EJ}Wh#v_~R$ zKmBOMgY}Wf_-zFRX}TtM1JYZrzwQXqvc`F>4*|XGz#Y4h?rYz080qs*JUjOg&I>QBy;p?EbCT}fn=8)C9kd1R7GXSNG zK&e4alpl){$0fq9s6!%UoR)JM<224`1*esqR&iR*sg>*ntt-00y@fOx?YDw$@)4bM;0(b9OY0hY=fyU1`(VBl&hIU8VQ=_Wxku{0y+xDcMp z1w#N_Dgs~z3xMsxG+7A&ppu#LYB7BxDvYwsY$heGhcGoQE!Qm(dCh{R#Z3$6H_!K7 zQZfl~J4-3=Dip}<)h%3(ZlyrSIRR_up0u_7DJ7QIayprpET)V)13l4blk;R_ih;n+ zSIL6FR81#6l}Z+ZBoE2v<46*i373gyX8WRmLW_?hhKFV(oNkV%`%?y5t;D;s1R3cvW~bR?BMXiN{&a1L#sF)+`;f^ z3FzE<(mD%#%5oXf#=yDICaVNEZ&b-eUj6YE&b7Ea@zS%}&6|r+jYzLCV?ecJm8_8i za9o0;Wh`CC3ZDz-S*ZYP0$Ix$>%_2MMH#hHx=P*@oMLSW95{?_We`&f8k#y;LWE!q zi}JMBlPbAXR>})`owO_E)|LXfOt<8n|e*r6OgwuL!-J&5s;R#1$G z^oAe|Z^qG)A^jF)9SPEJ4NmbkZim12hT##g2^vEB?IlDA*087`((jOscEiEfjbFBO z_E3fol?B5#6BvR<0E5h_$6%99HU}}thA`lMD=O7ycL(n!ayH4!#uBlDn%x>T^+wS~ zuqV+>6aeUMn-FXc&0-3=T0)gt=L;duiC_Zm<>F6NRr8M=Hp3=J&~oye)o>FgG3 z^^QSjOd#Lks3Nf*j92+>i7|noy%IHn8iP%8Gv7*VA98VHJuYGqssT2avVB0Q!hRU@ z+`?_kyP?a3$lVce+*SWKj)Rs;>dIuZ9am`CqE4=MAvESGl_?(2?@?kM7@0fWZuz~? zf2jrrZ?8@IWP>X8JZ5n}BLcUd2k!e+G7!kz_jCIJb9ei8%?P(|5b5_%xwrMjx=3Wo z&FUVcxz;0xk>375-@KQA)}$wGM>;s+k&Q0{U42vTR-}8EmBj~vK6!BOI;73&oc&0r z)jhN8DA4Zx_G3s7EUcgN3eeGa-PnUv-LU+gSL%k|=Cu{gOkOulw%z*xfU?Ub*UCzD zzCd$YHnUA9y9bmCzW3V$1r>IAY@heRP%%--hX5xpOFoP^ow6)3!m02PUl3y=m5B3? z`d9*SzSoKKj|KdE9ryD!1IquWe-r-~zGzcJMQF}QR5 z97wo9C7*}(MU*#RAIpo|jc$Sc0zw~SEiCz>tVCOtXqytfP>HV6S;-Fg=_X``Zo;3B zUm9UP!b<=8=A)Hl4*RlAzH*Giz8Z8GG|peccFxO~ucIZEY+YH3A?#fvUst?!1TsVd z@!mG@W)vUoHK5GLu#*X`0^jfrV|K6=;G2*c0Ud8SwK{$a#lwPzOA(rnomEKBZ~Iu# z3_RIky51Vr^)}>$$bfa)ZRvm+u94&E@K0`W=u}Rp`_}7YaQ+TX%W=lfLi&H#H;j!6 z)BSs4-M^1q6-@W}D8##l*Znv#aJz4E6dBU(2fkqkdguav2)aSA9>8Gra`Z=>6}Fk= z4oFc=WTwaRV;{?%kisQce&S=f3sSrU%iTVfpF#<6Sac|NJC$lY!*WW|JxXmyTJO?e zPu#INlTVX-m3Rt|ugHDMICsR?E*x~*g<{Gy+HCSOB}Vm)P2}fbr?<)b1y1j6^6p0z zyqx+aj`B+NE7*FbG)f*ovW@)OIZyhHQcj9n$%Am36!`KX{1PG^@jJvftq(h!)<=LB z0Dm+%)o<~GD%LQ!c+ue8i5;7uVy7mk*tiH9(&e!dqJ=0i)kvQ~b=Zdg4h|T*GSsxl zVeGr$=sZmx$Gx}-@7V(Mdz(BV8^Oa$HiD<J z@OB$dBlF3eQi30z3Hsq#6a~Wiu&$iIc~QXo&$&YUd-vfm4!P|wt`OKsgC@`IJG!R< zTE1mX*FKJP;LhodEb8rr)A?WL9YEhGx9f*X}qBdZ@VD&ZT?@EUyd%Q9Ag z;iKSf!0=k}6_583uA_rH95aVotDJLGI@+1mGklDg)-fDUDg(+g!`qnQb@(kBUJo}a z9fzy|!yE98XZU!03>faK_VhH)>=t}FfeUnNKAjj8csdh^m7ksgx2*WnNpKgc_;fP= z4i5St)##bdcW_uqr{H&S%b>bW&%%2=yQEX$j%690hO7aK$z#?1Ee*KY#Y#LqhbeW~U}YU*=Un#eTO8>OF8&0IC7!hgjGgJ1Q>tDHg~jqF ze(b|#l*(K$FS=$4d5$Ay!y^R5v8@N46J(sr7i4%==Va7I1>|c fcuhFpF~sxQGGU10wdMG@3E|k};ed6)EH=IdbIz@n literal 0 HcmV?d00001 diff --git a/pandas/io/tests/generate_legacy_pickles.py b/pandas/io/tests/generate_legacy_pickles.py index 48d0fd57d831b..3a0386c7660d4 100644 --- a/pandas/io/tests/generate_legacy_pickles.py +++ b/pandas/io/tests/generate_legacy_pickles.py @@ -58,7 +58,7 @@ def create_data(): from pandas import (Series,TimeSeries,DataFrame,Panel, SparseSeries,SparseTimeSeries,SparseDataFrame,SparsePanel, Index,MultiIndex,PeriodIndex, - date_range,bdate_range,Timestamp) + date_range,period_range,bdate_range,Timestamp) nan = np.nan data = { @@ -70,7 +70,9 @@ def create_data(): } index = dict(int = Index(np.arange(10)), - date = date_range('20130101',periods=10)) + date = date_range('20130101',periods=10), + period = period_range('2013-01-01', freq='M', periods=10)) + mi = dict(reg2 = MultiIndex.from_tuples(tuple(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])), names=['first', 'second'])) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 441a5e8a99c78..fe61e5f0acd9b 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -30,20 +30,40 @@ class FreqGroup(object): class Resolution(object): - RESO_US = 0 - RESO_SEC = 1 - RESO_MIN = 2 - RESO_HR = 3 - RESO_DAY = 4 + RESO_US = tslib.US_RESO + RESO_MS = tslib.MS_RESO + RESO_SEC = tslib.S_RESO + RESO_MIN = tslib.T_RESO + RESO_HR = tslib.H_RESO + RESO_DAY = tslib.D_RESO + + _reso_str_map = { + RESO_US: 'microsecond', + RESO_MS: 'millisecond', + RESO_SEC: 'second', + RESO_MIN: 'minute', + RESO_HR: 'hour', + RESO_DAY: 'day'} + + _reso_period_map = { + 'year': 'A', + 'quarter': 'Q', + 'month': 'M', + 'day': 'D', + 'hour': 'H', + 'minute': 'T', + 'second': 'S', + 'millisecond': 'L', + 'microsecond': 'U', + 'nanosecond': 'N'} @classmethod def get_str(cls, reso): - return {cls.RESO_US: 'microsecond', - cls.RESO_SEC: 'second', - cls.RESO_MIN: 'minute', - cls.RESO_HR: 'hour', - cls.RESO_DAY: 'day'}.get(reso, 'day') + return cls._reso_str_map.get(reso, 'day') + @classmethod + def get_freq(cls, resostr): + return cls._reso_period_map[resostr] def get_reso_string(reso): return Resolution.get_str(reso) @@ -571,22 +591,9 @@ def _period_alias_dictionary(): return alias_dict -_reso_period_map = { - "year": "A", - "quarter": "Q", - "month": "M", - "day": "D", - "hour": "H", - "minute": "T", - "second": "S", - "millisecond": "L", - "microsecond": "U", - "nanosecond": "N", -} - def _infer_period_group(freqstr): - return _period_group(_reso_period_map[freqstr]) + return _period_group(Resolution._reso_period_map[freqstr]) def _period_group(freqstr): diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 77e9677f0b723..cceac61f392a8 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -111,8 +111,14 @@ def __init__(self, value=None, freq=None, ordinal=None, elif isinstance(value, compat.string_types) or com.is_integer(value): if com.is_integer(value): value = str(value) + value = value.upper() - dt, freq = _get_date_and_freq(value, freq) + dt, _, reso = parse_time_string(value, freq) + if freq is None: + try: + freq = _freq_mod.Resolution.get_freq(reso) + except KeyError: + raise ValueError("Invalid frequency or could not infer: %s" % reso) elif isinstance(value, datetime): dt = value @@ -451,36 +457,6 @@ def strftime(self, fmt): return tslib.period_format(self.ordinal, base, fmt) -def _get_date_and_freq(value, freq): - value = value.upper() - dt, _, reso = parse_time_string(value, freq) - - if freq is None: - if reso == 'year': - freq = 'A' - elif reso == 'quarter': - freq = 'Q' - elif reso == 'month': - freq = 'M' - elif reso == 'day': - freq = 'D' - elif reso == 'hour': - freq = 'H' - elif reso == 'minute': - freq = 'T' - elif reso == 'second': - freq = 'S' - elif reso == 'microsecond': - if dt.microsecond % 1000 == 0: - freq = 'L' - else: - freq = 'U' - else: - raise ValueError("Invalid frequency or could not infer: %s" % reso) - - return dt, freq - - def _get_ordinals(data, freq): f = lambda x: Period(x, freq=freq).ordinal if isinstance(data[0], Period): diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index c4bac4b9b14f0..42edb799b4c89 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -227,6 +227,14 @@ def test_period_constructor(self): i2 = Period(datetime(2007, 1, 1), freq='M') self.assertEqual(i1, i2) + i1 = Period('2007-01-01 09:00:00.001') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') + self.assertEqual(i1, expected) + + i1 = Period('2007-01-01 09:00:00.00101') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') + self.assertEqual(i1, expected) + self.assertRaises(ValueError, Period, ordinal=200701) self.assertRaises(ValueError, Period, '2007-1-1', freq='X') diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 72ab9141609b4..82f05a0de4588 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -476,6 +476,17 @@ def test_addition_subtraction_preserve_frequency(self): self.assertEqual((timestamp_instance + timedelta64_instance).freq, original_freq) self.assertEqual((timestamp_instance - timedelta64_instance).freq, original_freq) + def test_resolution(self): + + for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], + [tslib.D_RESO, tslib.D_RESO, tslib.D_RESO, tslib.D_RESO, + tslib.H_RESO, tslib.T_RESO,tslib.S_RESO, tslib.MS_RESO, tslib.US_RESO]): + for tz in [None, 'Asia/Tokyo', 'US/Eastern']: + idx = date_range(start='2013-04-01', periods=30, freq=freq, tz=tz) + result = tslib.resolution(idx.asi8, idx.tz) + self.assertEqual(result, expected) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index f8043b23a58af..b4ab813d3debe 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -508,8 +508,11 @@ def dateutil_parse(timestr, default, if reso is None: raise ValueError("Cannot parse date.") - if reso == 'microsecond' and repl['microsecond'] == 0: - reso = 'second' + if reso == 'microsecond': + if repl['microsecond'] == 0: + reso = 'second' + elif repl['microsecond'] % 1000 == 0: + reso = 'millisecond' ret = default.replace(**repl) if res.weekday is not None and not res.day: diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 39d80521fbdb5..090b49bde68a6 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -3372,13 +3372,16 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None): return reso US_RESO = 0 -S_RESO = 1 -T_RESO = 2 -H_RESO = 3 -D_RESO = 4 +MS_RESO = 1 +S_RESO = 2 +T_RESO = 3 +H_RESO = 4 +D_RESO = 5 cdef inline int _reso_stamp(pandas_datetimestruct *dts): if dts.us != 0: + if dts.us % 1000 == 0: + return MS_RESO return US_RESO elif dts.sec != 0: return S_RESO