From 01b3399a017a3c608900e81813296593b2f396ab Mon Sep 17 00:00:00 2001 From: John McNamara Date: Wed, 23 Apr 2014 01:15:49 +0100 Subject: [PATCH] BUG: Fix to read decimal seconds from Excel. Fix to allow decimal seconds to be read from Excel dates and times into datetime objects. #5945. --- doc/source/release.rst | 1 + pandas/io/excel.py | 45 ++++++++++++++++++----- pandas/io/tests/data/times_1900.xls | Bin 0 -> 16384 bytes pandas/io/tests/data/times_1904.xls | Bin 0 -> 16384 bytes pandas/io/tests/test_excel.py | 54 +++++++++++++++++++++++++--- 5 files changed, 86 insertions(+), 14 deletions(-) create mode 100644 pandas/io/tests/data/times_1900.xls create mode 100644 pandas/io/tests/data/times_1904.xls diff --git a/doc/source/release.rst b/doc/source/release.rst index 47407eedb17bd..49656046129ca 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -285,6 +285,7 @@ Improvements to existing features - Translate ``sep='\s+'`` to ``delim_whitespace=True`` in :func:`read_csv`/:func:`read_table` if no other C-unsupported options specified (:issue:`6607`) +- ``read_excel`` can now read milliseconds in Excel dates and times with xlrd >= 0.9.3. (:issue:`5945`) .. _release.bug_fixes-0.14.0: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index fef5a24e6ea20..f4f40c8be7855 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -18,6 +18,7 @@ import pandas.compat as compat import pandas.core.common as com from warnings import warn +from distutils.version import LooseVersion __all__ = ["read_excel", "ExcelWriter", "ExcelFile"] @@ -250,11 +251,19 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, convert_float=True, **kwds): - from xlrd import (xldate_as_tuple, XL_CELL_DATE, + import xlrd + from xlrd import (xldate, XL_CELL_DATE, XL_CELL_ERROR, XL_CELL_BOOLEAN, XL_CELL_NUMBER) - datemode = self.book.datemode + epoch1904 = self.book.datemode + + # xlrd >= 0.9.3 can return datetime objects directly. + if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): + xlrd_0_9_3 = True + else: + xlrd_0_9_3 = False + if isinstance(sheetname, compat.string_types): sheet = self.book.sheet_by_name(sheetname) else: # assume an integer if not a string @@ -271,12 +280,29 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0, if parse_cols is None or should_parse[j]: if typ == XL_CELL_DATE: - dt = xldate_as_tuple(value, datemode) - # how to produce this first case? - if dt[0] < datetime.MINYEAR: # pragma: no cover - value = datetime.time(*dt[3:]) + if xlrd_0_9_3: + # Use the newer xlrd datetime handling. + value = xldate.xldate_as_datetime(value, epoch1904) + + # Excel doesn't distinguish between dates and time, + # so we treat dates on the epoch as times only. + # Also, Excel supports 1900 and 1904 epochs. + year = (value.timetuple())[0:3] + if ((not epoch1904 and year == (1899, 12, 31)) + or (epoch1904 and year == (1904, 1, 1))): + value = datetime.time(value.hour, + value.minute, + value.second, + value.microsecond) else: - value = datetime.datetime(*dt) + # Use the xlrd <= 0.9.2 date handling. + dt = xldate.xldate_as_tuple(value, epoch1904) + + if dt[0] < datetime.MINYEAR: + value = datetime.time(*dt[3:]) + else: + value = datetime.datetime(*dt) + elif typ == XL_CELL_ERROR: value = np.nan elif typ == XL_CELL_BOOLEAN: @@ -727,8 +753,9 @@ def __init__(self, path, engine=None, import xlsxwriter super(_XlsxWriter, self).__init__(path, engine=engine, - date_format=date_format, datetime_format=datetime_format, - **engine_kwargs) + date_format=date_format, + datetime_format=datetime_format, + **engine_kwargs) self.book = xlsxwriter.Workbook(path, **engine_kwargs) diff --git a/pandas/io/tests/data/times_1900.xls b/pandas/io/tests/data/times_1900.xls new file mode 100644 index 0000000000000000000000000000000000000000..e9a62b2c25da968e90b607a72efa4e9547e98048 GIT binary patch literal 16384 zcmeHOdvH|M8UJ>(Nj89x1c(R{R^E>ULjuS{c6o@88pKf??0|zI*(E6F2v-jTYjx$bm zJehO%-tV6C`yS^z-+A1Vr=O}ga?{--Ulk)orAR(bl}U+>9>6`bO64M>aKq=1Q>m0D zis1H(^ck|iN04Ar$!N^09;Q~^DT!}msc^L9= z2oXU1=5MXq(o$?bl|s3wxWgdPgU?3%Q!()i-dU< zIjF93rvA~_OK-S6dDfbz0w?3KM;DWo;NB(cWv!B9y}fu|_mvV1&~Ggn@~K4D@6FI9 zi{(P;mJRr8Qo5f5DculLgY%fjfzCB<^4iED*Cn9uzpLN-fxL$KP3VZGwfkX8jzX&ln|8g zZ~XwqM5onFE*vraF@hK${YwPkh5d0U^TenQjX+$+m^}98vEypSjjNl#y4Jd_4voeL zkFnVz%wKguyyX&mfocr@7`Uwh)9dW@@l+ie2k|hJ+e;WIs@N=`j$b4!;LMQ8D+v+ui=_o9=VTlvp3Js=UPr&CbdlapL!hkMiX z(2dIex%4|c=s)+M-|0c$>p{QEgMPaQ{RR*E4iCDQJYM+sC>&1Wo%Z>W2VKjPTVBhP zOTWbf=WY-By&m*md(gk*L4VMLUZ_03R5)A=IqjqUgo`3Y|9(Zqo`HJ?N@%w#&qb1^ zClZg0d1Q=)?pO3;1RX{H4+^Mc`R5c5dW{Etsw&n{pGz(JGjcv| z$6)HE{5Dh+U>vxFioEfZGF zT6jAxSEwltp+4B=VQx2}>{j_?mNI+eYzU=$Bdwn!l*KI76UbO9SF@=Z&FKG3t}eZQ z|Nab!z=0g6 zvC}LqNCUP?mebg2xl<-A?QL4n4;-f57QV>nkEf$TC=*_Dw4mLoo;|41?iNk=Mz@eV z7PYc*8xOgSY=L;kEl!xTAUV>}#zhiO*rm&jp!`hJFGydbKM42ovmkhIQ^yScX73r7 z4~YIYKCN1(H}&APqU`mKyKgf88Bpx98Tb;K$T(I=3 zPCUW6wKvF!d_ejd{XzKMFDnTi+->3yG9e!j{q53Q9WIKql6d+Fmk-_`{H)n8NMEBr z2-cN05Infs#2;j0J|OyAuD3c|@b;@tJi!%`HwZuT_6yS2=noQcfZ)O1CjKCk@&VD` zw!PKi=2gGy#1mWudV}zYM86vRf)s#7OOJ@)h`NJMIp7vbAwv*<6EdPx$%j(2G&p7_(z zcT>f@CofDJCU7%@Y${AP^|@@u`moW97R9DGo;d!}VRtr_CYy#_HWPi==w*#!QxZ?S z_3lgVY=)X_X6CY)>cfT$pCU|?V5rh~;^>jTxw9E&vYD03X1Wg>t}oJT!f2a!{^`zU zxXEUAE}MEEHhLbcv?+@xK6>*7cQ#dIGt=MG#(>StJUp$h@qxbA-PxRIvT^aWaXxJ9 zp4QiR?7;`z*^DsRxOmzmA2xPR>uWsp`a|w)PBPiJc-k}{Hg-?zYwSDtqC1t#*-93ji z8wNC_OiUYXvT^aW@jh(qo>t%R!k+8g*;JcsTs&>E4;#Cu)i?a+x$E88j4|1`cv_7Q z8@s2~HypU>33oOTlZ}g~)%mcods=-%?~~~WZMMxRCL0$|o9V;G?rHT69~`{KO`Ab7 z7OP&#jGK#$adUL4)N`y9S$_G$?x5qcpztUY^b`Zs?nU^9a{e-R(D7MN_>T!X!2q@U zjL6VuDUFKW&!cT!6#ZdjFMq|#58RQk!1NrEQEFE>;uNL02uzmXT%A7G z2VTOmH9aLLJJ~2(f>X}fvJqK@k>Cf=kl3b?u zl%mFTqef|JP)-deuixI(wH$%4b64^OOagfFtuvmGsk4+U1E@oT~- z?EdALAR#U_wsowJx1{_3xrQ(!5SJQNL$61pw>Tm|QN|lHvB-)(+!HT>;ReZRkYCP= zcPG1=5MGSH95@+)`8)tq(=PZF1B+hKsCbI6E0(_d@ChdE@NlUfL3eC$${G-`JP)4U zr4IfQDsBA$ZnKNkG8`%c#Ce^`_^$9;Cq{4p`U^3V5Q>|VT=ghXu(NH2H10}g1Fe3t zLYjWELYjW+4GxsKf?crX-AUErUo)5v2lR4fgWNW^hkQmzN1Kz7l|(;TlISN(68*$b zHm|1`wj1vhp|}_gaXxhJ$}Ie8GP>$~y6TFyWIL=i!&$3n2%2XFyd#N*qrGyyJwS-l z6+~x}9$@S}6rH3XXmH*7A>~w#{RP_HiX&QE>xnK_Aq&t(va+eGgCR?Y*O*h#;6Oy` zmEh2Yeq}{1x3J1VTv_<)@WpbqZIk30Xjg&16#!YQw7n1_Z&i28pgV&hkDSki729O9 z@}O)9_MmkbMUx1Xv*l8CJo!p!wVB_b;TadU#^cEu*M~>Q^){Z`ZT07tn{lN3X4^Z~ z+X*)g;Ed#>fiSYjx?=(P5C?Wxj~PzBqHRMwg)u%XrCWQ@y%unhKm#6D(qT87;cEV zufUc6eG--rZ+-U<%hsMBTchYx6@8kzep69;P{v9V;Lsv9fy?pc(GYnJNB)*7eTBMC zMCpV|$6}Lg_meXyZ^Wk9?r*}JpNIj-7k~+Ag$9-W7_)wpvJbO|eOWP3!|?0gZ%!(`x+Vn(lg3jocG^eEXY~IZA?Cv#)HEvbt8V?>+e3aDBZYcKrE`Dx zE3a*TSW&2DRCX`f(f*hJKA|YoF)DLjyyMLj9sL{@ice z{raI76ouMF<=Sr@yX%$imlcJ&MJ08~q?K=f`&C7uW>J~_^;OTk_57a|g?dG$efRe+ zdSlWtMWI$vx%t3vpEU6Hgmpm&E)MAkH3(Sf*IQFMmxec2bf3T;OO{(lxNLblYJ&(w^+U!>2c1wO_w zR;&gYG_)Gx0c^_ZCIvFy61&D(0+`p`@8l`_NlXx>1`JyS0c9}Gg|c^ zWA_b%kN*F_AK;h1_Btx@dm!uZpNjC~`phV|tgX4Lb5my`8CkleIo_^&!}GH%m*W4( z7P8{=*Qoy~OZNa$3QB(Z2cLcN*?rSTmG|y}{x$F4N&O!~)^SWQeCwD*z4_s|1)06P z9htIoMm`kzGGvaOS0G#VpnYycw%Y?{QAz7ENMVscOeXWU8cf(uY!5vxkU=03lICUc zm7QJdH*v@(Nj89zgb)!Wth^ryh6Ip@>?){e4d5sacEG`q?3$F2(6FJ``O`7my0%O5`EPRmek; ztC3Ga9)^4}@^EDO2y>0?{xf8z&#kBzOBeo<5|MMI6TjWE0WFk&s)ol{#tEWYB+RSG z0d;knMF@;pLJg!63^a?8AO43@F5>8!0{gyZa8<||37_?YLzZR?Nx5SzKmWcLKA|Nrt9;T!LnbuDUK?(oX z4`57mTHWNr5z`+di1E?CL;zmcAD1#ujGE9e#AS@hqjrxPT|0Vo{hZ}>)@@B_1V(s_ z%@$$KvWw%bm)i@}VED(tZ55bOZ?BK1n$T#7hoRhF!az~QrpqL$7g<5(lOjrz6?Iyc z>|^@vsJc;jx1#Q>6?HYCTA3*0SutB|x$T~GNBOx@3paHzf&d0DP06y0#j*^i$z(a* z#?Y#r=5JM1Z0+@i%Wa(VEE`idvwqg)t*y%=tC5yRHXzl6Dr665O>J$nSFfJEb}dlq zUs2N}k$QJ(0}bg|BeU!ArE??^88Zaa53bxkMFctH|PdmE*@3+jD58KV+v~FT!CAx7c~wSoK89KOxrC@ z-&IjjJ+K;f-l6DRm|FPS&f5OP7_t@Jurn9Bif+hRWznC(>`$T@J8+@vR$dV1Rh;3> znh9~{s&JKDMZ;xMOlC}9cJIb;oosZ_Ss;job(LB~a12nx5{8-+F^C6qE3imG74#gj z01jNu6vTmw7gT|vwx9}ZtAZ-fnG34GMk%NQ-M^p;>~{rKV4x|e0((qB71$*Us=z*0 zPz46Xf+|cdv_fs673vDDFs0B6vkR>-$5Da(&mIH<8^dQCQG@#zMVY8!RtSdE;e;KN zQ|bs(E=WT@AgP!?NMk-A4UJ9^GYC!tNY+bo)X5bc%ZjxAjBbe2DZ9afA?VK+kN^Gm zDR2EJI6!tui91N@2|tiCoFH)zNm75^Ws~GaqdBJoq*OI0rH;kcc}UXt!wg7iTE$J9 zHl^6SOl)=n*lA@p8eXY1z=gIkYzj3CW$i-6+=V)R{CJK~u~;leC_Bv(inO#$ST$?m z?X+B>COU-rV55h*-Gs7R<&#;;?2R)Zlxf1U>Tlv8fuXx#ooPpbDYLb zv$P-$*eY30W2fa#nXt6CX+b}5n090MQlmefi3*`ic+Js*cBgvwphmk}G~FBBLhdNk z%EoOx1*-_;dj5RBzSPQi9g8Ld_eTKOK)|!D9%da=_g!1c!Th>X1^ePP5vNQ zSK2`E;BFItka78d=x@2+>Tto^uR8GrS4iF<{LI@gNMDmbNW=kx2X~wJgN)AyM1R}% zR)?Ec{i+jBa1rPY!Xpy>g7h`{gPi67!GpU^oFL{n9!0h61Utv=w*@QlWeuyrhWfFQ zy2#lrE%DCe)UDx#vX!<@r$DGCb%HctPk*dLqz?IPe7kHB{pC_GNdnmME)B&Ke>(bZ zs+jlWg=xcBZf1~8g~_HNm(3_2HhR&b*c8VT$6q|+&Zg32)0oR?TlT3^%sFTUZ<<`k2Si>EdEu(5kuU(<(&_Pet=m277Cds+l+X5`^%4UISLdrq@q zKtsyJv=Js77f&1G!^ZAu4UI3{e3Lty8k3ESr%mu-WB0U%#@{@9FZ#Z94kc@U;T(X=;$mcJjw(;%>cD~5x${Zw9p-NOcoUWV}gz~K4=Mb3_R&wa;qBq_(a z2W1a;2cXe(JU+n}8R;LDvK9){V2tt1gD<( zk{cian8E%Oc083ip0))C<0J<@U(9cdx2%fHk9Txz4R12!Iw>^(an9RqyI(@DW zyo6E{vr<}88BeD!5!R4%?>eTgyP-mP`CzJ}KP7lt6b*s5=OZwC#xlHLT zMU5#&jndShoElDEL*;ays6BW6nvUkq=44m*hRBlm`sCK|0w+{(da4|UHP_=%9^1ZC zwLcG0Zh*~~K%DjP2hR1a)@Ng<-Kz|`VLxXe=$#fvx z7CaG)3>!FNp9x;!0^8+tI5rQ|`!4U&Z@JbhO9S1$Y_0%A!z^B>45etog zP&C8^obgb%SgGAC+kyjRIvP?g=v>nSSLkszS+ENF@WkpG_#zuU+tE_^P+&D0zb1Ub z?q7}x65>)*d*`ZnYr6kmXb3Y5aj8i)^eQxZt0MvwWxO#Hi>&CwJ@FD4ZjhV-`DIbO zC)wSM@L~k!z^Mq#ivUbbyWvv|EP6$w;wiqaSo-e5Cz!Ou!=-u!-LcUrYe2wq5j?$H z9sDI!+WG<9W*4hvI8+9RMP14G*6>OvMsNW73o()qikp*M^(axWvu%Ym?n-9^t$wmX zntrlEnts|A94NB{yI_lZlB&hOW-uKJ=*7wgxovI_`HYZ`HYXt~iGH#q(NC5n`iY@z zc5g9kH^wPKaWNX=BIw+mS@_drbk#X@)g|r84p?idvsTeyG|v)vM-mN3d*yn2fDmUY zh^{0(z}R~zI$1%`;JWog%BdXt3$(ixN3^!q6J4xA=Aw;cX>)fcLzWJ&F{hxxfr!*g z!J!-d%8FWUVU>fpvhelc%jA07Cdm!Zt^$8c0J2hPdkI9|pzanzcLqZqIiCe9w#z!@ zLD>@QL04iFO(ImzkSo;jNbqBY7|oMi#m9SU^6+fgRRkhLbO8UmZ_jj1NmGSiwxv60SzZO*&Ple}l3eEVkR; z%sU#8ouKWs(FnT*v@JFo0eA>PpE@shoH`#Etm12x$bAdG*{sKKaROY0T`LH~4OaIR zxbnYG!t&wm@BU%o%8Ozv6n&DSPgd7&DoQWPSZORATBJ5`HQpQyk=IbiGPsOvbC zPN;M&Hr{qWA%pTpY@+S{R?PW{7=U~Mn2=UzROyc~>o+O8FnidQ6$3S#=}ZnRDqRXG z!m?4>UgZ80s4Wu4D<;}RH&!^v??1e+rIB$nSV+^2>7)_CEQb*$heq3&<fZ`}9# zb1x_gwTsG)-#T{h%RMhC3U!N0>hkeR-~RTiibBnzGUMyZo_*{2KPw9Lib}`!?_K)F z_+yGft)jAf?{A+uy6%nGb8@OY1~rbLYgfAbMX^zOKZTLActJY4PGmFs(`JV+y|!)y zdIfbw(%yjH9X<(J(|AP(;%-IJIlA{{U+5~d9TE8dS+oe*Qfod_GyZ;&KA#r&7{gex z8f4JWwnSHY<=R`0zrVPx{LUN0GI{*I7wSR#39|0*+B4aw&PS%VU4~qV+=k3()r*YX zHw-@d{{w%3U;5hXsKoDqtiyjQ!jJ1Squj#wmhP^#U5RAm-1RN-4&57`pHX=({*UY| zyzd=*N1XptmhJ(d6qfw-4<35*(5@+`lyAEk`q#dH5A}ZxS;sNK@U3GK_2!4;R%G__ z4rI#C8Tk<8tB^T%UW07egZ8-v*=`S*MJ27vAcaK&F`3NY8Zcoyu|4#(Kn8(CNSc?$ zmv(ipTFV*3TGhj?0D(8@6iBaq*I-t;Qodr5^Zw<3VcB~|J{8&S-T4&EvrM7<^MuX2 z*k_r40MWe&gB5i!|95j7ERgMgixY|VmbmVQk%s#E#z@Aw)QB;!J&~A+(TrOd=&4hb WM^2e3@6Fo#)x1slMZr(m0{;U(y*23o literal 0 HcmV?d00001 diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index fde5764993e76..eb245c12c5e30 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1,8 +1,9 @@ # pylint: disable=E1101 from pandas.compat import u, range, map -from datetime import datetime, date +from datetime import datetime, date, time import os +from distutils.version import LooseVersion import nose @@ -360,6 +361,49 @@ def test_reader_special_dtypes(self): convert_float=False) tm.assert_frame_equal(actual, no_convert_float) + def test_reader_seconds(self): + # Test reading times with and without milliseconds. GH5945. + _skip_if_no_xlrd() + import xlrd + + if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): + # Xlrd >= 0.9.3 can handle Excel milliseconds. + expected = DataFrame.from_items([("Time", + [time(1, 2, 3), + time(2, 45, 56, 100000), + time(4, 29, 49, 200000), + time(6, 13, 42, 300000), + time(7, 57, 35, 400000), + time(9, 41, 28, 500000), + time(11, 25, 21, 600000), + time(13, 9, 14, 700000), + time(14, 53, 7, 800000), + time(16, 37, 0, 900000), + time(18, 20, 54)])]) + else: + # Xlrd < 0.9.3 rounds Excel milliseconds. + expected = DataFrame.from_items([("Time", + [time(1, 2, 3), + time(2, 45, 56), + time(4, 29, 49), + time(6, 13, 42), + time(7, 57, 35), + time(9, 41, 29), + time(11, 25, 22), + time(13, 9, 15), + time(14, 53, 8), + time(16, 37, 1), + time(18, 20, 54)])]) + + epoch_1900 = os.path.join(self.dirpath, 'times_1900.xls') + epoch_1904 = os.path.join(self.dirpath, 'times_1904.xls') + + actual = read_excel(epoch_1900, 'Sheet1') + tm.assert_frame_equal(actual, expected) + + actual = read_excel(epoch_1904, 'Sheet1') + tm.assert_frame_equal(actual, expected) + class ExcelWriterBase(SharedItems): # Base class for test cases to run with different Excel writers. @@ -400,7 +444,7 @@ def test_excel_deprecated_options(self): with ensure_clean(self.ext) as path: with tm.assert_produces_warning(FutureWarning): self.frame.to_excel(path, 'test1', cols=['A', 'B']) - + with tm.assert_produces_warning(False): self.frame.to_excel(path, 'test1', columns=['A', 'B']) @@ -832,9 +876,9 @@ def test_to_excel_output_encoding(self): index=[u('A\u0192'), 'B'], columns=[u('X\u0193'), 'Y', 'Z']) with ensure_clean(filename) as filename: - df.to_excel(filename, sheet_name = 'TestSheet', encoding='utf8') - result = read_excel(filename, 'TestSheet', encoding = 'utf8') - tm.assert_frame_equal(result,df) + df.to_excel(filename, sheet_name='TestSheet', encoding='utf8') + result = read_excel(filename, 'TestSheet', encoding='utf8') + tm.assert_frame_equal(result, df) def test_to_excel_unicode_filename(self):