From 9fcd30bda773064c089815245521a1237e89ce22 Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Fri, 16 Aug 2013 19:47:14 -0400 Subject: [PATCH] ENH Factored out excel_value_to_python_value from ExcelFile::_parse_excel (GH4589) --- doc/source/release.rst | 3 ++- pandas/io/excel.py | 31 ++++++++++++++++++------------- pandas/io/tests/data/types.xls | Bin 0 -> 16384 bytes pandas/io/tests/test_excel.py | 29 ++++++++++++++++++++++++++++- 4 files changed, 48 insertions(+), 15 deletions(-) create mode 100755 pandas/io/tests/data/types.xls diff --git a/doc/source/release.rst b/doc/source/release.rst index a2b525a737879..275717ef984ea 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -44,7 +44,8 @@ pandas 0.13 - Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf", "iNf", etc.) to infinity. (:issue:`4220`, :issue:`4219`), affecting ``read_table``, ``read_csv``, etc. - + - Factored out excel_value_to_python_value from ExcelFile::_parse_excel (:issue:`4589`) + **API Changes** - ``pandas`` now is Python 2/3 compatible without the need for 2to3 thanks to diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 534a88e303dbf..aa89264cfc695 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -53,6 +53,23 @@ def read_excel(path_or_buf, sheetname, kind=None, **kwds): return ExcelFile(path_or_buf, kind=kind).parse(sheetname=sheetname, kind=kind, **kwds) +def excel_value_to_python_value(value, typ, datemode): + from xlrd import (xldate_as_tuple, XL_CELL_DATE, + XL_CELL_ERROR, XL_CELL_BOOLEAN) + + if typ == XL_CELL_DATE: + dt = xldate_as_tuple(value, datemode) + # how to produce this first case? + if dt[0] < datetime.MINYEAR: # pragma: no cover + value = datetime.time(*dt[3:]) + else: + value = datetime.datetime(*dt) + elif typ == XL_CELL_ERROR: + value = np.nan + elif typ == XL_CELL_BOOLEAN: + value = bool(value) + + return value class ExcelFile(object): """ @@ -174,8 +191,6 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, index_col=None, has_index_names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, **kwds): - from xlrd import (xldate_as_tuple, XL_CELL_DATE, - XL_CELL_ERROR, XL_CELL_BOOLEAN) datemode = self.book.datemode if isinstance(sheetname, compat.string_types): @@ -193,17 +208,7 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, should_parse[j] = self._should_parse(j, parse_cols) if parse_cols is None or should_parse[j]: - if typ == XL_CELL_DATE: - dt = xldate_as_tuple(value, datemode) - # how to produce this first case? - if dt[0] < datetime.MINYEAR: # pragma: no cover - value = datetime.time(*dt[3:]) - else: - value = datetime.datetime(*dt) - elif typ == XL_CELL_ERROR: - value = np.nan - elif typ == XL_CELL_BOOLEAN: - value = bool(value) + value = excel_value_to_python_value(value=value, typ=typ, datemode=datemode) row.append(value) data.append(row) diff --git a/pandas/io/tests/data/types.xls b/pandas/io/tests/data/types.xls new file mode 100755 index 0000000000000000000000000000000000000000..232a88609df92754e4bb8759825ff0361874d73b GIT binary patch literal 16384 zcmeHOdvH|M89#S-lWYi*1VTU_VF|A!BoGpw@>m`|$M6`%!HzhX$1bGAgsvM4Gg3lp z|Ebi$q(TK+Yn7Q+tyZ)?rnW-I(ROCFomOkH+QAMwwmNO0c3K@_`}>`u?cSpEvyhhaXwutiY;^l?~F232r&^aOrKAu z(~2krUN3~tkOs~{*2!qRAYury2$5}Gf*3|DMI3=xhBy-OBE(UM<%lo=QHeMjaSUP= z;#kCL#Bqq@5idrZfJhr*tg*a*hSc=A74;g?hrgtVh|5GTeiLFBT4?y88Wv+JhrS>Z z4#>jC6~yK0$NrqeB#jMW6SybDcF`f_*sh2LJV%Rwgd$hUF`S~zyJ()uStPC%{o-c) zwadC+1uev-Or#`5H{N@FKqP5jTq+e2@Gu!)hr*yFnR_!?2d) z%@9XGt$AklIF$u#xd&pz)G3#oAOyJpQG?J#8a}& z>yk20)QC5bKP>jslmnun#8?9>=z$eVh);BOF+O@88;fU#v z5y1H9pTmF^=HpW8iBS`p1iy?ixORH&w8m-Enzz(1-CSoTHrLmLrec(bA2vz&e#14f zt{d%{YB3GK2xiUZn|LMPiF^r%kM=eH@EzsXSyLGtey%1rBBXe zr5>Kh&_nl1{X6MD@}NKHL4U-9e%OQls0aOF5BhgJ=y!O~z2x!2e_Y~lrf%2IcRlDz z9%p_fkCXm@2hL+2^d~&%Klh-2!-M{e2fa{vej;(W;<4*T*$G!ZlK!pA%7bMG%S7ma z%+HmOqQ~RUO?hsL2t6g~Mere#{vTwJ!L-kM4|<~qy}^~vm6ycV{0UcWlAg7HWzQuh z{S~nRuTwC;l5%pjC-W;hs;}r=0cPkCyrTLVUE|9YqRgMwkFtNR7Be_E;T4NQg~L^) zq-W)PD5JLw{YYhHby+p^d`Qx{`ZV#Co|XQKFfL2Frf05ZC0&!V%A~)7DV|u7)dN?v zZuznp#_=>Eik9h0+?JDR_Ud-0_=ZQCt&OjuhP5*Hz|Ty zub~$Rgfs4h9aGJdBM~PECfxZ!(ouhq*5QD(wAw**CpaGSqmMgs*{B+u? zus{g)=Z6>m{&L#8{&hBxBcj+HB>kcv$RayP%tMm&dq*si+@&?=0)Ujr=A_io=&c@- zr0&Urlw_*7XV0E=Boo-|1hCRdEi`LVdK`L)~sdS+(-XG^O{(g%C>hMrz+7l*ueTU}QDrtXV_0 zX0(3>SCc+^^k^2uv;nJXO=ecw@YSt%%;gY6bxhU_ZDqFsXDzs~ri_NQa7<^+j+suU z9JM%h?3lx7tTeM0qybw7(`c+T=agwK^5$wmJFuyCclbK3KQ2RoP}aRVYC*lTdRC`K zy_+<%KRW$ZElTD5HWJQ8HbK1o7AMR(5QlWsaZtqKR_UCQ@GK@TNUF^r1RF&b5G35H zqdR}S_e>iOh&q7s)(IC`d8-qPbK>a@!n3KoAgMNg5Oru~PPc*Rt`mQd={89`K-7Vp zcXha6$y=RRoO5e$knzI-NwxWd@Nh3D2@>u)@dueP91waRzv^&NoRh?}@A!l8>@qJ% zs?8q+>q-j<67D+j2bnn>5OrYd-Nan*=B-XF&J~h32+wl!f~4B~K_WH~B;0l44>D^w zAnE|yyE@#w%3GaSoQptj5Z;5x3zBN{2bpRELBd@pb`X6WUx8w|3AV%RhXSK5QHvJIrYIIa`{o<&Y)0#BTAgfW`mj;U8p)#*vT5*P!-Y=}(aoOEYXO*ZrWEo}Bp_Rcd;yR#XuvvIMsSw3v6mX>OJ?ew$mY%bQ>xLDe3A2wD?OSPqrzwXXvg3iXp z(&qZGv07TH?a4Rab!RhCXX9dNtv+n5mX>P!_|+%e*-Rpvh5nWn0h@)xu(X!eyB~i| zv7tjl$ojO&IvW>Do9@HLYH2O4ClB83&Zb6Z<6>!bK5VR(*3$Zm-yd>kGeu|PVrh*& zY^;{n(t7N^7v0%JbT%%Q*6hQ^YH2O4Loa1KwB9yTbv7=RHs6Ph)zVs8KRUkOtu`ZZ z&W(v$cHCU0jhmCxC7xrY5bO8;%pG)E4ipxpgHF{ztyY9Hlnra$L8s?HVLv+P3=Pz3 zGeR8x=@;BVXXHR(Ejnnu25PksAqKj>?+!XM2MXKJL7OyCt33#D^}{c?gU-r=M!=etOt61IMH5guEZ+T-PE`Y7Z{$y8n1TNw(3bqtEJK{U{9W$P! z0v8#u>A$AEr?bDOJ=xvYI~d+;2MkoF%Y?dn#JgcVfX>}TE0Hq^f^z@MWWfC>$)Feq zRs6^e5Cdwkroy&VsVy}WD97~-9AB*27VF#|SrhB&84T~y=5c2SCgV6q06=hWpr z@Dj}1I8cndbz0tHTxHJXjffjC65PllvP{!J2xVq!WkTsN%Jk#nS3=HxJ2O`!$z{$! z2};b-N|dBWI!f4ijTLiwZT7OAJ9^rC+mn5XU6GBkoyo!Q6?Uk=f^-FLWbVYxJGOnV zY=7RA+y$L)gg85451i|ptd$`5*gtEoXig3q^t!xw4} zAvneboc>U?Scz3FLxB-u0UAaS>Gc#g2@;>Olm-&JGRr2K4!yR^vzzFmgVk99HHz&F3k)mK{+jME%mCiX@l`>tL zN|`QArG^4!Vu`>m*!upYZ1FE^OveIxz0`rT&8;q<9@5t4B;+JfDN_=aG9^(dy0S|K zilDpcb`gq-&=4C?=R|hlPnFSBm(o->b|-tFt$FrR!E!XuMp#D@4M%-Bt=%BRG6~U_ zqy=bu55bEi1Qo7YKO~LHw!c8Vn|?&q)=bgFD#dcNk=WFp=%ve2?lo!`G*AXly$Ku= z=vSuKa&xO3#HEJs3|}wqv}}^>N9`){w-F#6vbNVk|q&)9~rI#1~Ot4W1tSGYwYj_cq+vAMr92~xuh|n)0khU zVcc=hR6FMI&dGLB|HVg$ub{uur8<3Yp{t@Ljic>D1K~$BFzFZu-hvAI;PSMJxp^+7>n@BKL$qG!*D^}<; zuc7-)fDjvSt$8kDM6AFz0B8s?juSI*7>B=#iml;sVyh(^ z6lLWu| z2VQ;Y)gyBzRtz0P{Tn}gg!O+Ok$r;o-+)M8c0D4`l)DglnU6C~)`63C*84U@4w$zi zQXl&esh@ih-SnaAG*V(Ai|20*m{1P3hng{nksuM0Dy1l!`V!l3;bh_#*~6LJ9*oSg zcn2n&9pa1jkHbjue_>gB#xROVTcNHlg#W(=40!h-MtHWNF3n63a{X|9Jl@?IQ{6Gr g(%jq{$r_g&F;;iS