From b1a87936551e61873d3165d42490c7a75724bc6a Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Wed, 1 Apr 2020 20:18:18 -0700 Subject: [PATCH 1/7] Add test cases from gh-32207 The test files were generated by the bug submitter @joernhees --- pandas/tests/io/data/excel/test_spaces.ods | Bin 0 -> 9263 bytes pandas/tests/io/data/excel/test_spaces.xlsx | Bin 0 -> 8622 bytes pandas/tests/io/excel/test_readers.py | 18 ++++++++++++++++++ 3 files changed, 18 insertions(+) create mode 100644 pandas/tests/io/data/excel/test_spaces.ods create mode 100644 pandas/tests/io/data/excel/test_spaces.xlsx diff --git a/pandas/tests/io/data/excel/test_spaces.ods b/pandas/tests/io/data/excel/test_spaces.ods new file mode 100644 index 0000000000000000000000000000000000000000..375e839c8c22105e09c86ec28d65268435119a1c GIT binary patch literal 9263 zcmdsdbzD^2*FGq%C?z0*N=Zse4kDcn-Q6(65HqB7mvl+DGIV##5F_1P(j}lY@`HPS zzIyN1d*Ao{_g(YZbM~1z>)HG4v(8$3J$uVZ-$o%oLc%~os*h2X^Rwg$W;N`mvet)yAWYUa#$Y3BLwhS@Fq8>mV`r>y1Tix< zhRXc~vu4DT8XFS{>8220V=9^1TN!}$L6#6^=#MIs4cIhDPDUIXiv;WX5^PBc5rymH z8WIvR62|T8ng?lQG7{3Q5V_Y%qS$z(gt)lWDI)b6C?*1w z5`Qf%Edo?}Eu#Qblm{wnD2R(GN{hAMZZf)ii>&|pOBmolbDno_AVnP zB|9Z8BR)MRJ_DANk&~1KOOFlBObE|TipfY#&dEqF$WG45$GaS%grp#%P1-; zN`GITRajnFTvAw43@fR~FRv}GYc6@8US5z>T9{W=R#;aKE2}84sH&-`ZK$a%t*@=B zudgqvX)UjBt7&SlYJpd`bTzfZ>)U&py84^Dhg+(P+Uv^On(98bHukhsBiiZ*I-3T& zn%mo2+TmTDJ)gSa9iO^85C}xar{SKy;oiZKPs3Bgh_<2Lo{8R$k%8Xnk=~J!k-@Qv zk;&P~v7zaSk?HB_v6-dmxy6~q<(Z}Rg{h&%x#^{a+0UOpFRpH`Z0@YDu54|rZf z5)#@=NfALMr>UK(Ek%4)Vst%CmvYqU(kgi-WI^m9xri7%AqpJ4&kFJhLmF&m1k7(c zBE`@d6CY%MLP2N97OWVezf0Ibg~9-mkP?&hp4JD}wQrxksS82{b>2VHoVai>;i;!d zy5FD7wdU5ZKOT9)J=R#_DB;<+ER7iNFRsGqKc#9lqm8@Jm;0=yv&T)_pKBZoP)FwN z9?SXm5_mq^dk@U8cR0sH^Og&BGr4AiI%}xOXl|ofS)9ZgV^H0uB3FttbFAAR61q7> zpc|({DutgOozvY$LH=E|ZSZ41-V&wn+vo@jA9 zj$7|o>*^;3F--T@r%%eda-Z0QE9}b8Y99!UbEG-B?S!wLx4xz9ai_arC6 zfJa8i7yRrGS@%ZnYN+9o%O3Qz_}^;_UiY30uAStoS}Wy_;fwlI;IEHo?N4; zc5S4>Fy&TluyX|H+RNuhR5~Ca0j6auWV0OyP4JfrX*kImYvK%!m(D(+Q;!Mzt3Z#M zPJ*0XU95hT$=OZCfgk35U|ZalnsB)vm-oJHiHtJWAO0wr!MJTPlLq4N$9mtb%+6Fn zshV~LmbEz*{I&ZlURIvkXwJwAx8I@`N58LGro%xkyM$WXK?eY#JIsjde>mTHUvQfS zqpOzc<7|`J_kya!oeR5)&2s<~ir@NnQ4PErb!BWgjN$xK_F9xJi%7~Ct08aC^A9x~ zTC3BamJ(4A`R{coH=bop#m5|v^*>Q+9Ae;GQ>9D;fbzhX24~cUd9trx7vkB;$jMyA zo1Ril8WlOrP3}0icLGM6pa$mQmVTJslf~onR3Eb*X?zZ=)_DH1Tv3>u-FdwUR=gV$ zaG~BPju(plzGTQuYgG6!az}j&MvIND@o|D#>{gO)R#G+n5a@dzRdpf~2YS zGl*$*L~NNXRhT}1m zgYF!}Je`#9i&O*z$X$10Ls*#r(($Ocn9`Cb)( zFspN@55spHn~Z8mB1=tx@<&`81{1{EYm30Qf~T<0*&p_$iO7S`TvcA=R!2s8Zk)F* zr1)#`zC$F;Ne8>jo`1zK*0G)HydQ0)8!taH&9!?xU5Lg3)2y|AaShcsD+!M zxv(~U^?Y+F$p>Omx-xk7=84iI*{#5_&~t0WOdE~nB_+t`kZ{g7Y>DoRDeCM7QmmS< zgA9i@zBy&tBBo}HLOMIDKW#@L5Y-nvJVb6ivV;yt*9{41L>5kExL#arxqR1lCyqGeUEemo54GFc`UyZ)w{iNUpcIXq z?x1Ou$3hc(fypI?`FyN>JNI;KuP!vh<{#jY;LTfGaa;c|-d8l*u#~EcziOD*xFlq2uC>wLSh&!2Aa$-?;g-E0MuyNyL9v3tB4K| z&GwxRt{bK!lrvUmo2TOg+PK{CUeF!u-PX||5b4;X<;j|)t_q;NJ6IS$tU#ZEsfTUA z3)RU1-Ax>Zh*b!>zh6gC_F)Ow-&>fcCBW-Ip;k`(el;c$9p82JfV{cTpHED%nNro4 z2RUn#-6ewS3abMb1wG?~1=h>=ZYJrxWKdm?+b6=RgB(=rif!@Ks~ak}`@a_Qm>HK0 zmqpl7fg4)^!i{1%1I4%}k1}5k(^j(aqPbBqe#*+_*BM1I!n8HJq;@K%s3~t-edUJOm#X>tGj;ddkB^%zK5?WLZgW??dk!iAdmyL(^G3-5K_T zbI!$Vs=kxHgFa20wZM&HaWAkyCmg@e%k92kJHwbWj4R{7Kqlk;P$ozzf<~F_mV%z! z`-)+lM=vva}yJl~i*R&S9=S)>c%9xdRtEdQ@{{uwKo`ZCKh|D=Y)nVQRg z0{H@lqRXE}g(Ib{qlxEVieAy}x@n+)NiJAqC3`e~h${@Dh$Sgtl2t}0?#XtHdG!+I zvAi6AWv*t~r_IFLFwYBB{UdzPNS|4iszisO}rL_O|wn%={LmO)wdz%~8f4Q?K zztIC(>6;oun1w)4D}5WtzbOB%Q3zBYYX9F1=kFebzM-M9rSbK1vHrKg`CW@Q55h(t zY;5`ed-_jd!b%?uGBJigne2>AdZT;6J}fxCdwhPzeG>xAsHk?dJ+zQhErg^6(su-Rm zlI@%O#y!0|G{VIjVfU2jFUIK41U-$SzeMswMq&wK-4&`}TGj^6b z^%{qLGWqnrFog9_%mv=fUC~)+EZNoanRksfbJ;a?u^;Z(pQnt`OJ>`Z-K+;VU#)#- z>c4upR9xX8Vkyv&8avx8$v}!Lh`wOd;U4f-nq6CLu)YfhS|6d zvZHE1>t#-y#$#0s`hiEiCzJx2Q***0FR29vExmL=QBgtEqF&jI=kt!VQfw=gWLBCL z7 zI&I*L=J@QPG)O0*Zapgmz5^Nf^-5*d^pf^MI}mdWA%p4wRpQcJDw_K0g^B6?tfbBD zQcB&jB!0-9u*P@fy-KFxe%bKd+SE(|O7I31;7^CSF zfz$Vki%`uifrNWCFwQbj*5KOD?rb4zx`^1xZEGtpE?~bf2f)pZ)XP!+P-YuV)1Bs z%uXsfu?yMf>KI6~(~f71@rtqpEemcEK!HBs8uT*7ddu0PGzpxe@q7&PKN|6@hQA2?bfRW4G3u%2bS}-oSl6 z+`@(|qf|>4m8UFzW|*>#U`P3?c#I2hqDI8LaKJ+Sj$!WHbqvop0}* z=}8*wMP(%pQb&z`G6Rv{UTi5g3BQwY&Obk8m(4&!5J~Ickh?kz0@$TEy!xwA4|^99MK0-})wV-HGjyxlb3cfq!4m^baMHg%zZf46<} zI?+zY4370WjmQ{jts<(#*4=DY;2GSt1I?=;XNnh0R-pg3{+OdFdC>r)>oVN(xb4{& zIsx%r9~&L&MdpagD{szHU+n4!1n%d8aV)aQr~~eCnxRHy>qnz0&4sm z5wFah>}Gdyg{V9(aa;5E3HVgFr!)JzwSXqNcvUXUM^-^ybh1Q!M%w_&D1*mCFLb!C zT-}#-xK$BpOwI)IuUE-ClYFy>dRLxPo?`e8yMlR%`oyv!VRniAiZKzcO#8{ERL}iZ zIbDhZ*q&=l%GqL9~>m;o$!@- zF~aXx36$dM4i0P`c^af4-;2_Xd}|lhK$_bXm$x#``8Zj7x+cJ+LCnA=6=d^LId}1k z(mVeuOYhHhG)+WvZ`Ma%%{5Q2#Y*^x<&<#1O429wxUi?<){<3o>5&z;r)FA?(_B_3 zh4${XIeJS`Fmm%_bOtU4+onPz(Q1OwG2$(id*aP9l6OZhKH*a>3#s^vw9^3eXg)~U zKgS=_g~6(Q;S_fP8G3yqK4pHHw$(A)sUxdp?HB!g*kxaczG?g zcK!ulT2GOj>-Y6Me$_&H7Hh&~wGhw(BO;c4sl;3i1D}g3VFe-0zO39!c~}st#l1)S zI&JE7kS0bbs;xpv2)_qR)YnhDe7c9741$VrjAHGtk?O8W z=d7UVpx`%c=|}4rkXD%W%)rP&I-hHTCemvqoXbs7=^9GCr2afzw@pV7M^;=kZ9ck* z=mN42Zw50AByuXNYJ2Ez5!**JH@tWhW00m05~#m6ueO6-SbXjc0Z1H%p)Kkt~$ z(-0khX*7EUKVK+0Zl!d7l00zUuh;8?cP7xN1(6IJDtrq&fl-kZ6ykg*xl^fYD}9Ud zM6V&7BG@@Nq28c~AUG+e;*1Ka>L$fNl))=}x=bWzo0ZH?5|*lUvYGFZTJ6m}nx=wt ziI29Ay~cLQZ*+XFEXVSZ87qk|xLFu9=S{Zy)J>>UsTbF-*i)UC{IcS0_Q&cQhilpXw{ zeWFsV``F~hoB#kF^WbE;$RKM3PAEWiRuS)j8E>N$(EW_=Rg7V=-@?)byY~@9qJZ+! z8Hf6KTJy?_)#;2|+&{YBe&M4YlK5j{tf8=OKkp#+l{Pnop{!=Vd{+8SO*H(rS$*gC zBY9BL<7_Jz@RZOc2(ZKf&I}f>+>?&To_G$B$}(8;=5nR9aBuF93K~W>pJ&Ot^F~c> z#y8+;r!Rx*xn9vC%2Wdh-4q2kPfKxK)@G5hw(q82)BI4O#K(bF36x&EbqP70ybr5|nerfBD8gJ-p!1{2CtaOQ z54^76`O`!k9~~WSZEcrccqX+)!~jM;mVsNV8aJ+i^UkRMQZC7*&q0opoJo?!%1SnA zzq!RbIT$R`eE_AnV_AE|olz(7u^y)xt3@hG5FJ;Ln{FA5K7tdGz;Xt&WA+qPk_O*48ReJ>N+5pXkZB9gWJ zYsc9wx6;Js7ohTlobcu?$ZnOH5VeMH*I)*lNC>(hMH%oS`t)*VLg;2WkP^9^%ELx>r~`>?mif?+iL?N#p- zzNi_ozm0^D)je7!1Rk)Sf&Szj;9G7AZ7XgC3on%Ro?%6qVOzC7$s|HTW2<~Hus?$a zKq^X?IL_e4B2-aLVq@v8C1j%hg+jmPurcQ{#LKW*=RE-aA)hW(X-H}eZ3d})Y9!75 zimm4yV?cTR8B08n!O>=cF9VJn=8b{QJqf118Yq?KU<47M6BsO)zY%Z5V3w%e}>?6ZjqG^ymBZGIgy97MVm(zPaQ+p`_mi&;Zdg01xOs5jAI^Ik$w9V2McgUPvLgt+Hrv&WxHv!n>3QNwc+%qnHL|j~^Yi z&a0zNxLB21OdK!fe>bY%F&}2AK*Q_akc)Lp02{a4iEJpBNoH=6OPc50_nfM(MW;F? zViwQQxGrJvF_XHRPZ?E6y?|sJ*an z$E8;Wv6q8dCOEBcJ-%g@Z(fIDmVnhd>F|O|=EG?Ma$0zwVqiLy_ZAMmbOC8t?UkUh zHr@*|=ayZs2h;iYJ`U(+M|plI0PIIi-8P{Y49;i_blMx7VCrN|t{{|smcVvT#i^3^X7;2eG>qHd_^pAvt)-v296 z{X_G|UW1=k{z+8*Ja5PTPx9%nwAEkl|EK1kI}CoFU;YN^Cyn(#MPa}9pZ)nsYyD4A zK)*rxNpt-($_)ebQ__Be@+4*H0P$4bFeiVE+vE*G>`K|IY#Xl@|MFoEu{7 zr@a0R&R=P=e@6OikNkgw^eb)lC(aKe{*@E@s}VO{781WBH!C$hq8_wvb zT;lwNLHfJmuc7oNnfoaz*RJztYWH`&U%l^5p#3R4*JF9TTj!6k`**Ehy~mAb{we&| c1oChFw45~Rb)-i^!oB_&Upt+OKkVoK0Yt*vY5)KL literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_spaces.xlsx b/pandas/tests/io/data/excel/test_spaces.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9071543c4739bd5c1d6a886f7f4853de777631d9 GIT binary patch literal 8622 zcmeHsg;!kJ@^#}*a0#x#-8D!^kf4De!D&Khym6-q?iQRtfCP65(!o7=2u`EH-Tl{@ znfJ|1X1>4Rz0+&m)4kTM>U+=GyQ#&u-*aIhE$B z9-|BHLV5_;G^AQP6S|Ac4YJ<6xz7r_#*@t@c;ElE*vg>+NR~Q9R})H#Uy4Ob4Vm)^}(M$S=t#l{~Z}CXw=A$(-rY z!H0D>U>E5Zi1yC5Evo7);EVJZj^nM25`MFO!|G*pyuzu796|3jN8)OfuTH0{#cHP6 z;B=L&`~}SNnSa8>9C3jsI2MXKKLkT<`XWsIT;O3Lw`U7Wbi|4J zRihBGl`*G5*5d#=$k-G>9TZVRQp#sH?9(YtoU1Z$&`vvl&9p>F*$JbtXv~U+#XZp({$}HGt1Mn$D;=Xx$nIkA zrnuM6$eKam-r>7VKfXIu0O0-}384NrTGndwFr2`#rUGXj7MzyGPCy$lH`mYW|LFK% z%)!6BdP)3qwGJNa;QePeA-(4l^Ralc%5E~xn&@8m`YX;osEx{Hq?m7cM2Yv}VIa~| zKdA3b|LnX()K)Lu*|I=M1ObsaL!Db`aPpO-3mO}<(>n#nlBG_3*NM}Kv(%?b9xSfV z7`Eci1v$!nFb0M3L-|U)VXl`CF$nW1gGnS)1N6I}>n)gFlp##WYVDQ=SJVq&X@ryH zApjl0-G=*jp19dNfz0gfK|izCe>j5xPhoH`|J|cR^SN>-4;~cjE`-M|-IW-B+L@bv zM|=ALdS4CeEHfRy?+J{8twA@YG)+#_v@zvBx+pieVJ zt!CcqzPPkvbIxN|uj3pdTk(S#D0G*TzD$0s>@~32aGqu0dOrDlcg`_C$VN-vRbus^ zE;irFUenpvby1;rG`N-CEtkGKNeQ*d(hdXSdF66Y0$LA4&<|3$ddSU7*r!@G+Ux>Y%4i? z%7emVS_M_+&oT{qKO%*KG_b(Z{`8+%<2%xI*jx<*XF&uaAQU(B2Zz~*J(;a1cEt)r zO4bbm;(#dOJlLh0gi3EQUJ0yc(fk<^yYC|#-1?Fi7m7MdB<$X(3=7Gl=+8hSN{DUO zjQSO8KXRe-`309<3XQnijm9OFsYP*G{I*A$WJVvYFy$JS%b8!mfY5?AB?%h$C1`0z zOp?|S(>4fCjV7_s46{8b0aMJ6CL);z*Eh{k4kYg9?s(Dff4DLED5Z& z3F=7i^uwup+q_lZWHpRRq&GOL@(W8HSlW@jmHEM46{)%sK;YZ!tuRMqHm+F{qKmMVcCgm4jz z@m6~u#u)G3qWl|ATuFuSmw)mD9x{o3=Ly)#)Cp*z5YA%U#tfgG@A&Zpu2L0f(v^YsaFa%35x>jVzjS4X- zC%RGv7X}+r<3{JmiKLs5#cc^aT&1DmsP-QsaTST+cxvvR!iu&q5fn~OQ@cj1U)Ogq z#u6z(sHkcQb3sdx<3N`)j1ZBaEn)I4JMk=su5FaM)4j`K26Y;MLAQ15z1gChM$?et z>^5bxP~nm1hlgm61@S2}K_ln1oSk{^mW;hO=nARU?78>%-4Kq_7`eg#j6YgrBYlVb zkp68=+DP%fsG_zC(GJPCXPKu!hxQ|U@+Ef(u_VDoMI?l zV@$VE(s#v)czOr@^yxgy*Ynu?)>yCntHruSn zXwd`pz4DEb3SRY`Zr|#+)Qx>Yuo8eIeXN@j+fK5JHN{cCn#gH-xvxDyvM^$7SlvKX z6caR={lc#zmAjP2(RPe%S$pe3Dc#}Jaaee>{F?`Vb7irrsd9( z^twCPJPv7*^gG$O>@cossAW^OLu1m?+M9?mzQ4UCT`+FBJJ>oD-u(9Hu)VY8=5ipp z<^E9Yv|@RVnEQ**(a~lCDa_||T`h)cW|opF<$x}%@}O+Ve4jbOGywPd5J5V#lSj5Q z{xSZBYr)#cum7rEWJ$vqTGMx{>CvfiXI@T%i(W&OFkU%uQ^uC~e^2 zz#&UzKH<0_tSM|H4z$OmR023(=g)q*tT`Q30sMgzP7wA{q-{*ArBW`Es<=rmbZt_S zZ&{@x>|_tGJTP>eNzTcHNt3Ha&Mb7G9Z36j=4f!iw!EVNG$HA2!C(r14j$jo!nN)J zD=v*4O;LGXL8>ni7OQ+ter->g%eWckJ;Pa#ugp2#D*q4*IejS8f{DF?drfWFDHN~l zv@zBYLB<~^={gmqE&-lViSO<7WyYm@66Y4ENa`IkEiNx9N2y-LJ6DfU*M4hB{~2Y~ zIxX>0=oN5?sCe1XFX*oxBlX=xkfpter`3Ke; zMqkpKBvI`sm9X~?X`g18AOPcwK;K!NlF`|L=ZA^56|e0bA?_qBVfe7T%ZKvwN`@k9 z8819Dqj8@v3L>u2tVWEpPNHn^#5c7lZbzZ(y-xNd+4DAZVE47J?lCc5klg)(Ozv$> zPJ32=Hi)MIf=HMKqhn%|&O<_p?_SAg7VF#WC8kfM3q3$0QqX}CCagwv4Tn`dx^e5L zc0&N8qQn>+UMGsBvjErS1FA93X*fWhR#|hc4)G=6?;hGmti>BdK!W5GeIn zzx(v;RK_~B**l=0dH-l5ORc$bjh`-YZvdsMN!HB=xp_1aji+E%qw!ILHNp`6MVep= z$6O)<=ZYh<^dr8Rm=SD!IpBP2M!3r93wM6zZ1cPj7k1+JBc<1>$L=vvy?Yr^9O)nXhWqO>hei1;;f^9$R8!_qDUdC(yMJnHia2OUlG`bK*}msm+mPm_w-wo(cSgsR-(3 z-2AjL&RWxrjvm%7Z;GH#@;DhHovWjG#&{s4OB~{Z@2>T_8&gBR@u@YKHBdP5Z$&-8 z2R)&G^f`c;S!<+I-0rg08oi#4K>&0~xIxx2>5kgK@XdhqgVtwPa>XpW3pdku)$9dE-*N5<5AKIsKAw1V$-8*s{O$$CJ`uNqfvrlkSM;EZ*zR*T(QBc%Don zOO+;INvq7^uBZz}Rl+z;*$$FRQ`qeX}LM+7=;4e|!0R}jaoUAirs z^jl3fhmVOSMl*;-sJnhhvB+u{3@iIOs>V)3rZ%0{mb1+zky3NcD$5#6$bYbhFAR>8K_DH%n^Q*qgV~%a5%!{l3x#kH=G`9c=PX|AdPnsL* zvi8XgELUR`y@^655pxN8QRuK{(MO#3zN|KA^GZ{ki4RR%@LmLJ=HS;A2f32z4BS?{ z+HzrguxJve@d|f*`wHQe>Btk;H|0H(Tze~PoP$C55fLiJUrB41u9m;Nn)Y_w$EVG_ zuI1;Sk`6fZWFhEo}3T2plwOT>uV!5WvyEQ`z3{|X40Rp!O)oy zk-nUBsc?7dkgj$*uFf#+9Sna4H5sX%NH&u|+_Uj70VkNuw*H$dclc0VXPU0Q4ZtSh zk4^n_Oy=vkm|S)>lwvA8LGS9*;&GgtvRZJqZm+ZD5Z23$U>RB@8}eP&GnnVjS1C)?w_)1NNr$XkSg<#fauydh z((d^m3WwXo&P`04O`eo4k>7dL-|!gE6Fc%?C0Cm`4aS>falDk_YcMR+>5_=-m^IEt zTf~`c@hd+f;`-i>>CX5yXG$(C=CyI~)ARse>D)<>T7rGLw@pzd8_O$5`b~v6h;RQI^?|A-s6a`xWfzDv=-=^Pc%8bO>=yV>u;C=Q}Dh?Q!c?aUJ zJtnYa^rFbKS;!K=?jN_C@#1t|oDtJNu)*YId%SJ;``0)3u2;@G`c-K8K|B0g(_Bm= zV7>4!LmM&s6x(qGwz|euI%lz?gjQFHEq=+=c{5w6t$2(lV!2Vga?2uV!Xv!40|=fs zT8b0AfpXg7p6adr#~;dEyeFeFqQkO`(8JO?Vzrq>N|<0SBk5T3G6UE@$|-m^*h^Qy z4pdsR^CphdIH4@^>|Zyr^Yd2v!1Kyo;UwEqo^&g#1qa8KD3i8B$HL++5cWbK(1uYw zIm3P+CUeTvn|}VgG}cN`Gz3b9;w^J&uiR&#!f}s6(u_~?yyEcliz-5(f?tp0V!7;n zZf$7p)DeWILT4b!o^yUMxBl>kc1GUMD(f90F?BILW>F*;)Ph{&k=pRxCF^4L{U%{Y zxcvyCu)mjkV&oY|-duFRu*h1pa%OsMdbMN=izW{ajm5Qc;zt>PQ6K?Z9cb{S#|82S zJM19^-R_ORJc&{numCN@*00}s5PLw2=>c+WC*w(Lclr|3R7(?&+9g?I=Y8my?x~cU z*6Wx;C!1$!4h20nl-;T#`%<{XsZkwM80)Z}pz>DJowrzfDv*#>wfY24< z_QO)PL}uhC*`GOjNdCZ=P>4b2r*;jc@(PLjf2k}S4SzMiB+3;i8`l7?OICyYX*p*J%FBLd$S zeR%AtJm0+pdaxh1Zh%U-SNhadqKmWcCu?vm7ju*6c`Z#!zjSVtAPkj%Q~RFxIA>`Y zBl488fb$DAwiweoUy^9O)k6GQE>}{mal32_Lj)*LiH0Bn*sV)rFhQ8F(=+sIi z53*C<+{&(TgWQf#LLkYAmBqhEn;y^W8Yi+zLkHE2=*_brQR(8%0IgR-M+Y=w1Hnrs zJOQiSqRT^7a#ssBw1?86$>8NU)8$NuIwY6?ct}`ERPCz6i>eX*E`{RLM*3;_38mW- zH5qKu!|25<3BmVEG}Ub>;&0{)7EXbEjVLWWSBA%K<|2W~o8M25Ts4f6FO6RdGD(-| z-^1(4-%*T!$O#v_{=GB)pI!UU{4afSb>+W1_ec($V|X@b7(%zZy2f zpY{LW<@nXluVVL~p7!C*|6io5#s1CsA9~=g9)8u!{`9~O4;=8n|5ZKv z)xob7|DO(u;F=`d0r>yb|G%34T3r2U>Iu)4f4urXO08eL{5^yI=>q_mB?kchkxzd$ k|9d3-tGOoSU(ElAi|WcK@ZALfFySA6c*-WD`T6Vr021!&tpET3 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index b1502ed3f3c09..e0c12d845091f 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -464,6 +464,24 @@ def test_reader_dtype_str(self, read_ext, dtype, expected): actual = pd.read_excel(basename + read_ext, dtype=dtype) tm.assert_frame_equal(actual, expected) + def test_reader_spaces(self, read_ext): + # see gh-32207 + basename = "test_spaces" + + actual = pd.read_excel(basename + read_ext) + expected = DataFrame( + { + 'testcol': [ + 'this is great', + '4 spaces', + '1 trailing ', + ' 1 leading', + '2 spaces multiple times', + ] + } + ) + tm.assert_frame_equal(actual, expected) + def test_reading_all_sheets(self, read_ext): # Test reading all sheetnames by setting sheetname to None, # Ensure a dict is returned. From 9c47bf29e95e77b663be95f864fa5813aff6a9ab Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Wed, 1 Apr 2020 22:21:39 -0700 Subject: [PATCH 2/7] Add additional excel file formats --- pandas/tests/io/data/excel/test_spaces.xls | Bin 0 -> 5632 bytes pandas/tests/io/data/excel/test_spaces.xlsm | Bin 0 -> 4848 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/io/data/excel/test_spaces.xls create mode 100644 pandas/tests/io/data/excel/test_spaces.xlsm diff --git a/pandas/tests/io/data/excel/test_spaces.xls b/pandas/tests/io/data/excel/test_spaces.xls new file mode 100644 index 0000000000000000000000000000000000000000..316db172360d0d7b849e640136da4a091fbab64e GIT binary patch literal 5632 zcmeHLZ)jUp6hALb+Sj#f(ypDlx%#Xw>((lE6P(Cwwr<)9V`ZxdGSD=AZL_`pA<1A0 zVm3EHa59q(bJh)l!*<8z`H(dPu~FGO@XAHk*S{T%jAt z0<+#+=|ex97y#4fRbXP(UVZ~DX=SQqSm1z+;+GOj_Q(kGgggb0mriT3WAfFNBAFjA zO6k8;pB4S{7FfzwY|pnp<2U1Y1-S71GuHF`uL9SAYr(6*b>JWvSAf)m8^CM8jo>Em zTJTNao58n$*MZlAZv}4vQ~zyV&T3FKjuf<_-*UA!`)g%_ z_v^#oTNq!h0vC>|`o7>Y!I@D4w+NH<+-aDcl(M zGFl|ZqA@#(T8EWNMB`3;WYEHmVs%)K9qmUA*TdFaH_IA6>ZIahj%`_~_^_SK;`T8m zg1PhdiJ47c6g=W>L-O&8azj~YK`5)mlKWu_9{ze=eh9-|f9uM3`}%r1pHMsvBu5+R z6xioDTBiJ4RH9AO6WX^C2NylUUE!Tgm=SCh?&^sS+E%#dIqh~*(=dkIrgXV;Q(n0h zB{0 z2Yn54K|gdO+N(#?H_+@y`v#7ycOw*4>4(uEj>6#lroj>n-p>@{Ml6R6yoM?uMEQ_O zTj0LaR374+Xa}RRBZj!<(vL`di?J7p|h`I{q5J@*$pgEfE@zH=u9Q#5SX%DTQMBX z%SOxj{ipLU*u#rWlqY?Xjs`u3ug`=Wkv{znJ>r68qz$kZ?ZthRJA$Mrs>xf;X=h85vsH@pcmzEH*(23t_q<|ch zqgekiUcLPG-;4hO45&|tr0^OZ1yY<^J^hTMrLO;W^!>>jG9bI5A*OS%^*<2h&1d`H XSpP=oPI>y5BP-Yb`QP9Fwf;W=%ZE3X literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_spaces.xlsm b/pandas/tests/io/data/excel/test_spaces.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..a41ebe5bb0e655ed2c39c52a43baadeed592ae94 GIT binary patch literal 4848 zcmaJ_1yt1Ewx+u~q`Om6MWj2W5d?;Y!660d6b6xop-TZt5u`z8KoF2_q>+%6PT!3G z`z}1+z3$&y6KnnUIqU5G?frfGXse;3ks)DWVIg60hiD_+GF^ay(~0cM)q4c@madRq{hI(tG6zilXGul41U9z7tMG z0g+~6tpRzx$GJ3`P0j+|XMUPkoa3g|3sKLq>h){DX>FeO8X`eiGNfx^71!>P^;n8= z*UbqU8@={p?L!>)GDzBLm{_)@As-ju$CgGxg8%<-fgyyy;RD|T2y(Il0-gAMT%2R| zW@WkCig_R_C=b%pE9P%cjJ}P1znz~DRkZ@;}+BG50W+}4EKC1 zo%^_9wu={HmAT)&ifOgUxN1uz6C93PUGVlg43Lb}WN<=CEZp;qBC7;{h356@u2c=3 zRbzzHMIraZPtK@{rj(}?z22L=yR!f0tj{Sw%Pn0b@JRLI-M(-(!z&HdDIuPQ4-aDv z&JWox2qblt%v8UV-(kwcTz;@hh!vZp0_dKSqDRl%+Vydgf|zoTh@YhX`tnT9lGL0s8zGG2VP3Iq{A-TDhs+!aP)00MBZ+0D zn4M;B701S3MeJO{s96D-Non4q%(gOfS_KZm8H%CybgwX;)@R{80Cy$IHfre6n7vo| zzM++nEN4C_u(56`dKfz^*U&WRIFJP!Up&qXHIue|z~=qyRqi~l^${)tIm2%~UtA$0 zA$g+y4LSJ#AjjR~xii2Wftzh3Ev*^BJ1x6Ky?4FV2B^)yXZ6}vMw~DT(~7 zz8JAYf1aPcLZeQqiHq9-NltbuFGc0BDkm8)W2`T_WJZ8J(1y%nrk7L{)rfo z;S_5B1UpX(s;4$}Su~|ect&Qvx%=jeRy;>6!bB5b=!vy0Nbn#cJ*%xeH2r@c^FG+; zi$gVPCBIenIUeyue~|liPCpuSxk;vv@az6i#plz<)!Q%D*BHv%LPh)s6(s8!kE%8I zkrZ}HC86`*vN8x0nk+QGu(>87=N6v+sI1FS@CpX&q^;B4duk?90GO!j~0Qw#9DcQgB(~J)+L`)W>p=4OG;u!ez%F zH0$@OLcXXyJ~&3t>2b@{d@t%iX~#vG^r@P}fiq?QYGo&2=Uy|^0)@bKk3OA*FfObn zzV8n7`SPyG9!FkKnSpruzSg}m?Eq%6sMAH*d~UUfeKimEA}LuNuEUfUDhUYpMZN1c zBNY`f;1~sfGDz+djRPH;YUH{7Jn0gk>`r;YBdcxQvWKc4`z5|)?J|%B2GA<)MvbxC zB`BMKF`)thM=yus&tP*^`-OwgXhzDUxlCb(b|9V474iXuuVx_)dSg~mEQ;gzKNwL! zpBd4l2R@Rd@UJynSiGnfjJuxI&NHQt3I&GDB)DlP)?`fCh~<2hN-1W~X&2HQ86hqT zcx!TpkMDxYJ|N`2^3Ri?2(d3j$ElaN4^v2rx@71Y62`icEAb)XT~=evxai#IYq|Vy zjwE@Gv$TiXw6EtU#SIyiys$)y@O?ubS;c)1Hcly`A&ftcc{CSPtu_Yyg(kOmP59u& zdCj+>H|A2_W#E(C&K=rscFUqu(m{tT>U32b$-x=}K^pw1O%1|T86uV=z3r}*cw5JK zll=h88XnY)EaavZJm&y_<$;K04XI_%0&aP&wwJk|LX7G#R>IN~J+GGROcUjaMvM@X z=7RU?70T?ggwyyAE#zls@+7c*kr8RxmqmtI`|HppXjyW8fb#bAJh|D$Y(AT7I@^*vyu^c|oW!(eGCl0Vlbal=IMSk> zV}n2IdAu!@84aO{nCXt+o=dRiGdp5`71KUmu*SKR&sog;<^1%1VZrnMGvLLwzAYDs zeurpkpsX~|{A|r1a_F^2lK+vPz;neSr^P}#on4MqbgRO9iV#uIV>hEU^XC@q+2X1gNZvJ*P6kn` z2$<9yNEq{5b-b1~FYHm(ESczO4Bgfd=>4fOl8M|;u2*h~`oWb)yUD%dSC#byK7mDn zUyfimK6XEdK3-%mZK3tReKUtA#eOu2Tz z)+Ot~&yVQ}<&|^aUZ5Z%qqTSDI}ATJ+g~Lf*}sU|{wWAxqvru~aJBo**Rk>aj_{a# z-u1)DyVxuQ3r8>0WB7$^BZCXrFarB^zZ@Tr;Dl!>hVdr^BwQ;W|4b$|D{(IabnFW< zYGZr80;KOGh8FaOfv~dPQYl{8jJ8&rMW8&1^6Eh=X94hFL?v zdIm&j_5>*3Pr(n~3)eCKUH!r-0?7twtqlUYxeHhWL4e=-SGQLaDoE6PKpwyQ^1%MN zyaqRNDN#O&u4!;RwOs}c$>;aq-dqU<1TjEWXhw$V>$i8ncHUn1X@I>QQ$hM=OV~K~ z7$n;rl7+H4?1ggbC%SbYdtfs1NVM&TdCz=>@|oWxhuIlpzlH>Zz(j^ZEu?NK)1fIXkNZe|G#+mj7eUTYFQxn;wFc&08IN@yZrIBx ztFsZ|r(2^fRhY*$%X^<3V5`c>9Gc5`K<(*gVMC1c5kQv9HGh`L&SZ6lwl-F8dL(^l z&1ytKaDD=|OGSF!g3hNE^u@F3vIpop{I-0ZDUbpR`Hd3EUkS2aP125@(I%;@t;c~G zhWl=AmNk_2cCV|R_6Mp%>q;~-&c@EKP>Xl^reHA021+A{A^URZ5EGHGA0o0~5IkAC zf5kG$Mi5wmWw($UYrHBzhllc0QY;k|wW{6Z68YDE~lz9a)}qI5RMl zyZv1J{{8*L!%45|Hxjby(@oj4hIhHzRVqJ7AFlZOa7`2{b7qosNw4=%lT7`TKBn*w zilWht$L2p;iJ}rqVte4K^KnxiAx($kJQ8P z3|{7sl2c_)isBaU2*=eRI}4%&i}>y$Tc4` z&=@Q8n(psCZP@9m;B&?3y6m7-E_7+}h1BW*pzD0#At%R6T11>EK?BR1@F@8H71*Tk zH%#S#t{wna4-5V0ZUA@l-^xb6U#mlqtnI*%w+T|a@QDh#kn5vQvhVD8Qup{gxo+Wf z*Pl)m>uYelkIDE+fW>YRofa)y;#~;=@YfMy>NnW-t~hz`?)g8NwVT-R5T+6kqj(Y0 zGQH+N7t)e9dO$;V05cji;)>PdU8sCl40zGQotp|>Bwtm;9Av_cXUPKZ=GyR3Cm4r* zHZ>1>+p}FKpt{OcV}SD&CB>m^SThrK!!O9m{7!xEgDhhCy8EpZ;}(}?mY$xBjQZ*n z15Lz0ibf_EWT7|-R}LKI?|tEzZ>oY@VXR4+l3J;{EE`SFdJU~p3In%vG|up72og?d z7>PRTMMV0auvx}p`^)!4#-as=e3j+aO9_#CdaBA4=JO+nyc|8;z+{+IxDH8cW?;RK z-Rn~6g)?Q7%zmKB`^xmtB`%5P9=rMW2i`e^uz{><`gtVGHu3#r>3oqG`Gt^miz`jy zl3$rLOXd8zrxo|9Mvhf7U;LiHr@MY+Il|KgKDwl+r0Z#Sj~Cs?@i%{2F?>unZBi02z4#`yQX|4&N)xyo(E zLNNA?En)v&<-hRvpX=Oi?+AXou}ip0Kpf;xw)=C1+x-SX12;yA_a8{%Pxsqph-gYT t)(sErzd7Y!E$UD2+sHta>W%FY{Kvx8RzrtJ83_p+{>Z>PWXA73{|DbzC0_sl literal 0 HcmV?d00001 From 8f5f2e8e23a3abd0111393a40a55558d1cff8a16 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Wed, 1 Apr 2020 22:22:22 -0700 Subject: [PATCH 3/7] More correctly parse OpenDocument string cells --- pandas/io/excel/_odfreader.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 7af776dc1a10f..5e875ffa4fb64 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -171,7 +171,7 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: cell_value = cell.attributes.get((OFFICENS, "value")) return float(cell_value) elif cell_type == "string": - return str(cell) + return self._get_cell_string_value(cell) elif cell_type == "currency": cell_value = cell.attributes.get((OFFICENS, "value")) return float(cell_value) @@ -182,3 +182,24 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: return pd.to_datetime(str(cell)).time() else: raise ValueError(f"Unrecognized type {cell_type}") + + def _get_cell_string_value(self, cell): + from odf.element import Text, Element + from odf.text import S, P + from odf.namespaces import TEXTNS + + text_p = P().qname + text_s = S().qname + + p = cell.childNodes[0] + + value = [] + if p.qname == text_p: + for k, fragment in enumerate(p.childNodes): + if isinstance(fragment, Text): + value.append(fragment.data) + elif isinstance(fragment, Element): + if fragment.qname == text_s: + spaces = int(fragment.attributes.get((TEXTNS, 'c'), 1)) + value.append(' ' * spaces) + return ''.join(value) From cd7bee6b7da1a359193ea9436f90e3b9321e3733 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 2 Apr 2020 12:28:45 -0700 Subject: [PATCH 4/7] added xlsb file --- pandas/tests/io/data/excel/test_spaces.xlsb | Bin 0 -> 8036 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/io/data/excel/test_spaces.xlsb diff --git a/pandas/tests/io/data/excel/test_spaces.xlsb b/pandas/tests/io/data/excel/test_spaces.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..e38b6c2d8f17061f1bf5486d18aaea6013639578 GIT binary patch literal 8036 zcmeHMg{B_*Vm?h+(KX^<}IuBGcPEue(JqU1`6fP|EEsz?Yd-6b8;NXLRm z{Z_sAs_5@8_}?)2Kt(?P-}XQ3fjF%$)n;CTT83Tx*b-LH zE)=@qN-S0)vOF0=o6#8o4eH6g8m2Q}!8K|1s(eUz{1|@)|4fgqrY`fi#QVYVh%3hd zOlI*z*~Zoz4(G{Y<>*JKok<5Xaf)*6 zT8@Rw*K*W3c&UH^?Kq(31YJ!Gp(==ZK$N@%q0KQGc`g0UtZa?fdz^97K-sn3C`S`@ zTTgD$u>~>KFY5O4);Hi@`mt&nl)DQ}uc|^ajYA{ru2w(vp=wkyAI0!&tkb{Z!9&qh zmF}|9A3jU~nplbPVj1)n4xd``$(InVD$Xpr%rp|0eBfSxD9ua7r zNxN@4$AkhA0`@XokqtN}q^>EK%uR!W;6zdWKHS+^^eS2p8=r~>T3g**<)6qrlZ|+Y zYMRQ5i{0Adom!*g=I*q-A|AD>s~@M`MJGBUB8y^J;|}>Sn53kD?OuC*-k~vWCJgV-?>|3?4^uR z2y)QgFq9WG&AN_Dor@c>`Z^^_euj*Ka_9EIvS;Q+NlQ2}h2kRvRMTY<4UmJt)ROY3`CBd z>jemY&*UMZx1OL_z1wWA-OX8XR#b<{<-F-?PjA6Z+=9wtg9Ux3(~Xvn>muB)vi1qL z^s+-meSw+R!(N_>d*!Wvm=jDoqPb%l69czQqye#0Q(9q|3i~?TY46%AU|JGs31UIu zbmo)lcdiNHx4>wk=(alO_Bz_so-7Kmo#QS#EATJ6%bYavrB0q2E^oTl43sO zG|XDQE0(Ma*Cv}=iRNofAzH*st%h%{=ZQ(89N-ySZx5<+P zp}crAgx7iWp3Jun zzVw1Tao`UP{-QVn2iVNVs9ipuY7m2o4b!A*zGGVU&7ugN=uT+ z>Tcn66uE0+uZ>_U`pJ~)DCZMF#m%$?(y{4qJ$La74}|>uqCCYUo=TCB$cFneGs2}b zsFM8q99()++@k4=3snG?4ERmbE^;w@rna(dA8&IwYGf#D3#TX(AT-3G9T2Ctr8?*k z3^m>g8oOsAfvP*_fVWZxBNy6lE2k<=Z;Elfm3bXFtN%KoP0+syA=W73T|VUjG1jLL z^K!APbTgN;`-F&SGF40Hh=~5I1FKQmuq~W4hpTiIl!={Kdke|BWP2pWb2(tF$K{pL z7RvfK7_=3?Iab$>d(Bd1pf(a@LFvdY*n&qJDp@c+0QHHwE4r6mI<1YP(~waQ7qAS4 z3!b^o3%mLB(O~yz4+pvQTrh~|r--ViYDF=KA^rEeXB)`~>I1O=fL72~Y5y&O-EF~6 zV4iPBzOT}LL%-(<&_trUOqSVJ@nnwW+~>{b|FL4AzY% zzC#HyLae-nyPDkionXEYb4KS?m9GdaxW2^s+CV-1l5ARgSw25IxbyhY zBc59iWG{}F428N@LByGm=t4T7!H3U`n0ZBu)D5sARGYk@7|cZjgbpN z{I3V(+|#q`Z22%=hzQ1W2pGH@wk7GNUSi-a{m?(mO-PFtSR$E!ZiXd(9ntuh)FfEb z?@chi@8-<=4<(Xp^<}{>IyJ%Rc59>QP}rh7mN5<`;@rAQ#Hx2SgLvj@utr4!8;?4{ z6(9F}wq?%P6ALamH40i32^2OMv9joauVrwEd5!Y5<>GIh z>b{T=w~Nk1i@>vM0VY9H=~~)|@2Mt~FU(>T)uZ^Fd_g=<6tD9+-hZxJGE;l1EBIOm z+(DgbW!`zVJ3V`G)b4sPY$!vflwvxDZz zl8eL1Bhy=zULv>nJ`xjIew=PwnPDJpMx{R8#c4=^GfKf>5`?Rsc{H8vs57PvpPCS* zPE9OS1!= zFQryv;B$#pPe5-b%6oQ+OWN`rM$r7M8O#kqpFrY`!#5VFD`Bjj`6=m1$(Z6&Hz2W8 zCC9_?Du z3iQM$`NAOtvNO`m#yghQ+XRTK4|I*W}}Qw9bMxeEKmnvycz1e(WuPxFH%qq(@AlfB%Qi_yMX#!+{C@aZ_85+`P;0Wk>* zzhuAJn?Ww9-WHFHmhp%MNAA?rOl1gVI$!yh)Feh)#|PCDX*&%=hr*uCZFb&MkBj0w zI&^GI6-B1(T@OE=V@)Qb7%<`UX~CvbJf=sCyl$Rf`0G}<@R%OmIJp(IZqgb?vrv=R zp{gUM%5$%hr+++eDC!{Y?u%o%Z?xa?qrozxdwz@u`OyKU0u_XK>>~bC;rk5l!RXPi zx*O3+of~5lc7>n5Af$+7sC<-_@v0<_Qzqv=T`d!_tHsN()S7E)#q*YeN@}+Xv^`!u z)6ogE;xob7|4TR<##S@Hsk#k8ByUQ+D0jwY9q)?aWv);dre;ijE(vDz&jHH0#q@KW zuyRgZCFRO~u<(kSRD$aW_jK`aF6@3AJ{kcEop40(p1>tKJAl&FV`2V-OyCJl_iO{YZrt*(LL%W;SxQkQjPe?XCk$MjVJ&PU>LnqW>OP6 z)xOaM1Byt{D_$vns+xSof}Zb)HC(jb;K3GV@+wv4TWyCJg7eD7i5Q)8$Hn(+)lYZ* zqH+h_-|=N~rq)Y^s=0;i8*)AHD1}*ZxE={7r~0zEx6smImzC;kW2O+IkYSAsC-b8U znfuUl%)1oug6&3Y9}n`yMKRPwsOxBSTctI5u=lO4zMjHtHLSMg%`=A;5~Oiz1>TIb z*B+|1e@S(yGmQ^q>!MJo$V0kbLwhK-rI11pY}S>a%dv-95E)LF`|O&S%Jd}L>$(mP zLw_j~ruTDiJIP%#?Qb(F0&?nQVFm3EGg=NXF%0#5-|Pe^bW)`bM^UvDIgVz0RK zdS`dyLnbwV()%i=`CLUM-93g*o_i#)HbBkPwRov_M7^FOL7}#VzAyo&_oP0|6-6j& zGmoD}UDOhJ20!51NMtW^)x-THnLUjA4xuT8w?1O7SA=)JicDS4ibS{hpymO(F1dTE z@x=OW>XQ?1HzhaA&iNZdWcy$hBhx8ehZCNyJE#gPK3RhHqmy-V`NrE(ijC=`>{Ta` zCU*U+LReXn;*M2!dHlAOgM*#xXaz!xG_xkQ6NBVr2VAO@deU{)F?*aH$lXn?obU>7 ztI$5cER`WoW-%P(H^PQ zG3QRwx6Ry5Qx14rq4&njV;t6KBr4+J0tt~|#l1s>)8^Q2;%D)U^U4tIY1Svvj_D2t z>TtnZeHDS*U8C!wtZeO9N87E;SOSM!z1R9GE%KT4{fM-$k)kQFtw}qS0Wsp3ndfVs zSzOh#@2*g_EfIbyR=xSWHyRU9E$4b_;O&~L!(cVU9<8YN^DKD2fFLGyZpY+>R$bAH zG`?*H(TP_fJ~Mw0PZFV9W2Dg%It zH7A0pSossR8oj60^A<|4j*Nq}M8_gWPcw#Q{naC8xj=3gwjIW}INSKBS6FoboHd9;A&#%DYSK66~YFpqA&QNQbZ9WE_+e;WDw-g}c@kRyg6bu9V!k^dCC zA5pWnvP$Y^eEuWXgiDS#7~qt5$LEn(-EbZLvVAD17N=m|3{; zX_Qcr455M-_dcy4sD?jqi*U1+R za{qgJ^>?l#wZA#UN5R<&?ENj*2VrIaR18o8Dql=MXyBa~AzJpP_JDvIi83uv8W64< zn3NbB*mw<^dxH;&^@qRPy7jz78A&S>WOxXXT>vW=OLd5gs~eA{3k3Xkm;1lTKhkxN zV1`P~yyT(V7#GqV{?BPrKJ!CVYWRiueFzig{a7J0IIKdW^~yL~KQ zJvADCgfRh?TV}fq$$_cvbxg(8ddJ(TKN9+%_SU{Hm9WGZgdROM>9)2~ZRNF9@+ zB9F)~KzUCQifbS9VU88Lqc$2;-17n})apZF$Yz}Hkfc}dvLjyZ;S4l?tlFucIXHAb zdS`NIp=(FWwEck&o_g$Ga!Hu>l*YFO(!rrNqGt!}_8U(r599z1n~$sV*tc^QD9%$w z#^%I`0)2Z!BevSCQm3X9Ike#BRA0)W#CuZe&14HDlU_}c%j>9 zhqQRplp@pWc!7nP@}dW&7AP=J8{2y^=)09cK!7oNac^ipP0 z%Eq;E^L;AwaxX!y50B-LFz{%g-h)8iBylP75t?LPM!n*8s0G9banO+CQHe%dzY!H4 zSXHCcA?g4%&3}>7Lue`8^0bUw9bWA%thb}s3w|u}+HAHwD~mfmCD+~|GVEq_f8;K< zVjA!hVRvbLvHnV;=9C|Kih5oK!f`WMjSYlZ+R zCVk#(ZLY&BCmFF+=};devBy_&|a`VDo9%zm~? zI|w_v<=OFwL1-q+g}Zj%ZKB0j+SOnMtl?z%G-H`o7}1A;Xn)@QfLH%D^G1f4-MKTK z&CZSK8j~5$PBo^B^}?LR<5vh5WH`Q8XcSZ~WHtWtlHos5`{(?J1w?hFUjcqyx%kKM z>$wI=ji1&rE*t*Z%Kh1J9`joT`F|R_mvJt)_I@B;$N9Gs{nHx0Y<#(4^}{$2_nYyL z_SI$6%N>~?rjx*LrkDFOmjNyd#2)~W$PUl1CG@*5eh80mE$+*Jm-W^Uz-XlWLIVD# zzAhtN);&KE6s}z!(vQJhM!Bp8exUe}UPigB4K8E-TIzow0RXE=tltX%W%FM@(?6R# eAf?Q|=6`>*)s--isRsbyA%E1!?B%2Sdi8&y2U)}b literal 0 HcmV?d00001 From 5ed0568fede67272ad7b35c85997194d279214a8 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Wed, 1 Apr 2020 23:02:47 -0700 Subject: [PATCH 5/7] Apply black pandas --- pandas/io/excel/_odfreader.py | 6 +++--- pandas/tests/io/excel/test_readers.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 5e875ffa4fb64..b26000c8f5ee3 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -200,6 +200,6 @@ def _get_cell_string_value(self, cell): value.append(fragment.data) elif isinstance(fragment, Element): if fragment.qname == text_s: - spaces = int(fragment.attributes.get((TEXTNS, 'c'), 1)) - value.append(' ' * spaces) - return ''.join(value) + spaces = int(fragment.attributes.get((TEXTNS, "c"), 1)) + value.append(" " * spaces) + return "".join(value) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index e0c12d845091f..99447c03e89af 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -471,12 +471,12 @@ def test_reader_spaces(self, read_ext): actual = pd.read_excel(basename + read_ext) expected = DataFrame( { - 'testcol': [ - 'this is great', - '4 spaces', - '1 trailing ', - ' 1 leading', - '2 spaces multiple times', + "testcol": [ + "this is great", + "4 spaces", + "1 trailing ", + " 1 leading", + "2 spaces multiple times", ] } ) From fdda928bfd72e330c3b1cca559fd799bb0c37e54 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Thu, 2 Apr 2020 17:10:02 -0700 Subject: [PATCH 6/7] Add docstring describe purpose of function. --- pandas/io/excel/_odfreader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index b26000c8f5ee3..739c77d1c0b99 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -183,7 +183,11 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: else: raise ValueError(f"Unrecognized type {cell_type}") - def _get_cell_string_value(self, cell): + def _get_cell_string_value(self, cell) -> str: + """ + Find and decode OpenDocument text:s tags that represent + a run length encoded sequence of space characters. + """ from odf.element import Text, Element from odf.text import S, P from odf.namespaces import TEXTNS From f7894eda09455fa14b4dd2e4dcbdc48e35f6810b Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Fri, 3 Apr 2020 10:06:18 -0700 Subject: [PATCH 7/7] Document which bug was fixed. --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 19e8acdaa7384..329aa3200d43a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -408,6 +408,7 @@ I/O - Bug in :meth:`read_csv` was raising a misleading exception on a permissions issue (:issue:`23784`) - Bug in :meth:`read_csv` was raising an ``IndexError`` when header=None and 2 extra data columns - Bug in :meth:`DataFrame.to_sql` where an ``AttributeError`` was raised when saving an out of bounds date (:issue:`26761`) +- Bug in :meth:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. (:issue:`32207`) Plotting ^^^^^^^^