From 69d09e9417423713dcd05aba5e8f6ba408532911 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 7 Feb 2021 12:01:49 +0000 Subject: [PATCH] Backport PR #39486: BUG: read_excel with openpyxl and missing dimension' --- doc/source/whatsnew/v1.2.2.rst | 1 + pandas/compat/_optional.py | 2 +- pandas/io/excel/_openpyxl.py | 34 ++++++++++++++---- .../tests/io/data/excel/dimension_large.xlsx | Bin 0 -> 4895 bytes .../io/data/excel/dimension_missing.xlsx | Bin 0 -> 4875 bytes .../tests/io/data/excel/dimension_small.xlsx | Bin 0 -> 4894 bytes pandas/tests/io/excel/test_openpyxl.py | 33 +++++++++++++++++ 7 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 pandas/tests/io/data/excel/dimension_large.xlsx create mode 100644 pandas/tests/io/data/excel/dimension_missing.xlsx create mode 100644 pandas/tests/io/data/excel/dimension_small.xlsx diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index 0ee1abaa2a0eb..cc5653fe2f360 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -31,6 +31,7 @@ Bug fixes ~~~~~~~~~ - :func:`pandas.read_excel` error message when a specified ``sheetname`` does not exist is now uniform across engines (:issue:`39250`) +- Fixed bug in :func:`pandas.read_excel` producing incorrect results when the engine ``openpyxl`` is used and the excel file is missing or has incorrect dimension information; the fix requires ``openpyxl`` >= 3.0.0, prior versions may still fail (:issue:`38956`, :issue:`39001`) - .. --------------------------------------------------------------------------- diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 4ed9df2c97fdb..2bde42357b96c 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -15,7 +15,7 @@ "matplotlib": "2.2.3", "numexpr": "2.6.8", "odfpy": "1.3.0", - "openpyxl": "2.5.7", + "openpyxl": "2.6.0", "pandas_gbq": "0.12.0", "pyarrow": "0.15.0", "pytest": "5.0.1", diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 583baf3b239d8..4f02aff2eb992 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -1,9 +1,10 @@ +from distutils.version import LooseVersion from typing import TYPE_CHECKING, Dict, List, Optional import numpy as np from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions -from pandas.compat._optional import import_optional_dependency +from pandas.compat._optional import get_version, import_optional_dependency from pandas.io.excel._base import BaseExcelReader, ExcelWriter from pandas.io.excel._util import validate_freeze_panes @@ -503,14 +504,14 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar: from openpyxl.cell.cell import TYPE_BOOL, TYPE_ERROR, TYPE_NUMERIC - if cell.is_date: + if cell.value is None: + return "" # compat with xlrd + elif cell.is_date: return cell.value elif cell.data_type == TYPE_ERROR: return np.nan elif cell.data_type == TYPE_BOOL: return bool(cell.value) - elif cell.value is None: - return "" # compat with xlrd elif cell.data_type == TYPE_NUMERIC: # GH5394 if convert_float: @@ -523,8 +524,29 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar: return cell.value def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: + # GH 39001 + # Reading of excel file depends on dimension data being correct but + # writers sometimes omit or get it wrong + import openpyxl + + version = LooseVersion(get_version(openpyxl)) + + if version >= "3.0.0": + sheet.reset_dimensions() + data: List[List[Scalar]] = [] - for row in sheet.rows: - data.append([self._convert_cell(cell, convert_float) for cell in row]) + for row_number, row in enumerate(sheet.rows): + converted_row = [self._convert_cell(cell, convert_float) for cell in row] + data.append(converted_row) + + if version >= "3.0.0" and len(data) > 0: + # With dimension reset, openpyxl no longer pads rows + max_width = max(len(data_row) for data_row in data) + if min(len(data_row) for data_row in data) < max_width: + empty_cell: List[Scalar] = [""] + data = [ + data_row + (max_width - len(data_row)) * empty_cell + for data_row in data + ] return data diff --git a/pandas/tests/io/data/excel/dimension_large.xlsx b/pandas/tests/io/data/excel/dimension_large.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d57abdf2fbbaea74548d94ff72dcfc1468fa24a7 GIT binary patch literal 4895 zcmZ`-2UJr{w@rZ1OQcJcDj-4tMWhHw4FU;(siG9=(gma! zh0v==kt#(5UeNb`KY8DuyK?VZcddQy%szAW*)#Av_yn{705LJ3#!VJsT@d_k1^@s^ z;Q;_-xTh(~%F$g|=JJL9C*;W<+|BHyn-eT+fKmfoQ9{`}j5#f!C z>4I{waB*=E@^*5J*6ws3hk)z%uMkW*T{W?1evYpn6APFGzwAuHDAPMc(spb;&pMsD zV3o^{RiF~TNj6=V+Z=TPh2BqF+%uMeAB=`LrtG*( z%5Mw3s~$kKllROTMn`0IoYIX|hWIWdt*3sm$t3lUyjZ-vy%d1@Y@=nSM3xd`JwN3| zWT9`~Z9OY(!M44Y$*R-M3zWGM_GUR!f>tw?<-tk?{q=?SUpv-|WMvi!S4#$D+d@7J|*!YcM&zjcM-?=`ZA zJ-xe(E0iM!0D$868gcjVakO$jTOsQPlg=+7jP=vn1`Vbhc9=XG`2EyGPcdBw0gYtO z*&txyiG1bI!&}FW>cr(e1-XUKH5b|s%K8pts4Cr9Qi?Bv-WdpRdK0Ut+#*Q4%*i87 zHT#`2KsH>?u2owR^ns(OG}vaKR_<%$Tp=Ekf*fC;L$F*();mp&v^!RknrA7GhX%vp zR3N&q!!M%t3?#A=?H{-#q9_rC*I*$sM9OW^RuMPwFz8A{4zI)uxtV1lMf)7Tc9uJe znG7Hk1u!US#whKk#{)k2{tUgA!rb^b7Lm1;FBpl32FS8VB&b`{Z zv`acd=PwZ`D+`6O?7i-}%%o&CN-FZouL4H=7lNXiJ2N z=Aq==YICpsZ9A#L5B;F53`Ij^CsG^Ovy&yzGdN>iESX$0EmrUTs9?Yw3mgvXtfdvb zj#lt8t^uo(bb(ZMYsk$XzG^O|@Td0jqlvOFa#i?{qb&nNjm$a`P1@yw^~I%0sD&i_HW^G$gRq z);tY5%;t-*2C`I|ukdX~l%J#ck(R<(X2M+sRIajR?TQk+M zip1-hjViza6<$r{$O)9vE8dBbx(`8QO^n!Gb`5mUd@5kG<#a6ATBd&u+OMY8 z{j{JhWf$^v+T6E#NoJx`yXuZ0%?_nuhaJ_N^KPYea@QXA)(_C1k|=#FkKkXhfZ`DV z03NvJ!3bdaKiPD*wX*VX7yk7SIjas`9TEWk#~?G60ZZ8K<eOVzkYRrvz#V**vq-YCkx zluQYlz#q@VTww8e!F|WJ;2&(Wj~e4QmQLPbA^E z6nGv8`0Oq#rffp+KvNXMB?HE|Zsy!x>}W)+<6WhbAFYosGh=)>bV()th^INsm*SzS zc5DCk!c)Tt$efDw=4m z+W4@gyzdZ$nJ5UVth|Y32N>K&8Fjt#L9rYwtu=1MCm^gNeh5H@d6d*?HlgKQ%oYvM zFE^;>D9J0N3|i&CZ*#WOa+h1}(?Y+TSb1#uuwcOqTd9~3XBo64O*kD`ka6&On%GG( zUqfa}(8#MS(W%ERh*IGU0+!;+c~d;h2%(bKMR*-YzTG7XtTQp!BaW)$sTv5m_?Yyf zQ4Q=d2K60Z~ zqkZ4c_TTH|VOg$kbxe+~JBE3e^FC(A3RDXgHBgTUzqFV|6BnToZ>uA*tJ}J;Qx7_z zsWyacBB10>8A^rF-k2XK@H1o7vhfO&I`8b7Ad@CjYKDMm#0bko3(#+@4<3z0d=*y9 zbm>TgM0kZ_#YZ!{g}vo7=G>#n4O`V%q+f;U*7PecSi#G}CY>Z$LgfUr8EK}xyEdmG zD31(3a;OuPs{a~IZTi(>&++DuOhrdZj6~>+nw}r@1}o%X zg~I#ld_M%M|q+Vzg2zlIVmRTpv>~#I~ zGick$Ty8}>t^0m8mz+STU2#YrKLfSl18#eT^hdRM9);^UI(EnxAD!Y{pg!mtLX+t) zBX6r4{rj3ro{cj<#t-ZuHVwgZ#iIoQC*aribK%Myq|r7X;oIJwTTh~rt$8=i2<$7S z39$W$1<#tn$`Dj3U2;*)T1{@%b6QP|Movl70EQU)s5VjlF{1d{P)Ex(jZDo6iJx4# z*t+(EofGdVhBu8>ECTc!T0*iylhgaMWahVSHA-jK8!_sBQ;In--M1tvixfx|5@M6G`|phUcTpj(I5t>uwmJb z#9bka=2$7?oK7{({Lh^=A?sSgAGe`HDZm~&?P4SRa_1}XTKDGDQsid}O4Dp~I;4Yw zqzA2OhIQI?Z+l&kA?NIF8?i)6&qnr*|E%!OpU7#^_plY3|T zU6NAq>#0dUyuX={2YkL(f@44mXH0RP{Y+imZOu_umf9XDJ7=3;z9YJ?iL@C)PB>Ew z(?WgLyl}xzN@tT~7uZE_Y_4JR^ie~-F4$#m>$2%KjrKJ1wR7T4?zfP1-Hh1eVZs^% z30A71>wNUE;tiV2Fe1@mXNyFoXSx<-sU4*Q$uB6gzuaYt?OI4d?IJ81Y}rJAJeN{@ zRBxg)qNc)5tn*Zvn(PjoD{M952ecewHpsILgJMAmF+ky10!; z^Lf&=JoyB#E(>r}5V!V>jt_&Sp0A6qPtS8}7-kT0vwPaKm(`lsl+_mnvoc<>f5t`K z#s_G#7t%?}P8gA{Q+RM+$IWnr6y*VUY~7~Y?$KqB-lh9C8X03ZUNITqhv$B*ikw4Uz6X+#4h=j(Zc+T5ktTRe*9tBZzPPM_|CteUSJ$(I zq{hO|Dz0tgh_-1rS%Qosey*x0K9zKwgSi&er)NlY+M=iSWBrR$|6bjTw6p`LuVbx& zb_xgYx-pEcFqw8HM<2_I-WHcma6oX=YLd*e?k?axay-OGN8gfDq{#0eNbp77CTCWA zD*b>izpL_N??N_RqLnm4R>QD)>f>0j0(SVmin+Wism{d`xowq+l&$yh#l=4Vul`k` z-SdtB<|w(DsHv`|Y)D9Dn1>vK`|)8uGH z+hk*+=>guO@?!nkyUe=cQBy69^||fufq(PzWN%UYE{+p@5CA}f7u*9Vr-Ytxg?X;e5^K`7sErhJEmX_!*y==oXJiH2d4h!4q^Zzm6^me?&y* z+l__yJA=z9>dT4T8Dv~JC8+K4syo7+j*?yUMr5~|OxE2I;byNHca$h1 z(e{G2Me*(YY_6*U);@0*=t_6C4ua{FiTXJx;zHERnE4J1NIKUEpLo>}U+(-~BCNI; zYBb&vOe9!lV3jIC6uki~sgOV=1W05@IHLWo7Rw%)G_92)W=uZ0P8|M94mJ_?(h9$~Yr0YQgQFZVE;t)Q$YNX7yJ$okM>Oe{>zyK zE*<~){o@4VJn;M;{||};m)dhX{`27To4-Hct2iTjrp*7@3Z6mP@P7~Z|C?mSUr5{s z0p$A_z~V`NMgK3&^L(81&E`*>G@QBrx7Kq$!g)FW z6Cr^7U&Q@9{Ji%3frBak0smhmI*&eIp?}bPxbvKUsaN5?8|7+Shx=t~VDjkJi;A)$bT2qGaVAPk*@j5J8d z+#v-);2ZRNulMracg}Cl`D^X-oV}mD*4mGb2JRJV0DzDX(EC^hVqYHqZUF!QNa6qh zAnen^%g)tXP~hTTnL4bFD@-1P364vF`_Gt{ zw_=W?d~bBgurP)HkVjci^gV_4u#%pB0fSbvtDxU^xE3MLlxfXEtY>Zmv^F85?GLHO z7_VGesy9cv*NLP>3ZgQ+ku6ddT>XwL% z`hK0n>=|23;t<&#o-H{N)ugUDgTflux3{7qUskau-yL)?r$R^e=X`%&?&8PZ^z?DpN?wZ4M;^4SzmFv4-LcEX;x|a7rIZ5`*bAANY zhE@aii&EAsyIaqhp#xk%X}+kM^*C{Atz4$C&9}67R@ydt(G{}NtN5Gd(fs-n7*P3^ zPdK#%F`tN9!k~gq`)9hxid$BtvpHV`$w^kZj|*ZIroBXTLdNjqD~Qkxp=Lvqa}SlY zIAY|<>tB2HB-73Y=%-HjoNIs~w)hgK;=IBbOQnI3_xZ}>wql9+qY_M{o!h=5M?wtR z_#3dSv4vEyh<0dd1=D7Dh?>fL;dA#DAOam_0RNMt-Zw1>v3bTU+Gg0bN%zk^LUv;U zzJhI(4LSgT^w%Eo_6cyc^S;<2Xd_eiY$5vK`Rdm{S{24Pz}y9!62fS^@+oN6ZFC0^ z*C1HFemwf@_)`YCW476tz6STM^-1mU_p1ab@vB2q3E`%+d-J@q?QU+SoWH{kb)-28RIx4o)k0g3uw6_|QVl@5pr=TPkb%M{he_p2#+K{MwV1+`y z8HDjsKvi$-*%2;JSBIW*(8Z>faA?0!Bj$LT2~5Jgqe?cH@RZd{PratRAB0G!u%Pq3 zN~MmvClxI~j>kF1puv)j@3p{No~G;BShVL989S|`Uz@s^tTG$ekkmcs;Of0Ho*jLtyCspVU$S8vtt z-a*N~o9>sFY>A-0l7hm(jrjvwi*p$Gd>J_S*^f;prrsj_ACRXg2j z*|=cuYRX*g05?+^bKI0@Iqr-Msk-M%dm!eIZ_glKDz7OPH7!=@Wjk#Lb|nx9OLlFw z)Z+_BWXKQPuqUrg&gk1=4d&%8ihzjTEUPTu*tRkK@_ z=b#=6b2!d}J&~(GN+dr_c9b3oFO^`&%d^C&9Ii218~=LdZZBP`V(6+Q=$2iy=(%{m}ms^tu^_^ zMy2oBox1O2;1V<+@9g$o8=Vw$_@Oa+6zwJVN;kmiJ>AErmKHifc?I>$*V_?L{QHNz zB8_2P6DiSK_3bxA$lo2pygmr^ed9$D8Q^xMq~u)F0FD#seNetC*+sCjt_Pem+mM;|QkpbIe|Z zF6``a)L5jXMqw!GSPHDhE8;8j&dP4owDwO{I3;$wg;q7y`o`cko6zEdb|QMX?mHLB zixa6Vx-w0bboK-RO?`cm367XUdZMAv-ryCM9*{CUw0gfhQEwHkV7(nK*SD5M|0aje zbN@AS{dA)k8(;V)wHV4@ik__|tw(r!MlbR_`f*VNv#pXdT)7;V1SsC$1pOfj^5{wSmxn6dI=Z!v3P-Kf+ z|0r%?v!02|@a;NaAJU2k%^N+Krb2G?uf0~0-irOIl9Nw+7jENOo7bSG^0`7Kc!~m< z+^`-CGPxT8P0LR7iTI@9sjwgf<3hA%a+!g&sy9QlqY8(^89NQc(eihiww`p4lwt_u zD2@_KrMtTZ26Gk{wtn2YMW9>T^=d{spmWFgG~($zQACN%s@iOU3xJYU(}j6r`EI&6 zDMu-PFuosnV`@dMbgXA69VLa5_k=g;n$51QzG-a_9`1BY>t~K<4^<6we~sghL*0$j zur}P8u5j%>&rlvS`{*)Fa%l0y#Q6M&k6JD`D&mg}*lIC6uoxZ{(<-Z3J0z_8ZLxo9 zMM^nAA}3{PcymS#?fth?Rm$d?Pp+JFC4O+{(fS1z|O@c$w$XjxI}$!+uxmyRryi zxeXN-J^g;?IKMgM!hdRSe-9kPN+~B+RvCUt>MslWck}hjjy?vvd=Mgu#+=e9A9kw| z*d&OzB*<5v9szrFmqxn8Qm5XYhX^p565DN!JQ+dmwW=O$-cH<%?NTJcFE6|j)zR%s z*b;X7CwlRk!C>Lsf+J`0>+6)S-cwCKq*D_#sofJd7O?J2ku)prQ`ag(_BBPIbp`u( z)y8vxgEV@Trnq%(d^*kEo#(G@kfc{Fis1P|y`)5gUM!DAMtJ15o zY|PW_i0X)Zh?pPIq=h{TCJ^LKgsRjD+4{k;xBD*her5ICY|y1l$ODkKtFRf6#F|j7 zKfhp8Z$~RHJ6k;;FDEyLpB^J|xP!P;hy;J3SzFf&sYOBIBnjOn+6N+N&8)xYM!p%D4{!@Kx zP;T$*(X8yNMQg?kDTtLEuYHJht0RlZ@iR%KM=j>iNp+R$gwQ9-lpqZq_NcG1C)&Qz z5u%tlQnFUw!Bu>lR@Mfa6qZCw^&a`#qY+m`BSmq^LQ|u9R}$WPMB6H8_o$AeVfG z7DLZ3bAU|>2sp0$I`q^wn>*CDRD?6r^Ep3dr|jkdbUO<`GmFwErQR!s{Q>nfnI!h| z0mR#P>-YE|oN(^d1}3+H?0S%!B%JxPrlBmZU#B4SMZ(`axSPb z&07*Bd2RBzkt#|{6c%wh6qU?nm5VNrDB@A3)n+JN$*;SbP)8@|Gmr@(_8algl}ITH zbnoVI&Ej%S)!IDaK7#GkZ7GvRIp@a#Z@#f%=M|M>-fm|s4sHw!0JE?ddP6mrh=^<` zxpv|>%4Ct_ha~4s^9U<2Xa2TA*1krgzd9xB54;&ZYazeeG4GF^WK2^0@vf+M! zCoL5jdSipQovNHBB;F|2z?DX62+4c;JBb-Ndm4?m6jQU~;b06qi zN_yIW|IADBw^cD?Y)%aE001g%UQ89;-F)oad@P^=9(LYl7pZB1N@`>G>f8w*TQ!ew z|3h;1nXr$9PkBT|b~uF-ufLfcJA)7LLiLGwV`KZ?U|Q3)eZ^3vT*eW5W9vDZ>lqvc zAok)auid%^4Z%KF2?VVvNT$Oa?G1^stft?)PZ|ezzU5ev*2Bx<@%5&CK+Oux>%E<$ za2jQT5mwU25t_A(Jm1TS`q22veochjeFs&7>Z_5asNQgbTeU`Zx#9$g7+}>~aj*0c z@uFB)co2W3?1_2D=BBD#dB&|gKX!9|AH^9QYnJV%TekY$D+%_0*09?sArk>`pIipJ6Yvi>e-C|&~}9x z7jE|SUjB9ssZhs{0!K!m*vu>zEL!FPcF1IY|lxBvhE literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/dimension_small.xlsx b/pandas/tests/io/data/excel/dimension_small.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..78ce4723ebef46d117130df458affa93ed9d48bd GIT binary patch literal 4894 zcmZ`-2UJr{w@rXhL+@RB?}`X$K#(fENiU&yLzMs`U8I9FgD6F%qx2R80RibvNhBZw zQl(0f_JY3m`^o$M+?9LRx@+xoXU>_k_ny(!#Kofl00;>IO?M&sjwRvsvj6}<4hH}r z!9J~F4(>i;qUZPW)FBOA35uAFkho-cKpm~JeRXT>@DsI=u&n-5+pF2)jD8{T2tu>s zB7!9K(4B#+N;%V1jz8iMs&Oh_C1yhLiN_4^f;_VQRpwSVlL4+lV-0R{XkW>Ar~B@u zR8Qa=X7NuJdL4JHWan&(SQ=yOr|sd99+uX)XZA~bvBi5BXUtK8am;>XW>xX3N=(4K z6^_cakvWt6WRZP-6)~w6N}VBP1EXh5+Rg4_0f%sHLjFnf*R!#Aa~lk+>DM z^YU^N4fJqNFzEF}i&M1j3*lirM66tyGjL53lL6BdFMG37Rp{K}XnM9X^3O1o4n+be zC35L&Br`49_slQ|w`JD$JI{^!J8a*|>o$r;qPo`5P}wYrAx;xerkP^DsLj;C%Jg>O zz>Tq>jS;v5)R;q>WTyTe=t{lPbXdMxvDQG!?dly5Xc&DR&ResedK9-$a!9&lBcl%4 z!Il}EF$sg@SNPWyz-oz|7~}lc?zO^EkuNJalCBOoSy8V?en1?SnxL!`GPi>7JL~r) zn)VifTx&*OicJOdq{zf9jd)EH`d-E|4j0=}l%}vqz z#u5E)@W2`ZL? z9lyxiv2CwCXEp5S1zs15dc71UL!+I`@?fQw?#g`Ia?g4h^>zX41Jr|Mwc%L!Fr} zVH@R$9snTyy+(X|@3}kpoUf4eJJX&|#2H#=4DKMUIb2d-Q0w~TX8B7Qxe03J_p8U%X z&QM5!XaEE3JH9wLEN7#OU1MJf`>Yl!=prU z-%vn8<1t8LB_ZVgqJ*+cJPy)MVvLyEuB$d>Bsxq8S;#B%L}7MGRN1vKxSK^&`8hqv zN{IrLJ!}5-n(qUC-ThfQJ*7GHcNU4Ym2ass(O`+2Li8+X1^r9K{Yxxm+H|f@rCFf*Ctx_ExPSy`X%1f!;Kir*}O}=0@E3zB{exOTLS>(ms@( zTkRNhz3C!X`hFOc|4i8o+6y%R4(#L$4vfw^mdoZe%}BNSJS-UrL;=U6dYfq^ufUZ8 zESo6Qi26XPyG>x*=vN(8q#;xR!PN0MaZ({hU2E3H@SUZs9AZIDv#yTRaq3~vjY-R(=RO*+!Oe)oOnqm> zQczymJH0SzRz(i(#;fCdpv3{CeZ7$u6*KH>l)~5AQ{KxkVU@95<%JGBb7~@;)y@}M zw5gj?f0eSC%XGUvi|7ie>Cv<8p)rt6BH;EYHRlv`unSIGW-TU4+3gn!fG73?$ z9r}BYTW+xqYB%T(2>^o}jjRra5Yt3Y8naEgW$ttr&NgDOpE`vBUhp+kn#jluZ1p4g z92vM4{pe<8&^2NS5Rujtt9oh?oYBGppSJ@)_~ra7*`Y*@uDhhi#LH5-Y86LAh{0ZJ`OInw{^lKOWklbR^#Ye8wCZ%Gw5#v| zX?V1^nTe}%lWR`^@tLAmVUl4qjZ=2F7^Ga9wB7C60H?t#vO_oR(F_t40-Gm;KPQox zdZue$!)g3w!jC@&gXK8b5_)HUzEG1U;qprCk1=@d&5AdPD==jNn)W*^DF{V{TYx-T z!QT6gq64U>4E%mYc&|1i>~e7`Z7x4Kd{k{C@8%m`63naOr#AM$kT@S3RHmZKcAg1w9FTAp|GLVR{lwx|>}sRsTq z6>GHZoLZh=#J&tp{#=;(IanbCqv^?T7o$12`Wf6);aD6}3<*uQPN9z?d zOU3pL)qV;$sJMf;04W|;j)^0e*<@SZh=tyQ>DWRYnG+#q2oXmkde=;E9rQJ4X{*bS zXUE#k$v@NiYAq{3YLUaawI6rqIb)2IsT+%iI;{*!#{8)SA4?A`)b;omwp{l(tJv6a zowAs%fZALg{hrX30x692QUJ;TQ^NuNyk<>}$!46y2~O{Ij{|$RlG0ZkxxH<0R%&J# zP{SJbjt%-RB4Aatab-tqKZ_a$1rXe6YL*d`RF4o;aES=IA~Uzrln9qph1`a^1Cb(B zOZ&lw5*$;YYhyu1I|5H%U^m&@ILeC}W@ooS=naa*3+{6&9aqBN#F*a7&}OEOUvK*` z>}CtLQ(rveVA5ha3U;y;DjQy1B>LcJk%&1V_(Lh@mQ-i+FlZF3t~^+!W%{kMzg_7+ z{n&4Znxf_QP8=M)aYC!I-=$6fjhAVSSFAia0Cwxm4R=bXPS&1HBqm9)>v6bneF4JB`iZNrG9pi3I)G@x+L3J|h{THsU9KCtcFeuAU7vlc zyb?1VjPrLU6agPES70+BhxMgc%YII-KF+o<2YUlwn2V>=FWZqY)K1(X4#uBt*3pA~ z)TX3#ku%&R+6DH}S=wqjr9DKp8c}%7ZSh%eQya`cD?PKWalaAIGkTVsGmhVMM~0Pr z>4U7v|;dBL?+?Is9_n`X994iBq)f(57rW{Q5$k zhNl?s6`vqSEn(-tC-gXlT*kWe`pj2uEwg6?-0c2N-PO%jPSvet;j9cou8+B>y7&QI zuA+w71(_4_ElLmk4DXsv5W{={j~u&0dO!ZdjLYLiu$EUN`MQxN+w_{K47rJP84yiDKoBHtm^H1QQ z<+wx_bnSF#FpkfO8gvfIX92n{A9?>2x<&eTMcQ?n`c|-&2*Or{;-3|<_4Yn5NN%#u zdB=5a9MQDz=E#t6q|Y^!rRNf(IhdP4L&j$0XYY(Pey)G>7~X4n^7QF}dXRhb9Rmah z@4BT9TWJo>Y@rE?6}~MkpXsL0O`}cpm34O>=b`%{E?W95BTfDR zZE;`Shr#&*+AIfoeTbG>2j;_6xDsmIPt{h@o7j-CLSb9=Gh(YvcVS^DWI3cEvj3|) zfH_`aHXhU0ULYP(_jdI&l6a3Md_>GCBRJwrl!r zmh}P7!aP(>3GaLhSs9)HsIfRN!nYGHpS+|1Oxz3WAkFJ;OT2^c+cCx z$MQTit%hCsd8pn#zfkw9#0@0$%| zrjEZf`W3Hf#Lnmv;`UM&_cIc%!V1`SO@pRbue)p?ojJ*kcB^$C{TQ1{hMilaad1~* z=d$!}0XFYdLC1Tq=V_~UwhqE+RS1STNK+ziS2Ob;mJs!><0l0)5%Tr^s1Vawh%`s{ zgcAr?-*L#5AxPK&R@BPCGDBqwV%_1vBIS@HtM-)@HHDHa;imyyRwBMiC>%Ss&8A$M zv zE!J5&s`7YCUXDpUy>yRcv&@ng_sOT7yv^6l`(Id2U3QU+WSvm&10+G-TaJJ{^3I{$ z*KGIk3EHl1Wy|FoJW`$wUt>S9qAWf`FwL3MUqZSjNlu2Qj|J#z0&!^0`R~6RSzycY zkKaEIFfIZwZt?%1h_I!-u;IT5zPR`M0~W#h*mGw7&ra|h%7*)U!2jPPGyFngKM1IK zpn(6M@BWKp2K{!-7u@p2fnAi#KLbl6{uTYdi08#P7scjJoLsE8|2OHm7~vwF|A`O+ z{ugq;2*1cZf8gX~|A7B56J11K?9e}GKJ0PMzx1oFCJ38-0DuU4nP3Iz^*P@G{s-D? BQXv2U literal 0 HcmV?d00001 diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 3155e22d3ff5d..640501baffc62 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -1,6 +1,10 @@ +from distutils.version import LooseVersion + import numpy as np import pytest +from pandas.compat._optional import get_version + import pandas as pd from pandas import DataFrame import pandas._testing as tm @@ -116,3 +120,32 @@ def test_to_excel_with_openpyxl_engine(ext): ).highlight_max() styled.to_excel(filename, engine="openpyxl") + + +@pytest.mark.parametrize( + "header, expected_data", + [ + ( + 0, + { + "Title": [np.nan, "A", 1, 2, 3], + "Unnamed: 1": [np.nan, "B", 4, 5, 6], + "Unnamed: 2": [np.nan, "C", 7, 8, 9], + }, + ), + (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}), + ], +) +@pytest.mark.parametrize( + "filename", ["dimension_missing", "dimension_small", "dimension_large"] +) +@pytest.mark.xfail( + LooseVersion(get_version(openpyxl)) < "3.0.0", + reason="openpyxl read-only sheet is incorrect when dimension data is wrong", +) +def test_read_with_bad_dimension(datapath, ext, header, expected_data, filename): + # GH 38956, 39001 - no/incorrect dimension information + path = datapath("io", "data", "excel", f"{filename}{ext}") + result = pd.read_excel(path, header=header) + expected = DataFrame(expected_data) + tm.assert_frame_equal(result, expected)