From 8224ed7296a2dc23936062ebb00a89983ee6cd2b Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Wed, 31 Jul 2013 09:04:07 -0400 Subject: [PATCH 1/2] ENH Pass kwds from ExcelFile ctr to xlrd.open_workbook. For example, this allows setting formatting_info=True (GH4438) --- pandas/io/excel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 534a88e303dbf..031081f557794 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -78,10 +78,10 @@ def __init__(self, path_or_buf, kind=None, **kwds): self.tmpfile = None if isinstance(path_or_buf, compat.string_types): - self.book = xlrd.open_workbook(path_or_buf) + self.book = xlrd.open_workbook(path_or_buf, **kwds) else: data = path_or_buf.read() - self.book = xlrd.open_workbook(file_contents=data) + self.book = xlrd.open_workbook(file_contents=data, **kwds) def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, index_col=None, parse_cols=None, parse_dates=False, From e82bfa474d856092ffff6954355d4f8d432bb47d Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Sun, 25 Aug 2013 22:44:05 -0400 Subject: [PATCH 2/2] ENH get_effective_cell for getting the contents of Excel cell when the cell is merged (GH4672) --- doc/source/release.rst | 2 ++ pandas/io/excel.py | 14 ++++++++++ pandas/io/tests/data/merged_effective.xls | Bin 0 -> 17408 bytes pandas/io/tests/test_excel.py | 31 +++++++++++++++++++++- 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100755 pandas/io/tests/data/merged_effective.xls diff --git a/doc/source/release.rst b/doc/source/release.rst index a2b525a737879..2340ff7495551 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -44,6 +44,8 @@ pandas 0.13 - Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf", "iNf", etc.) to infinity. (:issue:`4220`, :issue:`4219`), affecting ``read_table``, ``read_csv``, etc. + - Created get_effective_cell for getting the contents of Excel cell + when the cell is merged (:issue:`4672`) **API Changes** diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 031081f557794..ef5fdf8b68880 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -461,3 +461,17 @@ def _writecells_xls(self, cells, sheet_name, startrow, startcol): wks.write(startrow + cell.row, startcol + cell.col, val, style) + +def get_effective_cell(sheet, rowx, colx): + import xlrd # throw an ImportError if we need to + + cell_at_loc = sheet.cell(rowx, colx) + if len(sheet.merged_cells) == 0 or cell_at_loc.ctype != xlrd.XL_CELL_BLANK: + return sheet.cell(rowx, colx) + + for merged_cell in sheet.merged_cells: + rlo, rhi, clo, chi = merged_cell + if rowx >= rlo and rowx < rhi and colx >= clo and colx < chi: + return sheet.cell(rlo, clo) + + return cell_at_loc diff --git a/pandas/io/tests/data/merged_effective.xls b/pandas/io/tests/data/merged_effective.xls new file mode 100755 index 0000000000000000000000000000000000000000..e3f8526a31533840b2d36274db32b6d539be19f6 GIT binary patch literal 17408 zcmeHOdvH|M89#S-liU!51VTVSSi&oWfFT!8(X)oR7})Ccqr?H{Y{RISxw2OV*2by}r%S{-!z`<-*|?!9O4y_+4i z56js(H}`z^oZt7I?|kPw&-~`sRmTrKIQ|Wxg%%MOpJvKMse^9dol%BLAN1T8-5s|jSIN9<3KcucNov4?JUi?xbD$W-@_}eFTpk;<0YG5(8 zQs@h!W=Ix3q986%f85VWOw!m8)`5GU*djJcIkqTb5zkQ~B%#Qaatx&?^ENcAoF(Ei zu}xfqU%RaP#n3`h%0x;^bm6@x0JIML$(Q|SsQd}{D+{wkoCgjaQig?pi>DL%dd%%4Y&;wi+Ni0j2xzDNI^VY!r{-JlFd zAnQ`26w-y@rw-y-3BSTGY<>D3J|B>IpOnv~_;MtC+-OWb(y$QUI8I#=YL)rEE1$LU zIRZ~ZJSNM$CMom8BykM+O|gd>3W+z%j1{niZdk%bSV))nZ_2URFvLRfB&(m5^cqXf zb}4CtM-F#gA_a3CgN}4AZVYOys-VWI3+7l-Fh@&)9EON%`p_jcM00^00y%Hh2xDY$4g*$Lh)cOI#-zwZ_-2g5wbN>+HcXw`xM9}f^>ub){j5omDH!Q7 zCfg)@zw**V=T+`ZwU`QEqGxBC+vvtYnou(#YOS72L$k7;BkDzy5F02;ebkoBO0KVy zg!Qu)TZGyg&>!%j-{(XBp%49LAG)7Be)tbb9M0C=`uUL$UCEQ5U&)hC z|FI9wgFf_!edtg7(7)qDf5L}etUQlM9IkrY`cZbm)sLiquc~Ta#l8v=*(>vNHKgdt znJglYNqPxMJ@|ffhZAS5#l4Ykav%l=-vzQTERjqlI%dUa>G#I9ypudRESR zExlRvgH=^E6*bWFeo5yF)W%nOR{Af&xGd?Kp1GoxbWP4`oBk4}c%mh%2d-$n@`H@I z5C%F{gaLl$db3(wPsL?JTx?!AZTC)dj@ap~+2Elx{|T?N=-onFJ(X)e>;3?{2^ zFt!uW3xrI||F9>kiFGuZ4}uwbVUSEb7$i0nkfxX$ME9D_0LhtEUUl+CZ_IL~JA`A; zI29I(p#J>u>|bBV_}9PA1#(c7dV^%13j#UE4U+JYB=hb;ha|UZ%{d<+WwJRbbv%BZ zk0j|IWI@WTD(>2~D-*Q>o1FkoTDgOUPcjVipzSnup%x*pQ>eJNP$y5G%o8dekLL;H zq}f7|X32!Lv=ZJ)%NGhuLx)fw@AOf(mrzcvd^Sz#y>S7AQoWJd&lAdKmgzULn#!+P zeYR$_e+EyJzU!{LvLLn%I8|#ibJB*cZoOkJg&3-1vSw&2w+-aif;(}_XjqH9`H$T( zGnsT=Ee;<(oM$vnnq3RhfGvY1RXkPB8Pk>rxfAA|?M1wqnn!5~o=2ol~p2?m)t z6cBal?Oz@4b``8nBFR;wKL`)R3xcHEf z^rYqum@CBqRc8ewGAVNgw&@$ibP*ME5SQVQvO}mNO0~i;a3P>9l1TpH#0QxYzO%<* z7BHQg8DvwXvuVm_QyaiWtwAK4l0@?4>&Lv=jMCY}^4ZJ?V53$kl1*tM`R5N__hvI% zXVaX|rapiT*D+ztdZ4JXMDoP(_q^Fu>ul!dvzZ&fhRX$ujfuAT%ip}&jM3RF$Y;|O zz(!4LWo^n6$xr_DsyCY&vS|*sw8>!8JOoQiw>_SI%bU$uosEa3O$}h3KP2C#8jTDt9(x1RK7bEeM5!_sC4uyI;ix-EU=HE%ZKbv7QBc6I<8r=_La z9)0~CZ#ENjHXfE13t;24v~=64mml_KGm&f-1Y24ZY!(c`(wbtoJ@krVLx+Zt^=W76 zY&YhxM+cp*fjVtQhy%a)iZ|%= z94M?s2c4yXIxR$q{?0qRL1*MZVH-N=91YZI4?p++LClN^wb(AGx1lKn>26 z=}MKmQiGw9xSoOIixry_9b2L+65ZVc<}OXHv6*3zYcnQ;9k{s{#d`wMb)(ujskjv- zCxy;VYD>lar_!ucDrHNhQnpkoHHb~y+5O?nfEjg*QW6G}6}UjBF7SbuVBUuQQsk}E z@|NN{b1rXGT#1q3Di)Dt=JZEUW`Tbbv=??=JuDN#9XaJS!Q@% z2{*4X;%uHDp1*x-cY9BJs<&@PbZugLYQVhE4HcT7sl=_!?YMo%w(pVc&)bqap!2m5 zXFKeHn^$)0v#~AnDo5S0J#%4iFAR0D8`l`0iHHlKv|i0rxg&04ftigp0@q)#g19Kr z-r3c&DT=j%CTO@4n!N}D(e}F_=q3oL)(FxB&|xNDyi>u^6w4r)a)g7S5m;n6z~TFJ z@G1{*<^O!ZGx47s`6DVq@R$=EK2QfA z*^=nA`u`=GFyr8t+GIm-L8EuNJU}>Wjagr0dmnCbmqKyF#W|2)tV(Q4^|iyjXpY&K z2*Y>JLWt!Sh_#bJqE--9?&u?!w2)i2N;V{Tau0hX&^>3#BsFQF{$8+gqv zR?Bh05fH0-Q;7j{qZ=bM4E==|DG0^AMy`6KDA=gBT^cu;bBL9<(oi3jq($(f9%gH8{mS-QUT&3x zxYY11<`v==$0o@h)UFD@wE)>DYkL_)-XY&sqV9Btyj#8qTI>?nNejy5U=P}aQ8Wct zxjnb*Ivh$JFKhDb> zqLoimJ}c$3x-}v`L0gfB%weSr(IRO^NZrzJvFKssB5zeDf)u$v4FtAc3w|N-4LAc* ziT8f|Dcsx-K>GyKje+zaOALWUFXypTe`EQ0@-R#ooFkD>*&+#b2 z?l2y|5hG5>3b1JyH<@D*384b0T=0j_3$jV2dAmpxdgK4AJ=!|J?K!rIOt$8T9&Hj* zZKCoh%N3tTD?E-KbGjA2$X0MP>*?1oi3ZpK>7nU?`G1+1*KtGqQ!t=?RD@2#eoJ@cVgG3^rHN2G1=dn+R6BNriZ1YL_b z3h`=0&J?#JVpCq+h&UQ?4tEfnIZKg%-c?;KrMc6IdiUfY{YMbF>fk?5AZ`o}e# z`smOLMk~Jezj*B1rFXnw&~86V2&#YR<>y~MICnzj;6Bv9;iLOm|KB6>IFY7>T8T&GtY3IO{L`^xF? IpVz>@0EtBvrT_o{ literal 0 HcmV?d00001 diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 1ac4d4e31ed10..695822f3efc10 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -18,7 +18,7 @@ import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, TextParser, TextFileReader) -from pandas.io.excel import ExcelFile, ExcelWriter, read_excel +from pandas.io.excel import ExcelFile, ExcelWriter, read_excel, get_effective_cell from pandas.util.testing import (assert_almost_equal, assert_series_equal, network, @@ -259,6 +259,35 @@ def test_excel_table(self): skip_footer=1) tm.assert_frame_equal(df4, df.ix[:-1]) tm.assert_frame_equal(df4, df5) + + def test_read_effective_cells(self): + _skip_if_no_xlrd() + import xlrd + + pth = os.path.join(self.dirpath, 'merged_effective.xls') + xls = ExcelFile(pth, formatting_info=True) + book = xls.book + sheet = book.sheet_by_index(0) + self.assertEqual(get_effective_cell(sheet, 0, 0).value, 1) + self.assertEqual(get_effective_cell(sheet, 0, 4).value, 5) + + self.assertEqual(get_effective_cell(sheet, 1, 0).value, "a") + self.assertEqual(get_effective_cell(sheet, 1, 1).value, "b") #Top left of merged + self.assertEqual(get_effective_cell(sheet, 1, 2).value, "b") #merged + self.assertEqual(get_effective_cell(sheet, 1, 3).value, "c") + self.assertEqual(get_effective_cell(sheet, 1, 4).value, "d") + + self.assert_(get_effective_cell(sheet, 2, 0).ctype in(xlrd.XL_CELL_BLANK, xlrd.XL_CELL_EMPTY)) + self.assertEqual(get_effective_cell(sheet, 2, 1).value, "b") #merged + self.assertEqual(get_effective_cell(sheet, 2, 2).value, "b") #merged + self.assert_(get_effective_cell(sheet, 2, 3).ctype in(xlrd.XL_CELL_BLANK, xlrd.XL_CELL_EMPTY)) + self.assert_(get_effective_cell(sheet, 2, 4).ctype in(xlrd.XL_CELL_BLANK, xlrd.XL_CELL_EMPTY)) + + self.assertEqual(get_effective_cell(sheet, 3, 0).value, 1) + self.assert_(get_effective_cell(sheet, 3, 1).ctype in(xlrd.XL_CELL_BLANK, xlrd.XL_CELL_EMPTY)) + self.assert_(get_effective_cell(sheet, 3, 2).ctype in(xlrd.XL_CELL_BLANK, xlrd.XL_CELL_EMPTY)) + self.assertEqual(get_effective_cell(sheet, 3, 3).value, 4) + self.assertEqual(get_effective_cell(sheet, 3, 4).value, 5) def test_excel_read_buffer(self): _skip_if_no_xlrd()