Skip to content

get_effective_cell for getting the contents of Excel cell when the cell is merged #4673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ pandas 0.13
- Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf",
"iNf", etc.) to infinity. (:issue:`4220`, :issue:`4219`), affecting
``read_table``, ``read_csv``, etc.
- Added ``get_effective_cell`` for getting the contents of an Excel cell
when the cell is merged (:issue:`4672`)

**API Changes**

Expand Down
18 changes: 16 additions & 2 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,10 @@ def __init__(self, path_or_buf, kind=None, **kwds):
self.tmpfile = None

if isinstance(path_or_buf, compat.string_types):
self.book = xlrd.open_workbook(path_or_buf)
self.book = xlrd.open_workbook(path_or_buf, **kwds)
else:
data = path_or_buf.read()
self.book = xlrd.open_workbook(file_contents=data)
self.book = xlrd.open_workbook(file_contents=data, **kwds)

def parse(self, sheetname, header=0, skiprows=None, skip_footer=0,
index_col=None, parse_cols=None, parse_dates=False,
Expand Down Expand Up @@ -461,3 +461,17 @@ def _writecells_xls(self, cells, sheet_name, startrow, startcol):
wks.write(startrow + cell.row,
startcol + cell.col,
val, style)

def get_effective_cell(sheet, rowx, colx):
    """Return the cell that supplies the content shown at (rowx, colx).

    When the cell at the requested position carries no content of its own
    and lies inside one of the ranges in ``sheet.merged_cells``, the
    top-left cell of that range is returned instead.  In every other case
    the cell at the requested position is returned unchanged.

    Parameters
    ----------
    sheet : xlrd sheet
        Any object providing ``cell(rowx, colx)`` and ``merged_cells``
        (an iterable of ``(rlo, rhi, clo, chi)`` tuples).
    rowx : int
        Row index of the requested cell.
    colx : int
        Column index of the requested cell.

    Returns
    -------
    cell
        Whatever ``sheet.cell`` returns for the effective position.

    Raises
    ------
    ImportError
        If xlrd is not installed.
    """
    import xlrd  # throw an ImportError if we need to

    cell_at_loc = sheet.cell(rowx, colx)

    # A covered cell of a merged range may be reported either as BLANK
    # (formatting record only) or as EMPTY (no record at all); both mean
    # the cell has no content of its own.
    contentless = (xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK)
    if not sheet.merged_cells or cell_at_loc.ctype not in contentless:
        return cell_at_loc

    # Upper bounds are exclusive: a range covers rows rlo..rhi-1 and
    # columns clo..chi-1.
    for rlo, rhi, clo, chi in sheet.merged_cells:
        if rlo <= rowx < rhi and clo <= colx < chi:
            return sheet.cell(rlo, clo)

    # Contentless cell that is not part of any merged range.
    return cell_at_loc
Binary file added pandas/io/tests/data/merged_effective.xls
Binary file not shown.
31 changes: 30 additions & 1 deletion pandas/io/tests/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import pandas.io.parsers as parsers
from pandas.io.parsers import (read_csv, read_table, read_fwf,
TextParser, TextFileReader)
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel, get_effective_cell
from pandas.util.testing import (assert_almost_equal,
assert_series_equal,
network,
Expand Down Expand Up @@ -259,6 +259,35 @@ def test_excel_table(self):
skip_footer=1)
tm.assert_frame_equal(df4, df.ix[:-1])
tm.assert_frame_equal(df4, df5)

def test_read_effective_cells(self):
    """Check get_effective_cell against the merged_effective.xls fixture.

    The assertions expect "b" at every position of rows 1-2, columns 1-2
    (the merged block), and expect cells outside that block to be
    returned as they are, whether they hold a value or are blank/empty.
    """
    _skip_if_no_xlrd()
    import xlrd

    pth = os.path.join(self.dirpath, 'merged_effective.xls')
    # formatting_info is passed through ExcelFile to xlrd.open_workbook;
    # xlrd documents that merged_cells is only extracted when it is set.
    xls = ExcelFile(pth, formatting_info=True)
    sheet = xls.book.sheet_by_index(0)

    # (rowx, colx, expected value)
    expected_values = [
        (0, 0, 1),
        (0, 4, 5),
        (1, 0, "a"),
        (1, 1, "b"),  # top left of merged
        (1, 2, "b"),  # merged
        (1, 3, "c"),
        (1, 4, "d"),
        (2, 1, "b"),  # merged
        (2, 2, "b"),  # merged
        (3, 0, 1),
        (3, 3, 4),
        (3, 4, 5),
    ]
    for rowx, colx, value in expected_values:
        self.assertEqual(get_effective_cell(sheet, rowx, colx).value, value,
                         "unexpected value at (%d, %d)" % (rowx, colx))

    # Positions outside the merged block that must stay without content.
    contentless = (xlrd.XL_CELL_BLANK, xlrd.XL_CELL_EMPTY)
    expected_contentless = [(2, 0), (2, 3), (2, 4), (3, 1), (3, 2)]
    for rowx, colx in expected_contentless:
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(
            get_effective_cell(sheet, rowx, colx).ctype in contentless,
            "expected blank/empty cell at (%d, %d)" % (rowx, colx))

def test_excel_read_buffer(self):
_skip_if_no_xlrd()
Expand Down