pandas-dev · cancan101 · Jul 31, 2013 · Aug 26, 2013
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -44,6 +44,8 @@ pandas 0.13
   - Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf",
     "iNf", etc.) to infinity. (:issue:`4220`, :issue:`4219`), affecting
     ``read_table``, ``read_csv``, etc.
+  - Added ``get_effective_cell`` for getting the contents of an Excel cell
+    that is part of a merged range (:issue:`4672`)
 
 **API Changes**
 

diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -78,10 +78,10 @@ def __init__(self, path_or_buf, kind=None, **kwds):
         self.tmpfile = None
 
         if isinstance(path_or_buf, compat.string_types):
-            self.book = xlrd.open_workbook(path_or_buf)
+            self.book = xlrd.open_workbook(path_or_buf, **kwds)
         else:
             data = path_or_buf.read()
-            self.book = xlrd.open_workbook(file_contents=data)
+            self.book = xlrd.open_workbook(file_contents=data, **kwds)
 
     def parse(self, sheetname, header=0, skiprows=None, skip_footer=0,
               index_col=None, parse_cols=None, parse_dates=False,
@@ -461,3 +461,41 @@ def _writecells_xls(self, cells, sheet_name, startrow, startcol):
                 wks.write(startrow + cell.row,
                           startcol + cell.col,
                           val, style)
+
+
+def get_effective_cell(sheet, rowx, colx):
+    """
+    Return the cell that supplies the content shown at (rowx, colx).
+
+    If the cell at (rowx, colx) has no content of its own and lies inside
+    one of ``sheet.merged_cells``, the top-left cell of that merged range
+    is returned; otherwise the cell at (rowx, colx) itself is returned.
+
+    Parameters
+    ----------
+    sheet : xlrd.sheet.Sheet
+        Sheet to read from. Per the xlrd documentation ``merged_cells`` is
+        only filled in when the workbook was opened with
+        ``formatting_info=True``; otherwise this is a plain cell lookup.
+    rowx, colx : int
+        Zero-based row and column index of the requested cell.
+
+    Returns
+    -------
+    xlrd.sheet.Cell
+    """
+    import xlrd  # throw an ImportError if we need to
+
+    cell_at_loc = sheet.cell(rowx, colx)
+    # A cell with content of its own is returned as-is. Both XL_CELL_EMPTY
+    # and XL_CELL_BLANK count as "no content", so a covered cell without a
+    # stored record is still redirected to the merged range's value.
+    if cell_at_loc.ctype not in (xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK):
+        return cell_at_loc
+
+    # Each entry is (rlo, rhi, clo, chi) with exclusive upper bounds.
+    for rlo, rhi, clo, chi in sheet.merged_cells:
+        if rlo <= rowx < rhi and clo <= colx < chi:
+            return sheet.cell(rlo, clo)
+
+    return cell_at_loc
diff --git a/pandas/io/tests/data/merged_effective.xls b/pandas/io/tests/data/merged_effective.xls
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -18,7 +18,7 @@
 import pandas.io.parsers as parsers
 from pandas.io.parsers import (read_csv, read_table, read_fwf,
                                 TextParser, TextFileReader)
-from pandas.io.excel import ExcelFile, ExcelWriter, read_excel
+from pandas.io.excel import ExcelFile, ExcelWriter, read_excel, get_effective_cell
 from pandas.util.testing import (assert_almost_equal,
                                  assert_series_equal,
                                  network,
@@ -259,6 +259,47 @@ def test_excel_table(self):
                         skip_footer=1)
         tm.assert_frame_equal(df4, df.ix[:-1])
         tm.assert_frame_equal(df4, df5)
+
+    def test_read_effective_cells(self):
+        # get_effective_cell should hand back the top-left cell of a merged
+        # range for every cell the range covers, and the cell itself
+        # everywhere else.
+        _skip_if_no_xlrd()
+        import xlrd
+
+        # formatting_info=True is forwarded to xlrd.open_workbook; xlrd needs
+        # it to populate sheet.merged_cells.
+        pth = os.path.join(self.dirpath, 'merged_effective.xls')
+        xls = ExcelFile(pth, formatting_info=True)
+        sheet = xls.book.sheet_by_index(0)
+        no_content = (xlrd.XL_CELL_BLANK, xlrd.XL_CELL_EMPTY)
+
+        def value_at(rowx, colx):
+            return get_effective_cell(sheet, rowx, colx).value
+
+        def is_blank(rowx, colx):
+            return get_effective_cell(sheet, rowx, colx).ctype in no_content
+
+        self.assertEqual(value_at(0, 0), 1)
+        self.assertEqual(value_at(0, 4), 5)
+
+        self.assertEqual(value_at(1, 0), "a")
+        self.assertEqual(value_at(1, 1), "b")  # top left of merged range
+        self.assertEqual(value_at(1, 2), "b")  # merged
+        self.assertEqual(value_at(1, 3), "c")
+        self.assertEqual(value_at(1, 4), "d")
+
+        self.assertTrue(is_blank(2, 0))
+        self.assertEqual(value_at(2, 1), "b")  # merged
+        self.assertEqual(value_at(2, 2), "b")  # merged
+        self.assertTrue(is_blank(2, 3))
+        self.assertTrue(is_blank(2, 4))
+
+        self.assertEqual(value_at(3, 0), 1)
+        self.assertTrue(is_blank(3, 1))
+        self.assertTrue(is_blank(3, 2))
+        self.assertEqual(value_at(3, 3), 4)
+        self.assertEqual(value_at(3, 4), 5)
 
     def test_excel_read_buffer(self):
         _skip_if_no_xlrd()