Skip to content

Commit 13080c8

Browse files
committed
Merge remote branch 'locojay/excelfancy'
* locojay/excelfancy: adding argument has index_labels to excel reader to handle index_labels not in the same row as column names; adding styling test; reader bug fix (colnames was None.1,....), datetime handling, period handling; all attributes; excelformatter handles multiindex, aliases; adding float_format to ExcelFormatter; adding na_rep, cols argument to excel formatter; excel format; excel format; excel format
2 parents 95500f0 + 389da90 commit 13080c8

File tree

4 files changed

+609
-83
lines changed

4 files changed

+609
-83
lines changed

pandas/core/format.py

+215
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717

1818
import numpy as np
1919

20+
import itertools
21+
22+
2023
docstring_to_string = """
2124
Parameters
2225
----------
@@ -400,6 +403,7 @@ def _get_column_name_list(self):
400403
names.append('' if columns.name is None else columns.name)
401404
return names
402405

406+
403407
class HTMLFormatter(object):
404408

405409
indent_delta = 2
@@ -674,6 +678,217 @@ def grouper(x):
674678

675679
return result
676680

681+
682+
#from collections import namedtuple
683+
# ExcelCell = namedtuple("ExcelCell",
684+
# 'row, col, val, style, mergestart, mergeend')
685+
686+
class ExcelCell(object):
    """
    A single cell to be written to an Excel sheet.

    Parameters
    ----------
    row : int
        Row position of the cell
    col : int
        Column position of the cell
    val : object
        Value to write into the cell
    style : dict, optional
        Style description for the cell (see ``header_style`` for the layout)
    mergestart : int, optional
        Row coordinate of the far corner when the cell is merged
    mergeend : int, optional
        Column coordinate of the far corner when the cell is merged
    """
    # Deriving from ``object`` matters on Python 2: ``__slots__`` is ignored
    # on classic classes, so every cell would still carry a ``__dict__``.
    __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend')
    __slots__ = __fields__

    def __init__(self, row, col, val,
                 style=None, mergestart=None, mergeend=None):
        self.row = row
        self.col = col
        self.val = val
        self.style = style
        self.mergestart = mergestart
        self.mergeend = mergeend

    def __repr__(self):
        pairs = ', '.join('%s=%r' % (name, getattr(self, name))
                          for name in self.__fields__)
        return 'ExcelCell(%s)' % pairs
698+
699+
700+
# Style used for header and index cells: bold text, centred horizontally,
# with a thin border on all four sides.
header_style = dict(
    font=dict(bold=True),
    borders=dict(top="thin", right="thin", bottom="thin", left="thin"),
    alignment=dict(horizontal="center"),
)
706+
707+
708+
class ExcelFormatter(object):
    """
    Class for formatting a DataFrame to a stream of ExcelCells.

    Parameters
    ----------
    df : dataframe
    na_rep: na representation
    float_format : string, default None
        Format string for floating point numbers
    cols : sequence, optional
        Columns to write
    header : boolean or list of string, default True
        Write out column names. If a list of string is given it is
        assumed to be aliases for the column names
    index : boolean, default True
        output row names (index)
    index_label : string or sequence, default None
        Column label for index column(s) if desired. If None is given, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the DataFrame uses MultiIndex.
    """

    def __init__(self,
                 df,
                 na_rep='',
                 float_format=None,
                 cols=None,
                 header=True,
                 index=True,
                 index_label=None
                 ):
        self.df = df
        # Row cursor shared by the header and body generators; they mutate
        # it lazily, so the cells must be consumed header first, then body.
        self.rowcounter = 0
        self.na_rep = na_rep
        self.columns = cols
        if cols is None:
            self.columns = df.columns
        self.float_format = float_format
        self.index = index
        self.index_label = index_label
        self.header = header

    def _format_value(self, val):
        """
        Return `val` as it should be written: nulls become `na_rep`, and
        floats are rounded through `float_format` when one is set.
        """
        if lib.checknull(val):
            val = self.na_rep
        if self.float_format is not None and com.is_float(val):
            # Round-trip through the format string but keep a float, so the
            # cell holds a number rather than text.
            val = float(self.float_format % val)
        return val

    def _format_header_mi(self):
        """
        Yield the header cells for MultiIndex columns, one sheet row per
        column level. Labels spanning several columns are emitted as a
        single cell carrying merge coordinates.
        """
        levels = self.columns.format(sparsify=True, adjoin=False,
                                     names=False)
        # presumably one mapping per level of start position -> run length;
        # verify against _get_level_lengths
        level_lengths = _get_level_lengths(levels)
        coloffset = 0
        if isinstance(self.df.index, MultiIndex):
            coloffset = len(self.df.index[0]) - 1

        for lnum, (records, values) in enumerate(zip(level_lengths,
                                                     levels)):
            # The level name sits just left of that level's labels.
            name = self.columns.names[lnum]
            yield ExcelCell(lnum, coloffset, name, header_style)
            for i in records:
                if records[i] > 1:
                    yield ExcelCell(lnum, coloffset + i + 1, values[i],
                                    header_style, lnum,
                                    coloffset + i + records[i])
                else:
                    yield ExcelCell(lnum, coloffset + i + 1, values[i],
                                    header_style)

            # Track the last header row written so the body starts below it.
            self.rowcounter = lnum

    def _format_header_regular(self):
        """
        Yield one header cell per column for non-MultiIndex columns, using
        the aliases in `header` when a sequence was given.

        Raises
        ------
        ValueError
            If `header` is a sequence whose length differs from the number
            of columns being written.
        """
        has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
        if has_aliases or self.header:
            # Leave room on the left for the index column(s).
            coloffset = 0
            if self.index:
                coloffset = 1
                if isinstance(self.df.index, MultiIndex):
                    coloffset = len(self.df.index[0])

            colnames = self.columns
            if has_aliases:
                if len(self.header) != len(self.columns):
                    raise ValueError(('Writing %d cols but got %d aliases'
                                      % (len(self.columns), len(self.header))))
                colnames = self.header

            for colindex, colname in enumerate(colnames):
                yield ExcelCell(self.rowcounter, colindex + coloffset, colname,
                                header_style)

    def _format_header(self):
        """
        Return an iterator over all header cells: the column header followed
        by an optional row holding the index names.
        """
        if isinstance(self.columns, MultiIndex):
            gen = self._format_header_mi()
        else:
            gen = self._format_header_regular()

        gen2 = ()
        if self.df.index.names:
            row = [x if x is not None else ''
                   for x in self.df.index.names] + [''] * len(self.columns)
            # NOTE(review): `row` is padded with '' for every column, so this
            # condition can only hold when there are no columns -- confirm
            # whether "any name is set" was intended. Behaviour kept as is.
            if all(x != '' for x in row):
                gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style)
                        for colindex, val in enumerate(row))
                self.rowcounter += 1
        return itertools.chain(gen, gen2)

    def _format_body(self):
        """Return the generator of index and data cells for the frame."""
        if isinstance(self.df.index, MultiIndex):
            return self._format_hierarchical_rows()
        else:
            return self._format_regular_rows()

    def _format_regular_rows(self):
        """
        Yield the body cells for a frame with a flat index: an optional
        index label, the index values in column 0, then the data column by
        column.
        """
        self.rowcounter += 1

        coloffset = 0
        # output index and index_label?
        if self.index:
            # check aliases
            # if list only take first as this is not a MultiIndex
            if self.index_label and isinstance(self.index_label,
                                               (list, tuple, np.ndarray)):
                index_label = self.index_label[0]
            # if string good to go
            elif self.index_label and isinstance(self.index_label, str):
                index_label = self.index_label
            else:
                index_label = self.df.index.names[0]

            if index_label:
                yield ExcelCell(self.rowcounter, 0,
                                index_label, header_style)
                self.rowcounter += 1

            # write index_values
            index_values = self.df.index
            coloffset = 1
            for idx, idxval in enumerate(index_values):
                yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style)

        for colidx, colname in enumerate(self.columns):
            series = self.df[colname]
            for i, val in enumerate(series):
                yield ExcelCell(self.rowcounter + i, colidx + coloffset, val)

    def _format_hierarchical_rows(self):
        """
        Yield the body cells for a frame with a MultiIndex: an optional row
        of index labels, one sheet column per index level, then the data
        column by column.
        """
        self.rowcounter += 1

        gcolidx = 0
        # output index and index_label?
        if self.index:
            index_labels = self.df.index.names
            # check for aliases
            if self.index_label and isinstance(self.index_label,
                                               (list, tuple, np.ndarray)):
                index_labels = self.index_label

            # if index labels are not empty go ahead and dump
            # (any() rather than filter(): a filter object is always truthy
            # on Python 3, which would emit a row of None labels)
            if any(x is not None for x in index_labels):
                for cidx, name in enumerate(index_labels):
                    yield ExcelCell(self.rowcounter, cidx,
                                    name, header_style)
                self.rowcounter += 1

            # zip(*index) transposes the index tuples into one sequence per
            # level, i.e. one sheet column per level.
            for indexcolvals in zip(*self.df.index):
                for idx, indexcolval in enumerate(indexcolvals):
                    yield ExcelCell(self.rowcounter + idx, gcolidx,
                                    indexcolval, header_style)
                gcolidx += 1

        for colidx, colname in enumerate(self.columns):
            series = self.df[colname]
            for i, val in enumerate(series):
                yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val)

    def get_formatted_cells(self):
        """
        Yield every header and body cell, with each cell's value passed
        through `_format_value` first.
        """
        for cell in itertools.chain(self._format_header(),
                                    self._format_body()):
            cell.val = self._format_value(cell.val)
            yield cell
891+
677892
#----------------------------------------------------------------------
678893
# Array formatters
679894

pandas/core/frame.py

+17-8
Original file line numberDiff line numberDiff line change
@@ -1226,7 +1226,7 @@ def to_panel(self):
12261226

12271227
to_wide = deprecate('to_wide', to_panel)
12281228

1229-
def _helper_csvexcel(self, writer, na_rep=None, cols=None,
1229+
def _helper_csv(self, writer, na_rep=None, cols=None,
12301230
header=True, index=True,
12311231
index_label=None, float_format=None):
12321232
if cols is None:
@@ -1361,7 +1361,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
13611361
else:
13621362
csvout = csv.writer(f, lineterminator='\n', delimiter=sep,
13631363
quoting=quoting)
1364-
self._helper_csvexcel(csvout, na_rep=na_rep,
1364+
self._helper_csv(csvout, na_rep=na_rep,
13651365
float_format=float_format, cols=cols,
13661366
header=header, index=index,
13671367
index_label=index_label)
@@ -1372,7 +1372,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
13721372

13731373
def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
13741374
float_format=None, cols=None, header=True, index=True,
1375-
index_label=None):
1375+
index_label=None, startrow=0, startcol=0):
13761376
"""
13771377
Write DataFrame to a excel sheet
13781378
@@ -1397,6 +1397,9 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
13971397
Column label for index column(s) if desired. If None is given, and
13981398
`header` and `index` are True, then the index names are used. A
13991399
sequence should be given if the DataFrame uses MultiIndex.
1400+
startrow : upper left cell row to dump data frame
1401+
startcol : upper left cell column to dump data frame
1402+
14001403
14011404
Notes
14021405
-----
@@ -1413,11 +1416,17 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
14131416
if isinstance(excel_writer, basestring):
14141417
excel_writer = ExcelWriter(excel_writer)
14151418
need_save = True
1416-
excel_writer.cur_sheet = sheet_name
1417-
self._helper_csvexcel(excel_writer, na_rep=na_rep,
1418-
float_format=float_format, cols=cols,
1419-
header=header, index=index,
1420-
index_label=index_label)
1419+
1420+
formatter = fmt.ExcelFormatter(self,
1421+
na_rep=na_rep,
1422+
cols=cols,
1423+
header=header,
1424+
float_format=float_format,
1425+
index=index,
1426+
index_label=index_label)
1427+
formatted_cells = formatter.get_formatted_cells()
1428+
excel_writer.write_cells(formatted_cells, sheet_name,
1429+
startrow=startrow, startcol=startcol)
14211430
if need_save:
14221431
excel_writer.save()
14231432

0 commit comments

Comments
 (0)