|
17 | 17 |
|
18 | 18 | import numpy as np
|
19 | 19 |
|
| 20 | +import itertools |
| 21 | + |
| 22 | + |
20 | 23 | docstring_to_string = """
|
21 | 24 | Parameters
|
22 | 25 | ----------
|
@@ -400,6 +403,7 @@ def _get_column_name_list(self):
|
400 | 403 | names.append('' if columns.name is None else columns.name)
|
401 | 404 | return names
|
402 | 405 |
|
| 406 | + |
403 | 407 | class HTMLFormatter(object):
|
404 | 408 |
|
405 | 409 | indent_delta = 2
|
@@ -674,6 +678,217 @@ def grouper(x):
|
674 | 678 |
|
675 | 679 | return result
|
676 | 680 |
|
| 681 | + |
| 682 | +#from collections import namedtuple |
| 683 | +# ExcelCell = namedtuple("ExcelCell", |
| 684 | +# 'row, col, val, style, mergestart, mergeend') |
| 685 | + |
class ExcelCell(object):
    """
    One cell of spreadsheet output, as emitted by ExcelFormatter.

    Parameters
    ----------
    row : int
        Row position of the cell (the formatter in this file counts from 0)
    col : int
        Column position of the cell (likewise counted from 0)
    val : object
        Value to write into the cell
    style : dict, default None
        Style description, e.g. ``header_style``
    mergestart : int, default None
    mergeend : int, default None
        Only set for merged cells. The multi-level header code passes the
        row as `mergestart` and the last spanned column as `mergeend`.
    """
    # Deriving from ``object`` matters on Python 2: ``__slots__`` is ignored
    # on old-style classes, so every cell would still carry a ``__dict__``.
    __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend')
    __slots__ = __fields__

    def __init__(self, row, col, val,
                 style=None, mergestart=None, mergeend=None):
        self.row = row
        self.col = col
        self.val = val
        self.style = style
        self.mergestart = mergestart
        self.mergeend = mergeend

    def __repr__(self):
        # Debugging aid: show every field in declaration order.
        return 'ExcelCell(%s)' % ', '.join(
            '%s=%r' % (name, getattr(self, name))
            for name in self.__fields__)
| 698 | + |
| 699 | + |
# Style shared by every header and index cell written by ExcelFormatter:
# bold, horizontally centred text with a thin border on all four sides.
header_style = dict(
    font=dict(bold=True),
    borders=dict(top="thin", right="thin", bottom="thin", left="thin"),
    alignment=dict(horizontal="center"),
)
| 706 | + |
| 707 | + |
class ExcelFormatter(object):
    """
    Class for formatting a DataFrame into ExcelCells.

    Parameters
    ----------
    df : dataframe
    na_rep: na representation
    float_format : string, default None
        Format string for floating point numbers
    cols : sequence, optional
        Columns to write
    header : boolean or list of string, default True
        Write out column names. If a list of string is given it is
        assumed to be aliases for the column names
    index : boolean, default True
        output row names (index)
    index_label : string or sequence, default None
        Column label for index column(s) if desired. If None is given, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the DataFrame uses MultiIndex.

    Notes
    -----
    The cell producers are generators that share ``self.rowcounter``; they
    are meant to be consumed once, header first, through
    ``get_formatted_cells``.
    """

    def __init__(self,
                 df,
                 na_rep='',
                 float_format=None,
                 cols=None,
                 header=True,
                 index=True,
                 index_label=None
                 ):
        self.df = df
        # Row bookkeeping shared by the header and body generators.
        self.rowcounter = 0
        self.na_rep = na_rep
        self.columns = df.columns if cols is None else cols
        self.float_format = float_format
        self.index = index
        self.index_label = index_label
        self.header = header

    def _format_value(self, val):
        """
        Return `val` as it should be written to the sheet.

        Values reported as null by ``lib.checknull`` are replaced with
        `na_rep`; floats are passed through `float_format` (when given) and
        converted back to float.
        """
        if lib.checknull(val):
            val = self.na_rep
        if self.float_format is not None and com.is_float(val):
            val = float(self.float_format % val)
        return val

    def _format_header_mi(self):
        """
        Yield the header cells for MultiIndex columns, one row per level.

        Labels that span more than one column are emitted as merged cells.
        """
        levels = self.columns.format(sparsify=True, adjoin=False,
                                     names=False)
        level_lengths = _get_level_lengths(levels)
        coloffset = 0
        if isinstance(self.df.index, MultiIndex):
            coloffset = len(self.df.index[0]) - 1

        for lnum, (records, values) in enumerate(zip(level_lengths,
                                                     levels)):
            # The level name goes in the column just left of the labels.
            name = self.columns.names[lnum]
            yield ExcelCell(lnum, coloffset, name, header_style)
            # `records` is indexed by the starting position of each label
            # and gives the number of columns that label spans.
            for i in records:
                if records[i] > 1:
                    yield ExcelCell(lnum, coloffset + i + 1, values[i],
                                    header_style, lnum,
                                    coloffset + i + records[i])
                else:
                    yield ExcelCell(lnum, coloffset + i + 1, values[i],
                                    header_style)

            self.rowcounter = lnum

    def _format_header_regular(self):
        """
        Yield one header cell per column for non-MultiIndex columns.

        Nothing is yielded when `header` is falsy.

        Raises
        ------
        ValueError
            If `header` is a list of aliases whose length differs from the
            number of columns being written.
        """
        has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
        if has_aliases or self.header:
            # Leave room on the left for the index column(s).
            coloffset = 0
            if self.index:
                coloffset = 1
                if isinstance(self.df.index, MultiIndex):
                    coloffset = len(self.df.index[0])

            colnames = self.columns
            if has_aliases:
                if len(self.header) != len(self.columns):
                    raise ValueError(('Writing %d cols but got %d aliases'
                                      % (len(self.columns),
                                         len(self.header))))
                else:
                    colnames = self.header

            for colindex, colname in enumerate(colnames):
                yield ExcelCell(self.rowcounter, colindex + coloffset,
                                colname, header_style)

    def _format_header(self):
        """Return an iterator over all header cells."""
        if isinstance(self.columns, MultiIndex):
            gen = self._format_header_mi()
        else:
            gen = self._format_header_regular()

        gen2 = ()
        if self.df.index.names:
            row = [x if x is not None else ''
                   for x in self.df.index.names] + [''] * len(self.columns)
            # NOTE(review): `row` is padded with one '' per column, so this
            # can only be true when there are no columns to write. Kept
            # as-is to preserve the existing output; confirm the intent.
            if all(x != '' for x in row):
                # Must stay a lazy generator: `self.rowcounter` is read when
                # the cells are consumed, not when they are set up here.
                gen2 = (ExcelCell(self.rowcounter, colindex, val,
                                  header_style)
                        for colindex, val in enumerate(row))
                self.rowcounter += 1
        return itertools.chain(gen, gen2)

    def _format_body(self):
        """Return the generator of index and data cells for the frame."""
        if isinstance(self.df.index, MultiIndex):
            return self._format_hierarchical_rows()
        else:
            return self._format_regular_rows()

    def _format_regular_rows(self):
        """Yield index and data cells for a frame with a flat index."""
        self.rowcounter += 1

        coloffset = 0
        # output index and index_label?
        if self.index:
            # check aliases
            # if list only take first as this is not a MultiIndex
            if self.index_label and isinstance(self.index_label,
                                               (list, tuple, np.ndarray)):
                index_label = self.index_label[0]
            # if string good to go
            elif self.index_label and isinstance(self.index_label, str):
                index_label = self.index_label
            else:
                index_label = self.df.index.names[0]

            if index_label:
                yield ExcelCell(self.rowcounter, 0,
                                index_label, header_style)
                self.rowcounter += 1

            # write index_values
            index_values = self.df.index
            coloffset = 1
            for idx, idxval in enumerate(index_values):
                yield ExcelCell(self.rowcounter + idx, 0, idxval,
                                header_style)

        for colidx, colname in enumerate(self.columns):
            series = self.df[colname]
            for i, val in enumerate(series):
                yield ExcelCell(self.rowcounter + i, colidx + coloffset, val)

    def _format_hierarchical_rows(self):
        """Yield index and data cells for a frame with a MultiIndex."""
        self.rowcounter += 1

        gcolidx = 0
        # output index and index_label?
        if self.index:
            index_labels = self.df.index.names
            # check for aliases
            if self.index_label and isinstance(self.index_label,
                                               (list, tuple, np.ndarray)):
                index_labels = self.index_label

            # if index labels are not empty go ahead and dump
            # (`any` and not the truthiness of `filter(...)`, which is
            # always true on Python 3 because it returns an iterator)
            if any(x is not None for x in index_labels):
                for cidx, name in enumerate(index_labels):
                    yield ExcelCell(self.rowcounter, cidx,
                                    name, header_style)
                self.rowcounter += 1

            # Transpose the index tuples so each level is written as one
            # sheet column.
            for indexcolvals in zip(*self.df.index):
                for idx, indexcolval in enumerate(indexcolvals):
                    yield ExcelCell(self.rowcounter + idx, gcolidx,
                                    indexcolval, header_style)
                gcolidx += 1

        for colidx, colname in enumerate(self.columns):
            series = self.df[colname]
            for i, val in enumerate(series):
                yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val)

    def get_formatted_cells(self):
        """
        Yield every cell of the sheet, header cells first.

        Each cell's value is passed through ``_format_value`` before it is
        yielded.
        """
        for cell in itertools.chain(self._format_header(),
                                    self._format_body()):
            cell.val = self._format_value(cell.val)
            yield cell
| 891 | + |
677 | 892 | #----------------------------------------------------------------------
|
678 | 893 | # Array formatters
|
679 | 894 |
|
|
0 commit comments