Skip to content

Commit afde3f2

Browse files
author
locojaydev
committed
adding na_rep, cols arguments to excel formatter
1 parent b178066 commit afde3f2

File tree

5 files changed

+72
-39
lines changed

5 files changed

+72
-39
lines changed

pandas/core/format.py

+59-26
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
import pandas.lib as lib
1717

1818
import numpy as np
19+
1920
import itertools
2021

21-
from collections import namedtuple
2222

2323
docstring_to_string = """
2424
Parameters
@@ -686,8 +686,23 @@ def grouper(x):
686686
return result
687687

688688

689-
ExcelCell = namedtuple("ExcelCell",
690-
'row, col, val, style, mergestart, mergeend')
689+
#from collections import namedtuple
690+
# ExcelCell = namedtuple("ExcelCell",
691+
# 'row, col, val, style, mergestart, mergeend')
692+
693+
class ExcelCell:
694+
__fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend')
695+
__slots__ = __fields__
696+
697+
def __init__(self, row, col, val,
698+
style=None, mergestart=None, mergeend=None):
699+
self.row = row
700+
self.col = col
701+
self.val = val
702+
self.style = style
703+
self.mergestart = mergestart
704+
self.mergeend = mergeend
705+
691706

692707
header_style = {"font": {"bold": True},
693708
"borders": {"top": "thin",
@@ -698,47 +713,65 @@ def grouper(x):
698713

699714

700715
class ExcelFormatter(object):
716+
"""
717+
Class for formatting a DataFrame to a list of ExcelCells.
718+
719+
Parameters
720+
----------
721+
df : dataframe
722+
na_rep : string, representation to use for missing (NA) values
723+
index : boolean, default True
724+
output row names (index)
725+
cols : sequence, optional
726+
Columns to write
727+
"""
701728

702-
def __init__(self, df):
729+
def __init__(self, df, na_rep='', cols=None):
703730
self.df = df
704731
self.rowcounter = 0
732+
self.na_rep = na_rep
733+
self.columns = cols
734+
if cols is None:
735+
self.columns = df.columns
736+
737+
def _format_value(self, val):
738+
if lib.checknull(val):
739+
val = self.na_rep
740+
return val
705741

706742
def _format_header_mi(self):
707-
levels = self.df.columns.format(sparsify=True, adjoin=False,
743+
levels = self.columns.format(sparsify=True, adjoin=False,
708744
names=False)
709-
level_lenghts = fmt._get_level_lengths(levels)
745+
level_lenghts = _get_level_lengths(levels)
710746
for lnum, (records, values) in enumerate(zip(level_lenghts,
711747
levels)):
712-
name = self.df.columns.names[lnum]
713-
yield ExcelCell(lnum, 0, name, header_style, None, None)
748+
name = self.columns.names[lnum]
749+
yield ExcelCell(lnum, 0, name, header_style)
714750
for i in records:
715751
if records[i] > 1:
716752
yield ExcelCell(lnum, i + 1, values[i],
717753
header_style, lnum, i + records[i])
718754
else:
719-
yield ExcelCell(lnum, i + 1, values[i],
720-
header_style, None, None)
755+
yield ExcelCell(lnum, i + 1, values[i], header_style)
721756

722757
self.rowcounter = lnum
723758

724759
def _format_header_regular(self):
725-
for colindex, colname in enumerate(self.df.columns):
726-
yield ExcelCell(self.rowcounter, colindex,
727-
colname, header_style, None, None)
760+
for colindex, colname in enumerate(self.columns):
761+
yield ExcelCell(self.rowcounter, colindex, colname, header_style)
728762

729763
def _format_header(self):
730-
if isinstance(self.df.columns, MultiIndex):
764+
if isinstance(self.columns, MultiIndex):
731765
gen = self._format_header_mi()
732766
else:
733767
gen = self._format_header_regular()
734768

735769
gen2 = ()
736770
if self.df.index.names:
737771
row = [x if x is not None else ''
738-
for x in self.df.index.names] + [''] * len(self.df.columns)
772+
for x in self.df.index.names] + [''] * len(self.columns)
739773
if reduce(lambda x, y: x and y, map(lambda x: x != '', row)):
740-
gen2 = (ExcelCell(self.rowcounter, colindex, val,
741-
header_style, None, None)
774+
gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style)
742775
for colindex, val in enumerate(row))
743776
self.rowcounter += 1
744777
return itertools.chain(gen, gen2)
@@ -752,26 +785,26 @@ def _format_body(self):
752785

753786
def _format_regular_rows(self):
754787
self.rowcounter += 1
755-
for colidx, colname in enumerate(self.df.columns):
788+
for colidx, colname in enumerate(self.columns):
756789
series = self.df[colname]
757790
for i, val in enumerate(series):
758-
yield ExcelCell(self.rowcounter + i, colidx,
759-
val, None, None, None)
791+
yield ExcelCell(self.rowcounter + i, colidx, val)
760792

761793
def _format_hierarchical_rows(self):
762794
self.rowcounter += 1
763795
for idx, idxval in enumerate(self.df.index):
764-
yield ExcelCell(self.rowcounter + idx, 0,
765-
idxval, header_style, None, None)
796+
yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style)
766797

767-
for colidx, colname in enumerate(self.df.columns):
798+
for colidx, colname in enumerate(self.columns):
768799
series = self.df[colname]
769800
for i, val in enumerate(series):
770-
yield ExcelCell(self.rowcounter + i,
771-
colidx + 1, val, None, None, None)
801+
yield ExcelCell(self.rowcounter + i, colidx + 1, val)
772802

773803
def get_formatted_cells(self):
774-
return itertools.chain(self._format_header(), self._format_body())
804+
for cell in itertools.chain(self._format_header(),
805+
self._format_body()):
806+
cell.val = self._format_value(cell.val)
807+
yield cell
775808

776809
#----------------------------------------------------------------------
777810
# Array formatters

pandas/core/frame.py

+7-10
Original file line numberDiff line numberDiff line change
@@ -1180,7 +1180,7 @@ def to_panel(self):
11801180

11811181
to_wide = deprecate('to_wide', to_panel)
11821182

1183-
def _helper_csvexcel(self, writer, na_rep=None, cols=None,
1183+
def _helper_csv(self, writer, na_rep=None, cols=None,
11841184
header=True, index=True,
11851185
index_label=None, float_format=None):
11861186
if cols is None:
@@ -1315,7 +1315,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
13151315
else:
13161316
csvout = csv.writer(f, lineterminator='\n', delimiter=sep,
13171317
quoting=quoting)
1318-
self._helper_csvexcel(csvout, na_rep=na_rep,
1318+
self._helper_csv(csvout, na_rep=na_rep,
13191319
float_format=float_format, cols=cols,
13201320
header=header, index=index,
13211321
index_label=index_label)
@@ -1368,16 +1368,13 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
13681368
from pandas.io.parsers import ExcelWriter
13691369
need_save = False
13701370
if isinstance(excel_writer, basestring):
1371-
excel_writer = ExcelWriter(excel_writer)
1371+
excel_writer = ExcelWriter(excel_writer, na_rep=na_rep)
13721372
need_save = True
1373-
# excel_writer.cur_sheet = sheet_name
1374-
# self._helper_csvexcel(excel_writer, na_rep=na_rep,
1375-
# float_format=float_format, cols=cols,
1376-
# header=header, index=index,
1377-
# index_label=index_label)
1378-
formatter = fmt.ExcelFormatter(self)
1373+
1374+
formatter = fmt.ExcelFormatter(self, na_rep=na_rep, cols=cols)
13791375
formatted_cells = formatter.get_formatted_cells()
1380-
excel_writer.write_cells(formatted_cells, sheet_name)
1376+
excel_writer.write_cells(formatted_cells, sheet_name,
1377+
startrow=startrow, startcol=startcol)
13811378
if need_save:
13821379
excel_writer.save()
13831380

pandas/io/parsers.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1959,8 +1959,10 @@ def _conv_value(val):
19591959
#convert value for excel dump
19601960
if isinstance(val, np.int64):
19611961
val = int(val)
1962-
if isinstance(val, np.bool8):
1962+
elif isinstance(val, np.bool8):
19631963
val = bool(val)
1964+
elif isinstance(val, lib.Timestamp):
1965+
val = val._repr_base
19641966

19651967
return val
19661968

@@ -2021,6 +2023,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
20212023
self._writecells_xls(cells, sheet_name, startrow, startcol)
20222024

20232025
def _writecells_xlsx(self, cells, sheet_name, startrow, startcol):
2026+
20242027
from openpyxl.cell import get_column_letter
20252028

20262029
if sheet_name in self.sheets:

pandas/src/parse_helper.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <errno.h>
22
#include <float.h>
33

4-
double PANDAS_INLINE xstrtod(const char *p, char **q, char decimal, char sci, int skip_trailing);
4+
double xstrtod(const char *p, char **q, char decimal, char sci, int skip_trailing);
55

66
int to_double(char *item, double *p_value, char sci, char decimal)
77
{

pandas/src/parser/tokenizer.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ int P_INLINE end_field(parser_t *self) {
395395
return 0;
396396
}
397397

398-
int P_INLINE end_line(parser_t *self) {
398+
int end_line(parser_t *self) {
399399
int fields;
400400
khiter_t k; /* for hash set detection */
401401
int ex_fields = -1;

0 commit comments

Comments
 (0)