Skip to content

Commit 9ceea2f

Browse files
author
Chang She
committed
Add skip_footer to ExcelFile.parse and alias skipfooter/skip_footer
1 parent e03bfcd commit 9ceea2f

File tree

2 files changed

+99
-53
lines changed

2 files changed

+99
-53
lines changed

pandas/io/parsers.py

+77-53
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ def _is_url(url):
152152
def _read(cls, filepath_or_buffer, kwds):
153153
"Generic reader of line files."
154154
encoding = kwds.get('encoding', None)
155+
skipfooter = kwds.pop('skipfooter', None)
156+
if skipfooter is not None:
157+
kwds['skip_footer'] = skipfooter
155158

156159
if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
157160
from urllib2 import urlopen
@@ -218,28 +221,31 @@ def read_csv(filepath_or_buffer,
218221
verbose=False,
219222
delimiter=None,
220223
encoding=None,
221-
squeeze=False):
222-
kwds = dict(filepath_or_buffer=filepath_or_buffer,
223-
sep=sep, dialect=dialect,
224-
header=header, index_col=index_col,
225-
names=names, skiprows=skiprows,
226-
na_values=na_values, keep_default_na=keep_default_na,
227-
thousands=thousands,
228-
comment=comment, parse_dates=parse_dates,
229-
keep_date_col=keep_date_col,
230-
dayfirst=dayfirst, date_parser=date_parser,
231-
nrows=nrows, iterator=iterator,
232-
chunksize=chunksize, skip_footer=skip_footer,
233-
converters=converters, verbose=verbose,
234-
delimiter=delimiter, encoding=encoding,
235-
squeeze=squeeze)
224+
squeeze=False,
225+
**kwds):
226+
kdict = dict(filepath_or_buffer=filepath_or_buffer,
227+
sep=sep, dialect=dialect,
228+
header=header, index_col=index_col,
229+
names=names, skiprows=skiprows,
230+
na_values=na_values, keep_default_na=keep_default_na,
231+
thousands=thousands,
232+
comment=comment, parse_dates=parse_dates,
233+
keep_date_col=keep_date_col,
234+
dayfirst=dayfirst, date_parser=date_parser,
235+
nrows=nrows, iterator=iterator,
236+
chunksize=chunksize, skip_footer=skip_footer,
237+
converters=converters, verbose=verbose,
238+
delimiter=delimiter, encoding=encoding,
239+
squeeze=squeeze)
240+
241+
kdict.update(kwds)
236242

237243
# Alias sep -> delimiter.
238-
sep = kwds.pop('sep')
239-
if kwds.get('delimiter', None) is None:
240-
kwds['delimiter'] = sep
244+
sep = kdict.pop('sep')
245+
if kdict.get('delimiter', None) is None:
246+
kdict['delimiter'] = sep
241247

242-
return _read(TextParser, filepath_or_buffer, kwds)
248+
return _read(TextParser, filepath_or_buffer, kdict)
243249

244250
@Appender(_read_table_doc)
245251
def read_table(filepath_or_buffer,
@@ -265,31 +271,34 @@ def read_table(filepath_or_buffer,
265271
verbose=False,
266272
delimiter=None,
267273
encoding=None,
268-
squeeze=False):
269-
kwds = dict(filepath_or_buffer=filepath_or_buffer,
270-
sep=sep, dialect=dialect,
271-
header=header, index_col=index_col,
272-
names=names, skiprows=skiprows,
273-
na_values=na_values, keep_default_na=keep_default_na,
274-
thousands=thousands,
275-
comment=comment, parse_dates=parse_dates,
276-
keep_date_col=keep_date_col,
277-
dayfirst=dayfirst, date_parser=date_parser,
278-
nrows=nrows, iterator=iterator,
279-
chunksize=chunksize, skip_footer=skip_footer,
280-
converters=converters, verbose=verbose,
281-
delimiter=delimiter, encoding=encoding,
282-
squeeze=squeeze)
274+
squeeze=False,
275+
**kwds):
276+
kdict = dict(filepath_or_buffer=filepath_or_buffer,
277+
sep=sep, dialect=dialect,
278+
header=header, index_col=index_col,
279+
names=names, skiprows=skiprows,
280+
na_values=na_values, keep_default_na=keep_default_na,
281+
thousands=thousands,
282+
comment=comment, parse_dates=parse_dates,
283+
keep_date_col=keep_date_col,
284+
dayfirst=dayfirst, date_parser=date_parser,
285+
nrows=nrows, iterator=iterator,
286+
chunksize=chunksize, skip_footer=skip_footer,
287+
converters=converters, verbose=verbose,
288+
delimiter=delimiter, encoding=encoding,
289+
squeeze=squeeze)
290+
291+
kdict.update(kwds)
283292

284293
# Alias sep -> delimiter.
285-
sep = kwds.pop('sep')
286-
if kwds.get('delimiter', None) is None:
287-
kwds['delimiter'] = sep
294+
sep = kdict.pop('sep')
295+
if kdict.get('delimiter', None) is None:
296+
kdict['delimiter'] = sep
288297

289298
# Override as default encoding.
290-
kwds['encoding'] = None
299+
kdict['encoding'] = None
291300

292-
return _read(TextParser, filepath_or_buffer, kwds)
301+
return _read(TextParser, filepath_or_buffer, kdict)
293302

294303
@Appender(_read_fwf_doc)
295304
def read_fwf(filepath_or_buffer,
@@ -315,8 +324,9 @@ def read_fwf(filepath_or_buffer,
315324
delimiter=None,
316325
verbose=False,
317326
encoding=None,
318-
squeeze=False):
319-
kwds = dict(filepath_or_buffer=filepath_or_buffer,
327+
squeeze=False,
328+
**kwds):
329+
kdict = dict(filepath_or_buffer=filepath_or_buffer,
320330
colspecs=colspecs, widths=widths,
321331
header=header, index_col=index_col,
322332
names=names, skiprows=skiprows,
@@ -331,9 +341,11 @@ def read_fwf(filepath_or_buffer,
331341
delimiter=delimiter, encoding=encoding,
332342
squeeze=squeeze)
333343

344+
kdict.update(kwds)
345+
334346
# Check input arguments.
335-
colspecs = kwds.get('colspecs', None)
336-
widths = kwds.pop('widths', None)
347+
colspecs = kdict.get('colspecs', None)
348+
widths = kdict.pop('widths', None)
337349
if bool(colspecs is None) == bool(widths is None):
338350
raise ValueError("You must specify only one of 'widths' and "
339351
"'colspecs'")
@@ -344,10 +356,10 @@ def read_fwf(filepath_or_buffer,
344356
for w in widths:
345357
colspecs.append( (col, col+w) )
346358
col += w
347-
kwds['colspecs'] = colspecs
359+
kdict['colspecs'] = colspecs
348360

349-
kwds['thousands'] = thousands
350-
return _read(FixedWidthFieldParser, filepath_or_buffer, kwds)
361+
kdict['thousands'] = thousands
362+
return _read(FixedWidthFieldParser, filepath_or_buffer, kdict)
351363

352364
def read_clipboard(**kwargs): # pragma: no cover
353365
"""
@@ -1276,9 +1288,10 @@ def __init__(self, path_or_buf):
12761288
def __repr__(self):
12771289
return object.__repr__(self)
12781290

1279-
def parse(self, sheetname, header=0, skiprows=None, index_col=None,
1280-
parse_cols=None, parse_dates=False, date_parser=None,
1281-
na_values=None, thousands=None, chunksize=None):
1291+
def parse(self, sheetname, header=0, skiprows=None, skip_footer=0,
1292+
index_col=None, parse_cols=None, parse_dates=False,
1293+
date_parser=None, na_values=None, thousands=None, chunksize=None,
1294+
**kwds):
12821295
"""
12831296
Read Excel table into DataFrame
12841297
@@ -1289,7 +1302,9 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
12891302
header : int, default 0
12901303
Row to use for the column labels of the parsed DataFrame
12911304
skiprows : list-like
1292-
Row numbers to skip (0-indexed)
1305+
Rows to skip at the beginning (0-indexed)
1306+
skip_footer : int, default 0
1307+
Rows at the end to skip (0-indexed)
12931308
index_col : int, default None
12941309
Column to use as the row labels of the DataFrame. Pass None if
12951310
there is no such column
@@ -1304,6 +1319,10 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
13041319
-------
13051320
parsed : DataFrame
13061321
"""
1322+
skipfooter = kwds.pop('skipfooter', None)
1323+
if skipfooter is not None:
1324+
skip_footer = skipfooter
1325+
13071326
choose = {True:self._parse_xlsx,
13081327
False:self._parse_xls}
13091328
return choose[self.use_xlsx](sheetname, header=header,
@@ -1313,15 +1332,17 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
13131332
date_parser=date_parser,
13141333
na_values=na_values,
13151334
thousands=thousands,
1316-
chunksize=chunksize)
1335+
chunksize=chunksize,
1336+
skip_footer=skip_footer)
13171337

13181338
def _should_parse(self, i, parse_cols):
13191339
if isinstance(parse_cols, int):
13201340
return i <= parse_cols
13211341
else:
13221342
return i in parse_cols
13231343

1324-
def _parse_xlsx(self, sheetname, header=0, skiprows=None, index_col=None,
1344+
def _parse_xlsx(self, sheetname, header=0, skiprows=None,
1345+
skip_footer=0, index_col=None,
13251346
parse_cols=None, parse_dates=False, date_parser=None,
13261347
na_values=None, thousands=None, chunksize=None):
13271348
sheet = self.book.get_sheet_by_name(name=sheetname)
@@ -1350,11 +1371,13 @@ def _parse_xlsx(self, sheetname, header=0, skiprows=None, index_col=None,
13501371
parse_dates=parse_dates,
13511372
date_parser=date_parser,
13521373
skiprows=skiprows,
1374+
skip_footer=skip_footer,
13531375
chunksize=chunksize)
13541376

13551377
return parser.get_chunk()
13561378

1357-
def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None,
1379+
def _parse_xls(self, sheetname, header=0, skiprows=None,
1380+
skip_footer=0, index_col=None,
13581381
parse_cols=None, parse_dates=False, date_parser=None,
13591382
na_values=None, thousands=None, chunksize=None):
13601383
from datetime import MINYEAR, time, datetime
@@ -1394,6 +1417,7 @@ def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None,
13941417
parse_dates=parse_dates,
13951418
date_parser=date_parser,
13961419
skiprows=skiprows,
1420+
skip_footer=skip_footer,
13971421
chunksize=chunksize)
13981422

13991423
return parser.get_chunk()

pandas/io/tests/test_parsers.py

+22
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,13 @@ def test_excel_table(self):
762762
assert_frame_equal(df, df2)
763763
assert_frame_equal(df3, df2)
764764

765+
df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
766+
skipfooter=1)
767+
df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
768+
skip_footer=1)
769+
assert_frame_equal(df4, df.ix[:-1])
770+
assert_frame_equal(df4, df5)
771+
765772
def test_excel_read_buffer(self):
766773
_skip_if_no_xlrd()
767774
_skip_if_no_openpyxl()
@@ -788,6 +795,13 @@ def test_xlsx_table(self):
788795
assert_frame_equal(df, df2)
789796
assert_frame_equal(df3, df2)
790797

798+
df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
799+
skipfooter=1)
800+
df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
801+
skip_footer=1)
802+
assert_frame_equal(df4, df.ix[:-1])
803+
assert_frame_equal(df4, df5)
804+
791805
def test_parse_cols_int(self):
792806
_skip_if_no_openpyxl()
793807
_skip_if_no_xlrd()
@@ -1125,6 +1139,14 @@ def test_skip_footer(self):
11251139
result = read_csv(StringIO(data), nrows=3)
11261140
assert_frame_equal(result, expected)
11271141

1142+
# skipfooter alias
1143+
result = read_csv(StringIO(data), skipfooter=2)
1144+
no_footer = '\n'.join(data.split('\n')[:-3])
1145+
expected = read_csv(StringIO(no_footer))
1146+
1147+
assert_frame_equal(result, expected)
1148+
1149+
11281150
def test_no_unnamed_index(self):
11291151
data = """ id c0 c1 c2
11301152
0 1 0 a b

0 commit comments

Comments (0)