1
1
"""
2
2
Module contains tools for processing files into DataFrames or other objects
3
3
"""
4
+ from __future__ import print_function
4
5
5
6
from StringIO import StringIO
6
7
import zipfile
13
14
14
15
def read_csv (filepath_or_buffer , sep = None , header = 0 , index_col = None , names = None ,
15
16
skiprows = None , na_values = None , parse_dates = False ,
16
- date_parser = None , nrows = None , iterator = False , chunksize = None ):
17
+ date_parser = None , nrows = None , iterator = False , chunksize = None ,
18
+ skip_footer = 0 ):
17
19
import csv
18
20
19
21
if hasattr (filepath_or_buffer , 'read' ):
@@ -50,7 +52,8 @@ def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
50
52
parse_dates = parse_dates ,
51
53
date_parser = date_parser ,
52
54
skiprows = skiprows ,
53
- chunksize = chunksize , buf = buf )
55
+ chunksize = chunksize , buf = buf ,
56
+ skip_footer = skip_footer )
54
57
55
58
if nrows is not None :
56
59
return parser .get_chunk (nrows )
@@ -62,12 +65,14 @@ def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
62
65
63
66
def read_table (filepath_or_buffer , sep = '\t ' , header = 0 , index_col = None ,
64
67
names = None , skiprows = None , na_values = None , parse_dates = False ,
65
- date_parser = None , nrows = None , iterator = False , chunksize = None ):
68
+ date_parser = None , nrows = None , iterator = False , chunksize = None ,
69
+ skip_footer = 0 ):
66
70
return read_csv (filepath_or_buffer , sep = sep , header = header ,
67
71
skiprows = skiprows , index_col = index_col ,
68
72
na_values = na_values , date_parser = date_parser ,
69
73
names = names , parse_dates = parse_dates ,
70
- nrows = nrows , iterator = iterator , chunksize = chunksize )
74
+ nrows = nrows , iterator = iterator , chunksize = chunksize ,
75
+ skip_footer = skip_footer )
71
76
72
77
_parser_params = """Also supports optionally iterating or breaking of the file
73
78
into chunks.
@@ -98,6 +103,8 @@ def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None,
98
103
Return TextParser object
99
104
chunksize : int, default None
100
105
Return TextParser object for iteration
106
+ skip_footer : int, default 0
107
+ Number of lines at bottom of file to skip
101
108
102
109
Returns
103
110
-------
@@ -163,7 +170,10 @@ class TextParser(object):
163
170
Custom NA values
164
171
parse_dates : boolean, default False
165
172
date_parser : function, default None
166
- skiprows
173
+ skiprows : list of integers
174
+ Row numbers to skip
175
+ skip_footer : int
176
+ Number of lines at bottom of file to skip
167
177
"""
168
178
169
179
# common NA values
@@ -175,7 +185,7 @@ class TextParser(object):
175
185
176
186
def __init__ (self , data , names = None , header = 0 , index_col = None ,
177
187
na_values = None , parse_dates = False , date_parser = None ,
178
- chunksize = None , skiprows = None , buf = None ):
188
+ chunksize = None , skiprows = None , skip_footer = 0 , buf = None ):
179
189
"""
180
190
Workhorse function for processing nested list into DataFrame
181
191
@@ -195,6 +205,9 @@ def __init__(self, data, names=None, header=0, index_col=None,
195
205
self .chunksize = chunksize
196
206
self .passed_names = names is not None
197
207
self .skiprows = set () if skiprows is None else set (skiprows )
208
+ self .skip_footer = skip_footer
209
+
210
+ assert (self .skip_footer >= 0 )
198
211
199
212
if na_values is None :
200
213
self .na_values = self .NA_VALUES
@@ -306,6 +319,9 @@ def _get_index_name(self):
306
319
return index_name
307
320
308
321
def get_chunk (self , rows = None ):
322
+ if rows is not None and self .skip_footer :
323
+ print ('skip_footer not supported for iteration' )
324
+
309
325
try :
310
326
content = self ._get_lines (rows )
311
327
except StopIteration :
@@ -401,6 +417,9 @@ def _get_lines(self, rows=None):
401
417
402
418
self .buf = []
403
419
420
+ if self .skip_footer :
421
+ lines = lines [:- self .skip_footer ]
422
+
404
423
return lines
405
424
406
425
def _maybe_convert_int_mindex (index , parse_dates , date_parser ):
0 commit comments