@@ -1208,8 +1208,8 @@ def __repr__(self):
1208
1208
return object .__repr__ (self )
1209
1209
1210
1210
def parse (self , sheetname , header = 0 , skiprows = None , index_col = None ,
1211
- parse_dates = False , date_parser = None , na_values = None ,
1212
- thousands = None , chunksize = None ):
1211
+ parse_cols = None , parse_dates = False , date_parser = None ,
1212
+ na_values = None , thousands = None , chunksize = None ):
1213
1213
"""
1214
1214
Read Excel table into DataFrame
1215
1215
@@ -1224,6 +1224,10 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
1224
1224
index_col : int, default None
1225
1225
Column to use as the row labels of the DataFrame. Pass None if
1226
1226
there is no such column
1227
+ parse_cols : int or list, default None
1228
+ If None then parse all columns,
1229
+ If int then indicates last column to be parsed
1230
+ If list of ints then indicates list of column numbers to be parsed
1227
1231
na_values : list-like, default None
1228
1232
List of additional strings to recognize as NA/NaN
1229
1233
@@ -1235,21 +1239,38 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
1235
1239
False :self ._parse_xls }
1236
1240
return choose [self .use_xlsx ](sheetname , header = header ,
1237
1241
skiprows = skiprows , index_col = index_col ,
1242
+ parse_cols = parse_cols ,
1238
1243
parse_dates = parse_dates ,
1239
1244
date_parser = date_parser ,
1240
1245
na_values = na_values ,
1241
1246
thousands = thousands ,
1242
1247
chunksize = chunksize )
1243
1248
1249
+ def _should_parse (self , i , parse_cols ):
1250
+ if isinstance (parse_cols , int ):
1251
+ return i <= parse_cols
1252
+ else :
1253
+ return i in parse_cols
1254
+
1244
1255
def _parse_xlsx (self , sheetname , header = 0 , skiprows = None , index_col = None ,
1245
- parse_dates = False , date_parser = None , na_values = None ,
1246
- thousands = None , chunksize = None ):
1256
+ parse_cols = None , parse_dates = False , date_parser = None ,
1257
+ na_values = None , thousands = None , chunksize = None ):
1247
1258
sheet = self .book .get_sheet_by_name (name = sheetname )
1248
1259
data = []
1249
1260
1250
1261
# it brings a new method: iter_rows()
1262
+ should_parse = {}
1263
+
1251
1264
for row in sheet .iter_rows ():
1252
- data .append ([cell .internal_value for cell in row ])
1265
+ row_data = []
1266
+ for j , cell in enumerate (row ):
1267
+
1268
+ if parse_cols is not None and j not in should_parse :
1269
+ should_parse [j ] = self ._should_parse (j , parse_cols )
1270
+
1271
+ if parse_cols is None or should_parse [j ]:
1272
+ row_data .append (cell .internal_value )
1273
+ data .append (row_data )
1253
1274
1254
1275
if header is not None :
1255
1276
data [header ] = _trim_excel_header (data [header ])
@@ -1265,28 +1286,34 @@ def _parse_xlsx(self, sheetname, header=0, skiprows=None, index_col=None,
1265
1286
return parser .get_chunk ()
1266
1287
1267
1288
def _parse_xls (self , sheetname , header = 0 , skiprows = None , index_col = None ,
1268
- parse_dates = False , date_parser = None , na_values = None ,
1269
- thousands = None , chunksize = None ):
1289
+ parse_cols = None , parse_dates = False , date_parser = None ,
1290
+ na_values = None , thousands = None , chunksize = None ):
1270
1291
from datetime import MINYEAR , time , datetime
1271
1292
from xlrd import xldate_as_tuple , XL_CELL_DATE , XL_CELL_ERROR
1272
1293
1273
1294
datemode = self .book .datemode
1274
1295
sheet = self .book .sheet_by_name (sheetname )
1275
1296
1276
1297
data = []
1298
+ should_parse = {}
1277
1299
for i in range (sheet .nrows ):
1278
1300
row = []
1279
- for value , typ in izip (sheet .row_values (i ), sheet .row_types (i )):
1280
- if typ == XL_CELL_DATE :
1281
- dt = xldate_as_tuple (value , datemode )
1282
- # how to produce this first case?
1283
- if dt [0 ] < MINYEAR : # pragma: no cover
1284
- value = time (* dt [3 :])
1285
- else :
1286
- value = datetime (* dt )
1287
- if typ == XL_CELL_ERROR :
1288
- value = np .nan
1289
- row .append (value )
1301
+ for j , (value , typ ) in enumerate (izip (sheet .row_values (i ),
1302
+ sheet .row_types (i ))):
1303
+ if parse_cols is not None and j not in should_parse :
1304
+ should_parse [j ] = self ._should_parse (j , parse_cols )
1305
+
1306
+ if parse_cols is None or should_parse [j ]:
1307
+ if typ == XL_CELL_DATE :
1308
+ dt = xldate_as_tuple (value , datemode )
1309
+ # how to produce this first case?
1310
+ if dt [0 ] < MINYEAR : # pragma: no cover
1311
+ value = time (* dt [3 :])
1312
+ else :
1313
+ value = datetime (* dt )
1314
+ if typ == XL_CELL_ERROR :
1315
+ value = np .nan
1316
+ row .append (value )
1290
1317
data .append (row )
1291
1318
1292
1319
if header is not None :
0 commit comments