Skip to content

Commit 3646247

Browse files
committed
ENH: tweaks. tests needed
1 parent 9472428 commit 3646247

File tree

1 file changed

+29
-4
lines changed

1 file changed

+29
-4
lines changed

pandas/io/parsers.py

Lines changed: 29 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,14 @@
33
"""
44

55
from StringIO import StringIO
6+
import zipfile
67

78
import numpy as np
89

910
from pandas.core.index import Index, MultiIndex
1011
from pandas.core.frame import DataFrame
1112
import pandas._tseries as lib
1213

13-
1414
def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
1515
skiprows=None, na_values=None, parse_dates=False,
1616
date_parser=None, nrows=None, iterator=False, chunksize=None):
@@ -117,6 +117,18 @@ def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None,
117117
""" % (_parser_params % _table_sep)
118118

119119

120+
class BufferedReader(object):
121+
"""
122+
For handling different kinds of files, e.g. zip files where reading out a
123+
chunk of lines is faster than reading out one line at a time.
124+
"""
125+
126+
def __init__(self, fh, delimiter=','):
127+
pass
128+
129+
class BufferedCSVReader(BufferedReader):
130+
pass
131+
120132
class TextParser(object):
121133
"""
122134
Converts lists of lists/tuples into DataFrames with proper type inference
@@ -176,6 +188,7 @@ def __init__(self, data, names=None, header=0, index_col=None,
176188

177189
self.columns = self._infer_columns()
178190
self.index_name = self._get_index_name()
191+
self._first_chunk = True
179192

180193
def _infer_columns(self):
181194
names = self.names
@@ -238,7 +251,8 @@ def _clear_buffer(self):
238251

239252
def __iter__(self):
240253
try:
241-
yield self.get_chunk(self.chunksize)
254+
while True:
255+
yield self.get_chunk(self.chunksize)
242256
except StopIteration:
243257
pass
244258

@@ -280,7 +294,16 @@ def _get_index_name(self):
280294
return index_name
281295

282296
def get_chunk(self, rows=None):
283-
content = self._get_lines(rows)
297+
try:
298+
content = self._get_lines(rows)
299+
except StopIteration:
300+
if self._first_chunk:
301+
content = []
302+
else:
303+
raise
304+
305+
# done with first read, next time raise StopIteration
306+
self._first_chunk = False
284307

285308
if len(content) == 0: # pragma: no cover
286309
if self.index_col is not None:
@@ -357,7 +380,9 @@ def _get_lines(self, rows=None):
357380
while True:
358381
lines.append(source.next())
359382
except StopIteration:
360-
pass
383+
if len(lines) == 0:
384+
raise
385+
self.pos += len(lines)
361386

362387
self.buf = []
363388

0 commit comments

Comments
 (0)