|
3 | 3 | """
|
4 | 4 |
|
5 | 5 | from StringIO import StringIO
|
| 6 | +import zipfile |
6 | 7 |
|
7 | 8 | import numpy as np
|
8 | 9 |
|
9 | 10 | from pandas.core.index import Index, MultiIndex
|
10 | 11 | from pandas.core.frame import DataFrame
|
11 | 12 | import pandas._tseries as lib
|
12 | 13 |
|
13 |
| - |
14 | 14 | def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
|
15 | 15 | skiprows=None, na_values=None, parse_dates=False,
|
16 | 16 | date_parser=None, nrows=None, iterator=False, chunksize=None):
|
@@ -117,6 +117,18 @@ def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None,
|
117 | 117 | """ % (_parser_params % _table_sep)
|
118 | 118 |
|
119 | 119 |
|
| 120 | +class BufferedReader(object): |
| 121 | + """ |
| 122 | + For handling different kinds of files, e.g. zip files where reading out a |
| 123 | + chunk of lines is faster than reading out one line at a time. |
| 124 | + """ |
| 125 | + |
| 126 | + def __init__(self, fh, delimiter=','): |
| 127 | + pass |
| 128 | + |
| 129 | +class BufferedCSVReader(BufferedReader): |
| 130 | + pass |
| 131 | + |
120 | 132 | class TextParser(object):
|
121 | 133 | """
|
122 | 134 | Converts lists of lists/tuples into DataFrames with proper type inference
|
@@ -176,6 +188,7 @@ def __init__(self, data, names=None, header=0, index_col=None,
|
176 | 188 |
|
177 | 189 | self.columns = self._infer_columns()
|
178 | 190 | self.index_name = self._get_index_name()
|
| 191 | + self._first_chunk = True |
179 | 192 |
|
180 | 193 | def _infer_columns(self):
|
181 | 194 | names = self.names
|
@@ -238,7 +251,8 @@ def _clear_buffer(self):
|
238 | 251 |
|
239 | 252 | def __iter__(self):
|
240 | 253 | try:
|
241 |
| - yield self.get_chunk(self.chunksize) |
| 254 | + while True: |
| 255 | + yield self.get_chunk(self.chunksize) |
242 | 256 | except StopIteration:
|
243 | 257 | pass
|
244 | 258 |
|
@@ -280,7 +294,16 @@ def _get_index_name(self):
|
280 | 294 | return index_name
|
281 | 295 |
|
282 | 296 | def get_chunk(self, rows=None):
|
283 |
| - content = self._get_lines(rows) |
| 297 | + try: |
| 298 | + content = self._get_lines(rows) |
| 299 | + except StopIteration: |
| 300 | + if self._first_chunk: |
| 301 | + content = [] |
| 302 | + else: |
| 303 | + raise |
| 304 | + |
| 305 | + # done with first read, next time raise StopIteration |
| 306 | + self._first_chunk = False |
284 | 307 |
|
285 | 308 | if len(content) == 0: # pragma: no cover
|
286 | 309 | if self.index_col is not None:
|
@@ -357,7 +380,9 @@ def _get_lines(self, rows=None):
|
357 | 380 | while True:
|
358 | 381 | lines.append(source.next())
|
359 | 382 | except StopIteration:
|
360 |
| - pass |
| 383 | + if len(lines) == 0: |
| 384 | + raise |
| 385 | + self.pos += len(lines) |
361 | 386 |
|
362 | 387 | self.buf = []
|
363 | 388 |
|
|
0 commit comments