ENH: TextParser iteration tweaks (yield every chunk until StopIteration; add BufferedReader stubs). Tests needed

wesm · wesm · commit 36462470ba72 · 2011-10-20T11:21:21.000-04:00
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -3,14 +3,14 @@
 """
 
 from StringIO import StringIO
+import zipfile
 
 import numpy as np
 
 from pandas.core.index import Index, MultiIndex
 from pandas.core.frame import DataFrame
 import pandas._tseries as lib
 
-
 def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
              skiprows=None, na_values=None, parse_dates=False,
              date_parser=None, nrows=None, iterator=False, chunksize=None):
@@ -117,6 +117,18 @@ def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None,
 """ % (_parser_params % _table_sep)
 
 
+class BufferedReader(object):
+    """
+    For handling different kinds of files, e.g. zip files where reading out a
+    chunk of lines is faster than reading out one line at a time.
+    """
+
+    def __init__(self, fh, delimiter=','):
+        pass
+
+class BufferedCSVReader(BufferedReader):
+    pass
+
 class TextParser(object):
     """
     Converts lists of lists/tuples into DataFrames with proper type inference
@@ -176,6 +188,7 @@ def __init__(self, data, names=None, header=0, index_col=None,
 
         self.columns = self._infer_columns()
         self.index_name = self._get_index_name()
+        self._first_chunk = True
 
     def _infer_columns(self):
         names = self.names
@@ -238,7 +251,8 @@ def _clear_buffer(self):
 
     def __iter__(self):
         try:
-            yield self.get_chunk(self.chunksize)
+            while True:
+                yield self.get_chunk(self.chunksize)
         except StopIteration:
             pass
 
@@ -280,7 +294,16 @@ def _get_index_name(self):
         return index_name
 
     def get_chunk(self, rows=None):
-        content = self._get_lines(rows)
+        try:
+            content = self._get_lines(rows)
+        except StopIteration:
+            if self._first_chunk:
+                content = []
+            else:
+                raise
+
+        # done with first read, next time raise StopIteration
+        self._first_chunk = False
 
         if len(content) == 0: # pragma: no cover
             if self.index_col is not None:
@@ -357,7 +380,9 @@ def _get_lines(self, rows=None):
                     while True:
                         lines.append(source.next())
             except StopIteration:
-                pass
+                if len(lines) == 0:
+                    raise
+            self.pos += len(lines)
 
         self.buf = []