Skip to content

Commit f24b923

Browse files
committed
BUG: respect passed chunksize in read_csv when using get_chunk function. close #3406
1 parent 5adcceb commit f24b923

File tree

3 files changed

+24
-5
lines changed

3 files changed

+24
-5
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ pandas 0.11.0
311311
to non-fast apply) (GH3380_)
312312
- Eliminated unicode errors on FreeBSD when using MPL GTK backend (GH3360_)
313313
- Period.strftime should return unicode strings always (GH3363_)
314+
- Respect passed read_* chunksize in get_chunk function (GH3406_)
314315

315316
.. _GH3294: https://github.com/pydata/pandas/issues/3294
316317
.. _GH622: https://github.com/pydata/pandas/issues/622
@@ -425,6 +426,7 @@ pandas 0.11.0
425426
.. _GH3308: https://github.com/pydata/pandas/issues/3308
426427
.. _GH3311: https://github.com/pydata/pandas/issues/3311
427428
.. _GH3380: https://github.com/pydata/pandas/issues/3380
429+
.. _GH3406: https://github.com/pydata/pandas/issues/3406
428430

429431
pandas 0.10.1
430432
=============

pandas/io/parsers.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -649,9 +649,10 @@ def read(self, nrows=None):
649649
def _create_index(self, col_dict, columns):
650650
pass
651651

652-
# backwards compatibility
653-
get_chunk = read
654-
652+
def get_chunk(self, size=None):
    """
    Read the next chunk of rows (kept for backwards compatibility).

    Parameters
    ----------
    size : optional
        Number of rows to read. When it is None, ``self.chunksize`` is
        used instead.

    Returns
    -------
    Whatever ``self.read`` returns for the requested number of rows.
    """
    nrows = self.chunksize if size is None else size
    return self.read(nrows=nrows)
655656

656657
def _is_index_col(col):
657658
return col is not None and col is not False
@@ -1285,7 +1286,10 @@ def read(self, rows=None):
12851286
return index, columns, data
12861287

12871288
# legacy
1288-
get_chunk = read
1289+
def get_chunk(self, size=None):
    """
    Read the next chunk of rows (legacy entry point).

    Parameters
    ----------
    size : optional
        Number of rows to read. When it is None, ``self.chunksize`` is
        used instead.

    Returns
    -------
    Whatever ``self.read`` returns for the requested number of rows.
    """
    if size is None:
        size = self.chunksize
    # ``read`` in this class is declared as ``read(self, rows=None)``, so the
    # keyword must be ``rows``; passing ``nrows=`` raises TypeError here.
    return self.read(rows=size)
12891293

12901294
def _convert_data(self, data):
12911295
# apply converters

pandas/io/tests/test_parsers.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,9 @@ def test_malformed(self):
456456
2,3,4
457457
"""
458458
try:
459-
it = self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, skiprows=[2])
459+
it = self.read_table(StringIO(data), sep=',', header=1,
460+
comment='#', iterator=True, chunksize=1,
461+
skiprows=[2])
460462
df = it.read(1)
461463
it.read(2)
462464
self.assert_(False)
@@ -876,6 +878,17 @@ def test_read_chunksize_named(self):
876878
tm.assert_frame_equal(chunks[1], df[2:4])
877879
tm.assert_frame_equal(chunks[2], df[4:])
878880

881+
def test_get_chunk_passed_chunksize(self):
    """get_chunk() without a size returns a piece of the passed chunksize."""
    # Header line plus four data rows; with chunksize=2 the first piece
    # should contain exactly two rows.
    csv_text = "A,B,C\n1,2,3\n4,5,6\n7,8,9\n1,2,3"
    reader = self.read_csv(StringIO(csv_text), chunksize=2)

    first_piece = reader.get_chunk()
    self.assertEqual(len(first_piece), 2)
891+
879892
def test_read_text_list(self):
880893
data = """A,B,C\nfoo,1,2,3\nbar,4,5,6"""
881894
as_list = [['A', 'B', 'C'], ['foo', '1', '2', '3'], ['bar',

0 commit comments

Comments
 (0)