Skip to content

Commit 2e829f2

Browse files
committed
COMPAT: Consider Python 2.x tarfiles file-like
tarfile.ExFileObject has no "next" method in Python 2.x, so it fails the file-like check in read_csv. However, such objects can be read in just fine, meaning our check for file-like objects is too strict. This commit relaxes the check to look only for "__iter__". Closes gh-16530.
1 parent e60dc4c commit 2e829f2

File tree

4 files changed

+49
-4
lines changed

4 files changed

+49
-4
lines changed

doc/source/whatsnew/v0.20.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ I/O
6464
- Bug in pd.read_csv() when comment is passed in space delimited text files (:issue:`16472`)
6565
- Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`)
6666
- Bug that raised IndexError when HTML-rendering an empty DataFrame (:issue:`15953`)
67+
- Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`)
6768

6869

6970
Plotting

pandas/core/dtypes/inference.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def is_file_like(obj):
171171
if not (hasattr(obj, 'read') or hasattr(obj, 'write')):
172172
return False
173173

174-
if not is_iterator(obj):
174+
if not hasattr(obj, "__iter__"):
175175
return False
176176

177177
return True

pandas/tests/dtypes/test_inference.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,9 @@ class MockFile(object):
120120
m = MockFile()
121121
assert not is_file(m)
122122

123+
# gh-16530: Valid iterator just means we have the
124+
# __iter__ attribute for our purposes.
123125
MockFile.__iter__ = lambda self: self
124-
MockFile.__next__ = lambda self: 0
125-
MockFile.next = MockFile.__next__
126126

127127
# Valid write-only file
128128
m = MockFile()

pandas/tests/io/parser/c_parser_only.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
"""
99

1010
import sys
11+
import tarfile
1112

1213
import pytest
1314
import numpy as np
@@ -16,7 +17,7 @@
1617
import pandas.util.testing as tm
1718
from pandas import DataFrame
1819
from pandas import compat
19-
from pandas.compat import StringIO, range, lrange
20+
from pandas.compat import StringIO, range, lrange, u
2021

2122

2223
class CParserTests(object):
@@ -446,3 +447,46 @@ def test_comment_whitespace_delimited(self):
446447
[7, np.nan],
447448
[8, np.nan]])
448449
tm.assert_frame_equal(df, expected)
450+
451+
def test_file_like_no_next(self):
452+
# gh-16530: the file-like need not have a "next" or "__next__"
453+
# attribute despite having an "__iter__" attribute.
454+
#
455+
# NOTE: This is only true for the C engine, not Python engine.
456+
class NoNextBuffer(StringIO):
457+
def __next__(self):
458+
raise AttributeError("No next method")
459+
460+
next = __next__
461+
462+
data = "a\n1"
463+
464+
expected = pd.DataFrame({"a": [1]})
465+
result = self.read_csv(NoNextBuffer(data))
466+
467+
tm.assert_frame_equal(result, expected)
468+
469+
def test_read_tarfile(self):
470+
# see gh-16530
471+
#
472+
# Unfortunately, Python's CSV library can't handle
473+
# tarfile objects (expects string, not bytes when
474+
# iterating through a file-like).
475+
expected = pd.DataFrame({"a": [1]})
476+
477+
tar_path = u("__%s__.tar" % tm.rands(10))
478+
file_path = u("__%s__.csv" % tm.rands(10))
479+
480+
with tm.ensure_clean(tar_path) as tar_path:
481+
with tm.ensure_clean(file_path) as file_path:
482+
expected.to_csv(file_path, index=False)
483+
484+
tar = tarfile.open(tar_path, "w")
485+
tar.add(file_path)
486+
tar.close()
487+
488+
tar = tarfile.open(tar_path, "r")
489+
data_file = tar.extractfile(file_path)
490+
out = self.read_csv(data_file)
491+
492+
tm.assert_frame_equal(out, expected)

0 commit comments

Comments
 (0)