Skip to content

Commit d2efe18

Browse files
committed
API: Relax is-file-like conditions
Previously, we required all file-like objects to have "read", "write", "seek", and "tell" methods, but that was too strict (e.g. for read-only buffers). This commit relaxes the requirement to having EITHER a "read" or a "write" attribute. Closes pandas-dev/pandas#16135 (gh-16135).
1 parent 2d9909c commit d2efe18

File tree

4 files changed

+73
-11
lines changed

4 files changed

+73
-11
lines changed

pandas/core/dtypes/inference.py

+4-11
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,8 @@ def is_file_like(obj):
142142
Check if the object is a file-like object.
143143
144144
For objects to be considered file-like, they must
145-
be an iterator AND have the following four methods:
146-
147-
1) read
148-
2) write
149-
3) seek
150-
4) tell
145+
be an iterator AND have either a `read` and/or `write`
146+
method as an attribute.
151147
152148
Note: file-like objects must be iterable, but
153149
iterable objects need not be file-like.
@@ -172,11 +168,8 @@ def is_file_like(obj):
172168
False
173169
"""
174170

175-
file_attrs = ('read', 'write', 'seek', 'tell')
176-
177-
for attr in file_attrs:
178-
if not hasattr(obj, attr):
179-
return False
171+
if not (hasattr(obj, 'read') or hasattr(obj, 'write')):
172+
return False
180173

181174
if not is_iterator(obj):
182175
return False

pandas/tests/dtypes/test_inference.py

+30
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,41 @@ def test_is_dict_like():
100100

101101

102102
def test_is_file_like():
103+
class MockFile(object):
104+
pass
105+
103106
is_file = inference.is_file_like
104107

105108
data = StringIO("data")
106109
assert is_file(data)
107110

111+
# No read / write attributes
112+
# No iterator attributes
113+
m = MockFile()
114+
assert not is_file(m)
115+
116+
MockFile.write = lambda self: 0
117+
118+
# Write attribute but not an iterator
119+
m = MockFile()
120+
assert not is_file(m)
121+
122+
MockFile.__iter__ = lambda self: self
123+
MockFile.__next__ = lambda self: 0
124+
MockFile.next = MockFile.__next__
125+
126+
# Valid write-only file
127+
m = MockFile()
128+
assert is_file(m)
129+
130+
del MockFile.write
131+
MockFile.read = lambda self: 0
132+
133+
# Valid read-only file
134+
m = MockFile()
135+
assert is_file(m)
136+
137+
# Iterable (a list, not an iterator) with no read / write attributes
108138
data = [1, 2, 3]
109139
assert not is_file(data)
110140

pandas/tests/io/parser/common.py

+20
Original file line numberDiff line numberDiff line change
@@ -1685,6 +1685,26 @@ class InvalidBuffer(object):
16851685
with tm.assert_raises_regex(ValueError, msg):
16861686
self.read_csv(InvalidBuffer())
16871687

1688+
# gh-16135: we want to ensure that "tell" and "seek"
1689+
# aren't actually being used when we call `read_csv`
1690+
#
1691+
# Thus, while the object may look "invalid" (these
1692+
# methods are attributes of the `StringIO` class),
1693+
# it is still a valid file-object for our purposes.
1694+
class NoSeekTellBuffer(StringIO):
1695+
def tell(self):
1696+
raise AttributeError("No tell method")
1697+
1698+
def seek(self, pos, whence=0):
1699+
raise AttributeError("No seek method")
1700+
1701+
data = "a\n1"
1702+
1703+
expected = pd.DataFrame({"a": [1]})
1704+
result = self.read_csv(NoSeekTellBuffer(data))
1705+
1706+
tm.assert_frame_equal(result, expected)
1707+
16881708
if PY3:
16891709
from unittest import mock
16901710

pandas/tests/io/parser/test_network.py

+19
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,22 @@ def test_s3_fails(self):
176176
# It's irrelevant here that this isn't actually a table.
177177
with pytest.raises(IOError):
178178
read_csv('s3://cant_get_it/')
179+
180+
@tm.network
181+
def boto3_client_s3(self):
182+
# see gh-16135
183+
184+
# boto3 is a dependency of s3fs
185+
import boto3
186+
client = boto3.client("s3")
187+
188+
key = "/tips.csv"
189+
bucket = "pandas-test"
190+
s3_object = client.get_object(Bucket=bucket, Key=key)
191+
192+
result = read_csv(s3_object["Body"])
193+
assert isinstance(result, DataFrame)
194+
assert not result.empty
195+
196+
expected = read_csv(tm.get_data_path('tips.csv'))
197+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)