API: Relax is-file-like conditions

gfyoung · gfyoung · commit d2efe18a95d0 · 2017-04-27T03:11:34.000-04:00
Previously, we required every file-like object to have "read", "write", "seek", and "tell" methods, but that was too strict (e.g. it rejected read-only buffers). This commit relaxes the requirement: an object now only needs EITHER a "read" or a "write" attribute (it must still be an iterator). Closes gh-16135 (pandas-dev).
diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
@@ -142,12 +142,8 @@ def is_file_like(obj):
     Check if the object is a file-like object.
 
     For objects to be considered file-like, they must
-    be an iterator AND have the following four methods:
-
-    1) read
-    2) write
-    3) seek
-    4) tell
+    be an iterator AND have either a `read` and/or `write`
+    method as an attribute.
 
     Note: file-like objects must be iterable, but
     iterable objects need not be file-like.
@@ -172,11 +168,8 @@ def is_file_like(obj):
     False
     """
 
-    file_attrs = ('read', 'write', 'seek', 'tell')
-
-    for attr in file_attrs:
-        if not hasattr(obj, attr):
-            return False
+    if not (hasattr(obj, 'read') or hasattr(obj, 'write')):
+        return False
 
     if not is_iterator(obj):
         return False
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -100,11 +100,41 @@ def test_is_dict_like():
 
 
 def test_is_file_like():
+    class MockFile(object):
+        pass
+
     is_file = inference.is_file_like
 
     data = StringIO("data")
     assert is_file(data)
 
+    # No read / write attributes
+    # No iterator attributes
+    m = MockFile()
+    assert not is_file(m)
+
+    MockFile.write = lambda self: 0
+
+    # Write attribute but not an iterator
+    m = MockFile()
+    assert not is_file(m)
+
+    MockFile.__iter__ = lambda self: self
+    MockFile.__next__ = lambda self: 0
+    MockFile.next = MockFile.__next__
+
+    # Valid write-only file
+    m = MockFile()
+    assert is_file(m)
+
+    del MockFile.write
+    MockFile.read = lambda self: 0
+
+    # Valid read-only file
+    m = MockFile()
+    assert is_file(m)
+
+    # Iterator but no read / write attributes
     data = [1, 2, 3]
     assert not is_file(data)
 
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
@@ -1685,6 +1685,26 @@ class InvalidBuffer(object):
         with tm.assert_raises_regex(ValueError, msg):
             self.read_csv(InvalidBuffer())
 
+        # gh-16135: we want to ensure that "tell" and "seek"
+        # aren't actually being used when we call `read_csv`
+        #
+        # Thus, while the object may look "invalid" (these
+        # methods are attributes of the `StringIO` class),
+        # it is still a valid file-object for our purposes.
+        class NoSeekTellBuffer(StringIO):
+            def tell(self):
+                raise AttributeError("No tell method")
+
+            def seek(self, pos, whence=0):
+                raise AttributeError("No seek method")
+
+        data = "a\n1"
+
+        expected = pd.DataFrame({"a": [1]})
+        result = self.read_csv(NoSeekTellBuffer(data))
+
+        tm.assert_frame_equal(result, expected)
+
         if PY3:
             from unittest import mock
 
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
@@ -176,3 +176,22 @@ def test_s3_fails(self):
         # It's irrelevant here that this isn't actually a table.
         with pytest.raises(IOError):
             read_csv('s3://cant_get_it/')
+
+    @tm.network
+    def boto3_client_s3(self):
+        # see gh-16135
+
+        # boto3 is a dependency of s3fs
+        import boto3
+        client = boto3.client("s3")
+
+        key = "/tips.csv"
+        bucket = "pandas-test"
+        s3_object = client.get_object(Bucket=bucket, Key=key)
+
+        result = read_csv(s3_object["Body"])
+        assert isinstance(result, DataFrame)
+        assert not result.empty
+
+        expected = read_csv(tm.get_data_path('tips.csv'))
+        tm.assert_frame_equal(result, expected)