From 20bd678df790670d9cf9a0c0a8fb63845cc6ca30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?=
Date: Tue, 11 Aug 2020 21:10:42 -0400
Subject: [PATCH 1/3] to_csv: infer compression before potentially converting
 to file object; get_handle: fsspec file objects need to be wrapped;
 get_filepath_or_buffer: path-like objects that are internally converted to
 file-like objects are opened in binary mode and a named tuple is returned;
 _BytesZipFile: work with filename-less objects

---
 doc/source/whatsnew/v1.2.0.rst      |  2 +
 pandas/_typing.py                   | 18 ++++++
 pandas/core/frame.py                |  3 +-
 pandas/core/generic.py              |  2 +-
 pandas/io/common.py                 | 83 ++++++++++++++++++++++----
 pandas/io/excel/_base.py            |  4 +-
 pandas/io/feather_format.py         |  4 +-
 pandas/io/formats/csvs.py           | 13 +++-
 pandas/io/json/_json.py             |  6 +-
 pandas/io/orc.py                    |  2 +-
 pandas/io/parquet.py                |  6 +-
 pandas/io/parsers.py                |  4 +-
 pandas/io/pickle.py                 |  8 +--
 pandas/io/sas/sas7bdat.py           |  2 +-
 pandas/io/sas/sas_xport.py          |  1 +
 pandas/io/sas/sasreader.py          |  2 +-
 pandas/io/stata.py                  |  4 +-
 pandas/tests/io/test_common.py      | 25 +++++++-
 pandas/tests/io/test_compression.py | 10 ++++
 pandas/tests/io/test_gcs.py         | 92 +++++++++++++++++++++--------
 20 files changed, 227 insertions(+), 64 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 55570341cf4e8..dae2f98bc0b76 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -240,6 +240,8 @@ I/O
 - In :meth:`read_csv` `float_precision='round_trip'` now handles `decimal` and `thousands` parameters (:issue:`35365`)
 - :meth:`to_pickle` and :meth:`read_pickle` were closing user-provided file objects (:issue:`35679`)
 - :meth:`to_csv` passes compression arguments for `'gzip'` always to `gzip.GzipFile` (:issue:`28103`)
+- :meth:`to_csv` did not support zip compression for binary file objects without a filename (:issue:`35058`)
+- :meth:`to_csv` and :meth:`read_csv` did not honor `compression` and `encoding` for path-like objects that are internally converted to file-like objects (:issue:`35677`, :issue:`26124`, and :issue:`32392`)

 Plotting
 ^^^^^^^^
diff --git a/pandas/_typing.py b/pandas/_typing.py
index 1b972030ef5a5..1dc4cf33e5656 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -11,6 +11,7 @@
     Hashable,
     List,
     Mapping,
+    NamedTuple,
     Optional,
     Type,
     TypeVar,
@@ -114,3 +115,20 @@
 # compression keywords and compression
 CompressionDict = Mapping[str, Optional[Union[str, int, bool]]]
 CompressionOptions = Optional[Union[str, CompressionDict]]
+
+
+class IOargs(NamedTuple):
+    """
+    Return value of io/common.py:get_filepath_or_buffer.
+
+    Note (copy & paste from io/parsers):
+    filepath_or_buffer can be Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile]
+    though mypy handling of conditional imports is difficult.
+    See https://github.com/python/mypy/issues/1297
+    """
+
+    filepath_or_buffer: FilePathOrBuffer
+    encoding: Optional[str]
+    compression: CompressionOptions = None
+    should_close: bool = False
+    mode: Optional[str] = None
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 312d449e36022..c2e36408d1908 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2281,10 +2281,9 @@ def to_markdown(
         result = tabulate.tabulate(self, **kwargs)
         if buf is None:
             return result
-        buf, _, _, should_close = get_filepath_or_buffer(
+        buf, _, _, should_close, _ = get_filepath_or_buffer(  # type: ignore
             buf, mode=mode, storage_options=storage_options
         )
-        assert buf is not None  # Help mypy.
         assert not isinstance(buf, str)
         buf.writelines(result)
         if should_close:
             buf.close()
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 3bad2d6dd18b9..5b3c13cfe332e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3249,7 +3249,7 @@ def to_csv(
         formatter.save()

         if path_or_buf is None:
-            return formatter.path_or_buf.getvalue()
+            return formatter.path_or_buf.getvalue()  # type: ignore

         return None
diff --git a/pandas/io/common.py b/pandas/io/common.py
index d1305c9cabe0e..85033142238bf 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -27,12 +27,14 @@
     uses_params,
     uses_relative,
 )
+import warnings
 import zipfile

 from pandas._typing import (
     CompressionDict,
     CompressionOptions,
     FilePathOrBuffer,
+    IOargs,
     StorageOptions,
 )
 from pandas.compat import _get_lzma_file, _import_lzma
@@ -168,7 +170,7 @@ def get_filepath_or_buffer(
     compression: CompressionOptions = None,
     mode: Optional[str] = None,
     storage_options: StorageOptions = None,
-):
+) -> IOargs:
     """
     If the filepath_or_buffer is a url, translate and return the buffer.
     Otherwise passthrough.
@@ -191,14 +193,37 @@ def get_filepath_or_buffer(

         .. versionadded:: 1.2.0

-    Returns
-    -------
-    Tuple[FilePathOrBuffer, str, CompressionOptions, bool]
-        Tuple containing the filepath or buffer, the encoding, the compression
-        and should_close.
+    .. versionchanged:: 1.2.0
+
+      A named tuple is returned. In addition to the previously returned values,
+      it also returns `mode`. If a path-like object is converted to a file-like
+      object, the returned mode is binary, otherwise it is the provided `mode`.
     """
     filepath_or_buffer = stringify_path(filepath_or_buffer)

+    # bz2 and xz do not write the byte order mark for utf-16 and utf-32
+    # print a warning when writing such files
+    compression_method = infer_compression(
+        filepath_or_buffer, get_compression_method(compression)[0]
+    )
+    if (
+        mode
+        and "w" in mode
+        and compression_method in ["bz2", "xz"]
+        and encoding in ["utf-16", "utf-32"]
+    ):
+        warnings.warn(
+            f"{compression} will not write the byte order mark for {encoding}",
+            UnicodeWarning,
+        )
+
+    # Use binary mode when converting path-like objects to file-like objects (fsspec)
+    # except when text mode is explicitly requested. The original mode is returned if
+    # fsspec is not used.
+ fsspec_mode = mode or "rb" + if "t" not in fsspec_mode and "b" not in fsspec_mode: + fsspec_mode += "b" + if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer): # TODO: fsspec can also handle HTTP via requests, but leaving this unchanged if storage_options: @@ -212,7 +237,13 @@ def get_filepath_or_buffer( compression = "gzip" reader = BytesIO(req.read()) req.close() - return reader, encoding, compression, True + return IOargs( + filepath_or_buffer=reader, + encoding=encoding, + compression=compression, + should_close=True, + mode=fsspec_mode, + ) if is_fsspec_url(filepath_or_buffer): assert isinstance( @@ -244,7 +275,7 @@ def get_filepath_or_buffer( try: file_obj = fsspec.open( - filepath_or_buffer, mode=mode or "rb", **(storage_options or {}) + filepath_or_buffer, mode=fsspec_mode, **(storage_options or {}) ).open() # GH 34626 Reads from Public Buckets without Credentials needs anon=True except tuple(err_types_to_retry_with_anon): @@ -255,23 +286,41 @@ def get_filepath_or_buffer( storage_options = dict(storage_options) storage_options["anon"] = True file_obj = fsspec.open( - filepath_or_buffer, mode=mode or "rb", **(storage_options or {}) + filepath_or_buffer, mode=fsspec_mode, **(storage_options or {}) ).open() - return file_obj, encoding, compression, True + return IOargs( + filepath_or_buffer=file_obj, + encoding=encoding, + compression=compression, + should_close=True, + mode=fsspec_mode, + ) elif storage_options: raise ValueError( "storage_options passed with file object or non-fsspec file path" ) if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)): - return _expand_user(filepath_or_buffer), None, compression, False + return IOargs( + filepath_or_buffer=_expand_user(filepath_or_buffer), + encoding=None, + compression=compression, + should_close=False, + mode=mode, + ) if not is_file_like(filepath_or_buffer): msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}" raise ValueError(msg) - return filepath_or_buffer, None, compression, False + return IOargs( + filepath_or_buffer=filepath_or_buffer, + encoding=None, + compression=compression, + should_close=False, + mode=mode, + ) def file_path_to_url(path: str) -> str: @@ -452,6 +501,13 @@ def get_handle( need_text_wrapping = (BufferedIOBase, RawIOBase, S3File) except ImportError: need_text_wrapping = (BufferedIOBase, RawIOBase) + # fsspec is an optional dependency. If it is available, add its file-object + # class to the list of classes that need text wrapping. 
+ fsspec = import_optional_dependency("fsspec", raise_on_missing=False) + if fsspec is not None: + need_text_wrapping = tuple( + list(need_text_wrapping) + [fsspec.spec.AbstractFileSystem] + ) handles: List[Union[IO, _MMapWrapper]] = list() f = path_or_buf @@ -589,6 +645,9 @@ def write(self, data): archive_name = self.filename if self.archive_name is not None: archive_name = self.archive_name + if archive_name is None: + # ZipFile needs a non-empty string + archive_name = "zip" super().writestr(archive_name, data) @property diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index ead36c95556b1..79f2ee9a060a8 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -352,9 +352,9 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): if is_url(filepath_or_buffer): filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read()) elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): - filepath_or_buffer, _, _, _ = get_filepath_or_buffer( + filepath_or_buffer = get_filepath_or_buffer( filepath_or_buffer, storage_options=storage_options - ) + )[0] if isinstance(filepath_or_buffer, self._workbook_class): self.book = filepath_or_buffer diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index fb606b5ec8aef..937eda21aee12 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -34,7 +34,7 @@ def to_feather(df: DataFrame, path, storage_options: StorageOptions = None, **kw import_optional_dependency("pyarrow") from pyarrow import feather - path, _, _, should_close = get_filepath_or_buffer( + path, _, _, should_close, _ = get_filepath_or_buffer( path, mode="wb", storage_options=storage_options ) @@ -122,7 +122,7 @@ def read_feather( import_optional_dependency("pyarrow") from pyarrow import feather - path, _, _, should_close = get_filepath_or_buffer( + path, _, _, should_close, _ = get_filepath_or_buffer( path, storage_options=storage_options ) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index c462a96da7133..86b4eb9d2fc93 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -62,14 +62,22 @@ def __init__( # Extract compression mode as given, if dict compression, self.compression_args = get_compression_method(compression) + self.compression = infer_compression(path_or_buf, compression) - self.path_or_buf, _, _, self.should_close = get_filepath_or_buffer( + ( + self.path_or_buf, + _, + _, + self.should_close, + mode, # type: ignore + ) = get_filepath_or_buffer( path_or_buf, encoding=encoding, - compression=compression, + compression=self.compression, mode=mode, storage_options=storage_options, ) + assert self.path_or_buf is not None self.sep = sep self.na_rep = na_rep self.float_format = float_format @@ -83,7 +91,6 @@ def __init__( encoding = "utf-8" self.encoding = encoding self.errors = errors - self.compression = infer_compression(self.path_or_buf, compression) if quoting is None: quoting = csvlib.QUOTE_MINIMAL diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index fe5e172655ae1..5ecdd312103a9 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -58,7 +58,7 @@ def to_json( ) if path_or_buf is not None: - path_or_buf, _, _, should_close = get_filepath_or_buffer( + path_or_buf, _, _, should_close, _ = get_filepath_or_buffer( path_or_buf, compression=compression, mode="wt", @@ -615,7 +615,7 @@ def read_json( compression_method, compression = get_compression_method(compression) compression_method = 
infer_compression(path_or_buf, compression_method) compression = dict(compression, method=compression_method) - filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( + filepath_or_buffer, _, compression, should_close, _ = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression, @@ -645,7 +645,7 @@ def read_json( result = json_reader.read() if should_close: - filepath_or_buffer.close() + filepath_or_buffer.close() # type: ignore return result diff --git a/pandas/io/orc.py b/pandas/io/orc.py index b556732e4d116..0d87cbadd5225 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -50,7 +50,7 @@ def read_orc( import pyarrow.orc - path, _, _, _ = get_filepath_or_buffer(path) + path = get_filepath_or_buffer(path)[0] orc_file = pyarrow.orc.ORCFile(path) result = orc_file.read(columns=columns, **kwargs).to_pandas() return result diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 7f0eef039a1e8..4036256f3d509 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -146,7 +146,7 @@ def read( path = _expand_user(path) if not fs: - path, _, _, should_close = get_filepath_or_buffer(path) + path, _, _, should_close, _ = get_filepath_or_buffer(path) kwargs["use_pandas_metadata"] = True result = self.api.parquet.read_table( @@ -205,7 +205,7 @@ def write( raise ValueError( "storage_options passed with file object or non-fsspec file path" ) - path, _, _, _ = get_filepath_or_buffer(path) + path = get_filepath_or_buffer(path)[0] with catch_warnings(record=True): self.api.write( @@ -228,7 +228,7 @@ def read( ).open() parquet_file = self.api.ParquetFile(path, open_with=open_with) else: - path, _, _, _ = get_filepath_or_buffer(path) + path = get_filepath_or_buffer(path)[0] parquet_file = self.api.ParquetFile(path) return parquet_file.to_pandas(columns=columns, **kwargs) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 983aa56324083..c898c0ef78b15 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -432,7 +432,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] # though mypy handling of conditional imports is difficult. 
# See https://github.com/python/mypy/issues/1297 - fp_or_buf, _, compression, should_close = get_filepath_or_buffer( + fp_or_buf, _, compression, should_close, _ = get_filepath_or_buffer( filepath_or_buffer, encoding, compression, storage_options=storage_options ) kwds["compression"] = compression @@ -462,7 +462,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): if should_close: try: - fp_or_buf.close() + fp_or_buf.close() # type: ignore except ValueError: pass diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index fc1d2e385cf72..638b034077600 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -86,7 +86,7 @@ def to_pickle( >>> import os >>> os.remove("./dummy.pkl") """ - fp_or_buf, _, compression, should_close = get_filepath_or_buffer( + fp_or_buf, _, compression, should_close, _ = get_filepath_or_buffer( filepath_or_buffer, compression=compression, mode="wb", @@ -107,7 +107,7 @@ def to_pickle( _f.close() if should_close: try: - fp_or_buf.close() + fp_or_buf.close() # type: ignore except ValueError: pass @@ -189,7 +189,7 @@ def read_pickle( >>> import os >>> os.remove("./dummy.pkl") """ - fp_or_buf, _, compression, should_close = get_filepath_or_buffer( + fp_or_buf, _, compression, should_close, _ = get_filepath_or_buffer( filepath_or_buffer, compression=compression, storage_options=storage_options ) if not isinstance(fp_or_buf, str) and compression == "infer": @@ -224,6 +224,6 @@ def read_pickle( _f.close() if should_close: try: - fp_or_buf.close() + fp_or_buf.close() # type: ignore except ValueError: pass diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 3d9be7c15726b..4292016edf689 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -137,7 +137,7 @@ def __init__( self._current_row_on_page_index = 0 self._current_row_in_file_index = 0 - self._path_or_buf, _, _, _ = get_filepath_or_buffer(path_or_buf) + self._path_or_buf = get_filepath_or_buffer(path_or_buf)[0] if isinstance(self._path_or_buf, str): self._path_or_buf = open(self._path_or_buf, "rb") self.handle = self._path_or_buf diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 6cf248b748107..5a8b20ed2742a 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -258,6 +258,7 @@ def __init__( encoding, compression, should_close, + _, ) = get_filepath_or_buffer(filepath_or_buffer, encoding=encoding) if isinstance(filepath_or_buffer, (str, bytes)): diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index fffdebda8c87a..8292d2d0fe8b0 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -109,7 +109,7 @@ def read_sas( else: raise ValueError("unable to infer format of SAS file") - filepath_or_buffer, _, _, should_close = get_filepath_or_buffer( + filepath_or_buffer, _, _, should_close, _ = get_filepath_or_buffer( filepath_or_buffer, encoding ) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index ec3819f1673a8..24cb92c8a26bb 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1069,7 +1069,7 @@ def __init__( self._native_byteorder = _set_endianness(sys.byteorder) path_or_buf = stringify_path(path_or_buf) if isinstance(path_or_buf, str): - path_or_buf, encoding, _, should_close = get_filepath_or_buffer( + path_or_buf, encoding, _, should_close, _ = get_filepath_or_buffer( path_or_buf, storage_options=storage_options ) @@ -1979,7 +1979,7 @@ def _open_file_binary_write( compression_typ, compression_args = get_compression_method(compression) compression_typ = 
infer_compression(fname, compression_typ) compression = dict(compression_args, method=compression_typ) - path_or_buf, _, compression, _ = get_filepath_or_buffer( + path_or_buf, _, compression, _, _ = get_filepath_or_buffer( fname, mode="wb", compression=compression, storage_options=storage_options, ) f, _ = get_handle(path_or_buf, "wb", compression=compression, is_text=False) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 5ce2233bc0cd0..e4d5db444e186 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -107,7 +107,9 @@ def test_infer_compression_from_path(self, extension, expected, path_type): def test_get_filepath_or_buffer_with_path(self): filename = "~/sometest" - filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer(filename) + filepath_or_buffer, _, _, should_close, _ = icom.get_filepath_or_buffer( + filename + ) assert filepath_or_buffer != filename assert os.path.isabs(filepath_or_buffer) assert os.path.expanduser(filename) == filepath_or_buffer @@ -115,7 +117,7 @@ def test_get_filepath_or_buffer_with_path(self): def test_get_filepath_or_buffer_with_buffer(self): input_buffer = StringIO() - filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer( + filepath_or_buffer, _, _, should_close, _ = icom.get_filepath_or_buffer( input_buffer ) assert filepath_or_buffer == input_buffer @@ -389,6 +391,25 @@ def test_binary_mode(self): df.to_csv(path, mode="w+b") tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"]) + @pytest.mark.parametrize("compression_", ["bz2", "xz"]) + def test_warning_missing_utf_bom(self, encoding, compression_): + """ + bz2 and xz do not write the byte order mark (BOM) for utf-16/32. 
+ + https://stackoverflow.com/questions/55171439 + + GH 35681 + """ + df = tm.makeDataFrame() + with tm.ensure_clean() as path: + with tm.assert_produces_warning(UnicodeWarning): + df.to_csv(path, compression=compression_, encoding=encoding) + + # reading should fail (otherwise we wouldn't need the warning) + with pytest.raises(Exception): + pd.read_csv(path, compression=compression_, encoding=encoding) + def test_is_fsspec_url(): assert icom.is_fsspec_url("gcs://pandas/somethingelse.com") diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index bc14b485f75e5..31e9ad4cf4416 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -124,6 +124,8 @@ def test_compression_binary(compression_only): GH22555 """ df = tm.makeDataFrame() + + # with a file with tm.ensure_clean() as path: with open(path, mode="wb") as file: df.to_csv(file, mode="wb", compression=compression_only) @@ -132,6 +134,14 @@ def test_compression_binary(compression_only): df, pd.read_csv(path, index_col=0, compression=compression_only) ) + # with BytesIO + file = io.BytesIO() + df.to_csv(file, mode="wb", compression=compression_only) + file.seek(0) # file shouldn't be closed + tm.assert_frame_equal( + df, pd.read_csv(file, index_col=0, compression=compression_only) + ) + def test_gzip_reproducibility_file_name(): """ diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index eacf4fa08545d..18b5743a3375a 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -9,12 +9,32 @@ from pandas.util import _test_decorators as td -@td.skip_if_no("gcsfs") -def test_read_csv_gcs(monkeypatch): +@pytest.fixture +def gcs_buffer(monkeypatch): + """Emulate GCS using a binary buffer.""" from fsspec import AbstractFileSystem, registry registry.target.clear() # noqa # remove state + gcs_buffer = BytesIO() + gcs_buffer.close = lambda: True + + class MockGCSFileSystem(AbstractFileSystem): + def open(*args, **kwargs): + gcs_buffer.seek(0) + return gcs_buffer + + monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) + + return gcs_buffer + + +@td.skip_if_no("gcsfs") +def test_read_csv_gcs(gcs_buffer): + from fsspec import registry + + registry.target.clear() # noqa # remove state + df1 = DataFrame( { "int": [1, 3], @@ -24,21 +44,19 @@ def test_read_csv_gcs(monkeypatch): } ) - class MockGCSFileSystem(AbstractFileSystem): - def open(*args, **kwargs): - return BytesIO(df1.to_csv(index=False).encode()) + gcs_buffer.write(df1.to_csv(index=False).encode()) - monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) df2 = read_csv("gs://test/test.csv", parse_dates=["dt"]) tm.assert_frame_equal(df1, df2) @td.skip_if_no("gcsfs") -def test_to_csv_gcs(monkeypatch): - from fsspec import AbstractFileSystem, registry +def test_to_csv_gcs(gcs_buffer): + from fsspec import registry registry.target.clear() # noqa # remove state + df1 = DataFrame( { "int": [1, 3], @@ -47,29 +65,57 @@ def test_to_csv_gcs(monkeypatch): "dt": date_range("2018-06-18", periods=2), } ) - s = BytesIO() - s.close = lambda: True - - class MockGCSFileSystem(AbstractFileSystem): - def open(*args, **kwargs): - s.seek(0) - return s - monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) df1.to_csv("gs://test/test.csv", index=True) - def mock_get_filepath_or_buffer(*args, **kwargs): - return BytesIO(df1.to_csv(index=True).encode()), None, None, False - - monkeypatch.setattr( - "pandas.io.common.get_filepath_or_buffer", mock_get_filepath_or_buffer - ) - df2 = 
read_csv("gs://test/test.csv", parse_dates=["dt"], index_col=0) tm.assert_frame_equal(df1, df2) +@td.skip_if_no("gcsfs") +@pytest.mark.parametrize("encoding", ["utf-8", "cp1251"]) +def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding): + """ + Compression and encoding should with GCS. + + GH 35677 (to_csv, compression), GH 26124 (to_csv, encoding), and + GH 32392 (read_csv, encoding) + """ + from fsspec import registry + + registry.target.clear() # noqa # remove state + df = tm.makeDataFrame() + + # reference of compressed and encoded file + compression = {"method": compression_only} + if compression_only == "gzip": + compression["mtime"] = 1 # be reproducible + buffer = BytesIO() + df.to_csv(buffer, compression=compression, encoding=encoding, mode="wb") + + # write compressed file with explicit compression + path_gcs = "gs://test/test.csv" + df.to_csv(path_gcs, compression=compression, encoding=encoding) + assert gcs_buffer.getvalue() == buffer.getvalue() + read_df = read_csv( + path_gcs, index_col=0, compression=compression_only, encoding=encoding + ) + tm.assert_frame_equal(df, read_df) + + # write compressed file with implicit compression + if compression_only == "gzip": + compression_only = "gz" + compression["method"] = "infer" + path_gcs += f".{compression_only}" + df.to_csv( + path_gcs, compression=compression, encoding=encoding, + ) + assert gcs_buffer.getvalue() == buffer.getvalue() + read_df = read_csv(path_gcs, index_col=0, compression="infer", encoding=encoding) + tm.assert_frame_equal(df, read_df) + + @td.skip_if_no("fastparquet") @td.skip_if_no("gcsfs") def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): From 935fc4bd676bcb38d003352c9ccb418dad8f1ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 25 Aug 2020 15:56:53 -0400 Subject: [PATCH 2/3] bind input type of encding and mode with the returned type; removed ignore statements (mypy will compile about filepath_or_buffer) --- pandas/_typing.py | 18 +++++++++++++----- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/io/common.py | 9 ++++++--- pandas/io/formats/csvs.py | 8 +------- pandas/io/json/_json.py | 2 +- pandas/io/parsers.py | 2 +- pandas/io/pickle.py | 2 +- 8 files changed, 25 insertions(+), 20 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 1dc4cf33e5656..859e959ceb756 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from datetime import datetime, timedelta, tzinfo from pathlib import Path from typing import ( @@ -8,6 +9,7 @@ Callable, Collection, Dict, + Generic, Hashable, List, Mapping, @@ -117,7 +119,13 @@ CompressionOptions = Optional[Union[str, CompressionDict]] -class IOargs(NamedTuple): +# lets us bind types +ModeVar = TypeVar("ModeVar", str, None) +EncodingVar = TypeVar("EncodingVar", str, None) + + +@dataclass +class IOargs(Generic[ModeVar, EncodingVar]): """ Return value of io/common.py:get_filepath_or_buffer. 
@@ -128,7 +136,7 @@ class IOargs(NamedTuple): """ filepath_or_buffer: FilePathOrBuffer - encoding: Optional[str] - compression: CompressionOptions = None - should_close: bool = False - mode: Optional[str] = None + encoding: EncodingVar + compression: CompressionOptions + should_close: bool + mode: Union[ModeVar, str] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c2e36408d1908..9ac6d8620874c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2281,7 +2281,7 @@ def to_markdown( result = tabulate.tabulate(self, **kwargs) if buf is None: return result - buf, _, _, should_close, _ = get_filepath_or_buffer( # type: ignore + buf, _, _, should_close, _ = get_filepath_or_buffer( buf, mode=mode, storage_options=storage_options ) assert not isinstance(buf, str) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5b3c13cfe332e..3bad2d6dd18b9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3249,7 +3249,7 @@ def to_csv( formatter.save() if path_or_buf is None: - return formatter.path_or_buf.getvalue() # type: ignore + return formatter.path_or_buf.getvalue() return None diff --git a/pandas/io/common.py b/pandas/io/common.py index 85033142238bf..8905bb5bd1ec3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -33,8 +33,11 @@ from pandas._typing import ( CompressionDict, CompressionOptions, + EncodingVar, FilePathOrBuffer, + FilePathOrBufferVar, IOargs, + ModeVar, StorageOptions, ) from pandas.compat import _get_lzma_file, _import_lzma @@ -166,11 +169,11 @@ def is_fsspec_url(url: FilePathOrBuffer) -> bool: def get_filepath_or_buffer( filepath_or_buffer: FilePathOrBuffer, - encoding: Optional[str] = None, + encoding: EncodingVar = None, # type: ignore[assignment] compression: CompressionOptions = None, - mode: Optional[str] = None, + mode: ModeVar = None, # type: ignore[assignment] storage_options: StorageOptions = None, -) -> IOargs: +) -> IOargs[ModeVar, EncodingVar]: """ If the filepath_or_buffer is a url, translate and return the buffer. Otherwise passthrough. 
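
The `Generic[ModeVar, EncodingVar]` dataclass above is the core of this commit: the constrained TypeVars bind the types of the caller's `encoding` and `mode` arguments to the fields of the returned `IOargs`, which is what allows the scattered `# type: ignore` comments to be dropped. A minimal, self-contained sketch of the pattern follows; the names `M`, `Result`, and `open_args` are illustrative only and are not part of the patch:

    from dataclasses import dataclass
    from typing import Generic, Optional, TypeVar

    # Constrained like ModeVar/EncodingVar in pandas/_typing.py: passing
    # mode="wb" binds M to str, so the returned field is typed str rather
    # than Optional[str]; omitting mode binds M to None.
    M = TypeVar("M", str, None, Optional[str])


    @dataclass
    class Result(Generic[M]):
        mode: M


    def open_args(mode: M = None) -> Result[M]:  # type: ignore[assignment]
        # The ignore mirrors the patch: mypy rejects a None default for a
        # constrained TypeVar parameter, although it is fine at runtime.
        return Result(mode=mode)


    binary = open_args("wb")  # intended: mypy sees Result[str], so .mode is str
    unset = open_args()       # intended: mypy sees Result[None], so .mode is None

Presumably this is also why the third commit widens the constraints to include `Optional[str]`: a caller that forwards an `Optional[str]` argument needs a third binding.
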
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 86b4eb9d2fc93..0081794def4d0 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -64,13 +64,7 @@ def __init__( compression, self.compression_args = get_compression_method(compression) self.compression = infer_compression(path_or_buf, compression) - ( - self.path_or_buf, - _, - _, - self.should_close, - mode, # type: ignore - ) = get_filepath_or_buffer( + (self.path_or_buf, _, _, self.should_close, mode,) = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=self.compression, diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 5ecdd312103a9..5c7f7dcb4819a 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -645,7 +645,7 @@ def read_json( result = json_reader.read() if should_close: - filepath_or_buffer.close() # type: ignore + filepath_or_buffer.close() return result diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c898c0ef78b15..bbdc01e22213d 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -462,7 +462,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): if should_close: try: - fp_or_buf.close() # type: ignore + fp_or_buf.close() except ValueError: pass diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 638b034077600..06e371fa1a011 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -107,7 +107,7 @@ def to_pickle( _f.close() if should_close: try: - fp_or_buf.close() # type: ignore + fp_or_buf.close() except ValueError: pass From 475e8e8595370085d64a8c5e10a1f020f19f7ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 25 Aug 2020 17:07:34 -0400 Subject: [PATCH 3/3] use named tuple; remove some unused variables; closed some file handles; refine type for filepath_or_buffer --- pandas/_typing.py | 13 ++++++----- pandas/core/frame.py | 12 ++++------ pandas/core/generic.py | 2 ++ pandas/io/common.py | 42 ++++++++++++++++------------------ pandas/io/excel/_base.py | 2 +- pandas/io/feather_format.py | 23 +++++++++++-------- pandas/io/formats/csvs.py | 8 ++++--- pandas/io/json/_json.py | 19 +++++++++------ pandas/io/orc.py | 4 ++-- pandas/io/parquet.py | 14 +++++++----- pandas/io/parsers.py | 11 +++++---- pandas/io/pickle.py | 28 +++++++++++++++-------- pandas/io/sas/sas7bdat.py | 2 +- pandas/io/sas/sas_xport.py | 10 +++----- pandas/io/sas/sasreader.py | 16 ++++++++----- pandas/io/stata.py | 15 ++++++++---- pandas/tests/io/test_common.py | 26 ++++++++++----------- 17 files changed, 135 insertions(+), 112 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 859e959ceb756..f8af92e07c674 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,5 +1,6 @@ from dataclasses import dataclass from datetime import datetime, timedelta, tzinfo +from io import IOBase from pathlib import Path from typing import ( IO, @@ -13,7 +14,6 @@ Hashable, List, Mapping, - NamedTuple, Optional, Type, TypeVar, @@ -65,7 +65,8 @@ "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool]] ] DtypeObj = Union[np.dtype, "ExtensionDtype"] -FilePathOrBuffer = Union[str, Path, IO[AnyStr]] +FilePathOrBuffer = Union[str, Path, IO[AnyStr], IOBase] +FileOrBuffer = Union[str, IO[AnyStr], IOBase] # FrameOrSeriesUnion means either a DataFrame or a Series. E.g. 
# `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series @@ -119,9 +120,9 @@ CompressionOptions = Optional[Union[str, CompressionDict]] -# lets us bind types -ModeVar = TypeVar("ModeVar", str, None) -EncodingVar = TypeVar("EncodingVar", str, None) +# let's bind types +ModeVar = TypeVar("ModeVar", str, None, Optional[str]) +EncodingVar = TypeVar("EncodingVar", str, None, Optional[str]) @dataclass @@ -135,7 +136,7 @@ class IOargs(Generic[ModeVar, EncodingVar]): See https://github.com/python/mypy/issues/1297 """ - filepath_or_buffer: FilePathOrBuffer + filepath_or_buffer: FileOrBuffer encoding: EncodingVar compression: CompressionOptions should_close: bool diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9ac6d8620874c..eaa27d3f2a857 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2281,13 +2281,11 @@ def to_markdown( result = tabulate.tabulate(self, **kwargs) if buf is None: return result - buf, _, _, should_close, _ = get_filepath_or_buffer( - buf, mode=mode, storage_options=storage_options - ) - assert not isinstance(buf, str) - buf.writelines(result) - if should_close: - buf.close() + ioargs = get_filepath_or_buffer(buf, mode=mode, storage_options=storage_options) + assert not isinstance(ioargs.filepath_or_buffer, str) + ioargs.filepath_or_buffer.writelines(result) + if ioargs.should_close: + ioargs.filepath_or_buffer.close() return None @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3bad2d6dd18b9..94eef26e57592 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2,6 +2,7 @@ from datetime import timedelta import functools import gc +from io import StringIO import json import operator import pickle @@ -3249,6 +3250,7 @@ def to_csv( formatter.save() if path_or_buf is None: + assert isinstance(formatter.path_or_buf, StringIO) return formatter.path_or_buf.getvalue() return None diff --git a/pandas/io/common.py b/pandas/io/common.py index 8905bb5bd1ec3..97dbc7f1031a2 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -34,8 +34,8 @@ CompressionDict, CompressionOptions, EncodingVar, + FileOrBuffer, FilePathOrBuffer, - FilePathOrBufferVar, IOargs, ModeVar, StorageOptions, @@ -74,9 +74,7 @@ def is_url(url) -> bool: return parse_url(url).scheme in _VALID_URLS -def _expand_user( - filepath_or_buffer: FilePathOrBuffer[AnyStr], -) -> FilePathOrBuffer[AnyStr]: +def _expand_user(filepath_or_buffer: FileOrBuffer[AnyStr]) -> FileOrBuffer[AnyStr]: """ Return the argument with an initial component of ~ or ~user replaced by that user's home directory. @@ -106,7 +104,7 @@ def validate_header_arg(header) -> None: def stringify_path( filepath_or_buffer: FilePathOrBuffer[AnyStr], -) -> FilePathOrBuffer[AnyStr]: +) -> FileOrBuffer[AnyStr]: """ Attempt to convert a path-like object to a string. 
@@ -139,9 +137,9 @@ def stringify_path(
     # "__fspath__"  [union-attr]
     # error: Item "IO[bytes]" of "Union[str, Path, IO[bytes]]" has no
     # attribute "__fspath__"  [union-attr]
-        return filepath_or_buffer.__fspath__()  # type: ignore[union-attr]
+        filepath_or_buffer = filepath_or_buffer.__fspath__()  # type: ignore[union-attr]
     elif isinstance(filepath_or_buffer, pathlib.Path):
-        return str(filepath_or_buffer)
+        filepath_or_buffer = str(filepath_or_buffer)
     return _expand_user(filepath_or_buffer)
@@ -167,11 +165,11 @@ def is_fsspec_url(url: FilePathOrBuffer) -> bool:
     )

-def get_filepath_or_buffer(
+def get_filepath_or_buffer(  # type: ignore[assignment]
     filepath_or_buffer: FilePathOrBuffer,
-    encoding: EncodingVar = None,  # type: ignore[assignment]
+    encoding: EncodingVar = None,
     compression: CompressionOptions = None,
-    mode: ModeVar = None,  # type: ignore[assignment]
+    mode: ModeVar = None,
     storage_options: StorageOptions = None,
 ) -> IOargs[ModeVar, EncodingVar]:
     """
@@ -198,9 +196,7 @@ def get_filepath_or_buffer(
     .. versionchanged:: 1.2.0

-      A named tuple is returned. In addition to the previously returned values,
-      it also returns `mode`. If a path-like object is converted to a file-like
-      object, the returned mode is binary, otherwise it is the provided `mode`.
+        Returns the dataclass IOargs.
     """
     filepath_or_buffer = stringify_path(filepath_or_buffer)
@@ -307,7 +303,7 @@ def get_filepath_or_buffer(
     if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)):
         return IOargs(
             filepath_or_buffer=_expand_user(filepath_or_buffer),
-            encoding=None,
+            encoding=encoding,
             compression=compression,
             should_close=False,
             mode=mode,
         )
@@ -319,7 +315,7 @@ def get_filepath_or_buffer(

     return IOargs(
         filepath_or_buffer=filepath_or_buffer,
-        encoding=None,
+        encoding=encoding,
         compression=compression,
         should_close=False,
         mode=mode,
     )
@@ -505,12 +501,14 @@ def get_handle(
     except ImportError:
         need_text_wrapping = (BufferedIOBase, RawIOBase)
     # fsspec is an optional dependency. If it is available, add its file-object
-    # class to the list of classes that need text wrapping.
-    fsspec = import_optional_dependency("fsspec", raise_on_missing=False)
-    if fsspec is not None:
-        need_text_wrapping = tuple(
-            list(need_text_wrapping) + [fsspec.spec.AbstractFileSystem]
-        )
+    # class to the list of classes that need text wrapping. If fsspec is too old and is
+    # needed, get_filepath_or_buffer would already have thrown an exception.
+ try: + from fsspec.spec import AbstractFileSystem + + need_text_wrapping = (*need_text_wrapping, AbstractFileSystem) + except ImportError: + pass handles: List[Union[IO, _MMapWrapper]] = list() f = path_or_buf @@ -642,7 +640,7 @@ def __init__( self.archive_name = archive_name kwargs_zip: Dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED} kwargs_zip.update(kwargs) - super().__init__(file, mode, **kwargs_zip) + super().__init__(file, mode, **kwargs_zip) # type: ignore[arg-type] def write(self, data): archive_name = self.filename diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 79f2ee9a060a8..9bc1d7fedcb31 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -354,7 +354,7 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): filepath_or_buffer = get_filepath_or_buffer( filepath_or_buffer, storage_options=storage_options - )[0] + ).filepath_or_buffer if isinstance(filepath_or_buffer, self._workbook_class): self.book = filepath_or_buffer diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 937eda21aee12..a98eebe1c6a2a 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -34,9 +34,7 @@ def to_feather(df: DataFrame, path, storage_options: StorageOptions = None, **kw import_optional_dependency("pyarrow") from pyarrow import feather - path, _, _, should_close, _ = get_filepath_or_buffer( - path, mode="wb", storage_options=storage_options - ) + ioargs = get_filepath_or_buffer(path, mode="wb", storage_options=storage_options) if not isinstance(df, DataFrame): raise ValueError("feather only support IO with DataFrames") @@ -74,7 +72,11 @@ def to_feather(df: DataFrame, path, storage_options: StorageOptions = None, **kw if df.columns.inferred_type not in valid_types: raise ValueError("feather must have string column names") - feather.write_feather(df, path, **kwargs) + feather.write_feather(df, ioargs.filepath_or_buffer, **kwargs) + + if ioargs.should_close: + assert not isinstance(ioargs.filepath_or_buffer, str) + ioargs.filepath_or_buffer.close() def read_feather( @@ -122,14 +124,15 @@ def read_feather( import_optional_dependency("pyarrow") from pyarrow import feather - path, _, _, should_close, _ = get_filepath_or_buffer( - path, storage_options=storage_options - ) + ioargs = get_filepath_or_buffer(path, storage_options=storage_options) - df = feather.read_feather(path, columns=columns, use_threads=bool(use_threads)) + df = feather.read_feather( + ioargs.filepath_or_buffer, columns=columns, use_threads=bool(use_threads) + ) # s3fs only validates the credentials when the file is closed. 
- if should_close: - path.close() + if ioargs.should_close: + assert not isinstance(ioargs.filepath_or_buffer, str) + ioargs.filepath_or_buffer.close() return df diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 0081794def4d0..270caec022fef 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -64,14 +64,17 @@ def __init__( compression, self.compression_args = get_compression_method(compression) self.compression = infer_compression(path_or_buf, compression) - (self.path_or_buf, _, _, self.should_close, mode,) = get_filepath_or_buffer( + ioargs = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=self.compression, mode=mode, storage_options=storage_options, ) - assert self.path_or_buf is not None + self.path_or_buf = ioargs.filepath_or_buffer + self.should_close = ioargs.should_close + self.mode = ioargs.mode + self.sep = sep self.na_rep = na_rep self.float_format = float_format @@ -80,7 +83,6 @@ def __init__( self.header = header self.index = index self.index_label = index_label - self.mode = mode if encoding is None: encoding = "utf-8" self.encoding = encoding diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 5c7f7dcb4819a..7a3b76ff7e3d0 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -58,12 +58,14 @@ def to_json( ) if path_or_buf is not None: - path_or_buf, _, _, should_close, _ = get_filepath_or_buffer( + ioargs = get_filepath_or_buffer( path_or_buf, compression=compression, mode="wt", storage_options=storage_options, ) + path_or_buf = ioargs.filepath_or_buffer + should_close = ioargs.should_close if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") @@ -102,6 +104,8 @@ def to_json( fh.write(s) finally: fh.close() + for handle in handles: + handle.close() elif path_or_buf is None: return s else: @@ -615,7 +619,7 @@ def read_json( compression_method, compression = get_compression_method(compression) compression_method = infer_compression(path_or_buf, compression_method) compression = dict(compression, method=compression_method) - filepath_or_buffer, _, compression, should_close, _ = get_filepath_or_buffer( + ioargs = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression, @@ -623,7 +627,7 @@ def read_json( ) json_reader = JsonReader( - filepath_or_buffer, + ioargs.filepath_or_buffer, orient=orient, typ=typ, dtype=dtype, @@ -633,10 +637,10 @@ def read_json( numpy=numpy, precise_float=precise_float, date_unit=date_unit, - encoding=encoding, + encoding=ioargs.encoding, lines=lines, chunksize=chunksize, - compression=compression, + compression=ioargs.compression, nrows=nrows, ) @@ -644,8 +648,9 @@ def read_json( return json_reader result = json_reader.read() - if should_close: - filepath_or_buffer.close() + if ioargs.should_close: + assert not isinstance(ioargs.filepath_or_buffer, str) + ioargs.filepath_or_buffer.close() return result diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 0d87cbadd5225..f1b1aa6a43cb5 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -50,7 +50,7 @@ def read_orc( import pyarrow.orc - path = get_filepath_or_buffer(path)[0] - orc_file = pyarrow.orc.ORCFile(path) + ioargs = get_filepath_or_buffer(path) + orc_file = pyarrow.orc.ORCFile(ioargs.filepath_or_buffer) result = orc_file.read(columns=columns, **kwargs).to_pandas() return result diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 4036256f3d509..e5d6ac006e251 100644 --- a/pandas/io/parquet.py +++ 
b/pandas/io/parquet.py @@ -9,7 +9,7 @@ from pandas import DataFrame, get_option -from pandas.io.common import _expand_user, get_filepath_or_buffer, is_fsspec_url +from pandas.io.common import get_filepath_or_buffer, is_fsspec_url, stringify_path def get_engine(engine: str) -> "BaseImpl": @@ -113,7 +113,7 @@ def write( raise ValueError( "storage_options passed with file object or non-fsspec file path" ) - path = _expand_user(path) + path = stringify_path(path) if partition_cols is not None: # writes to multiple files under the given path self.api.parquet.write_to_dataset( @@ -143,10 +143,12 @@ def read( ) fs = kwargs.pop("filesystem", None) should_close = False - path = _expand_user(path) + path = stringify_path(path) if not fs: - path, _, _, should_close, _ = get_filepath_or_buffer(path) + ioargs = get_filepath_or_buffer(path) + path = ioargs.filepath_or_buffer + should_close = ioargs.should_close kwargs["use_pandas_metadata"] = True result = self.api.parquet.read_table( @@ -205,7 +207,7 @@ def write( raise ValueError( "storage_options passed with file object or non-fsspec file path" ) - path = get_filepath_or_buffer(path)[0] + path = get_filepath_or_buffer(path).filepath_or_buffer with catch_warnings(record=True): self.api.write( @@ -228,7 +230,7 @@ def read( ).open() parquet_file = self.api.ParquetFile(path, open_with=open_with) else: - path = get_filepath_or_buffer(path)[0] + path = get_filepath_or_buffer(path).filepath_or_buffer parquet_file = self.api.ParquetFile(path) return parquet_file.to_pandas(columns=columns, **kwargs) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index bbdc01e22213d..a917bff9d7ca7 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -432,10 +432,10 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] # though mypy handling of conditional imports is difficult. # See https://github.com/python/mypy/issues/1297 - fp_or_buf, _, compression, should_close, _ = get_filepath_or_buffer( + ioargs = get_filepath_or_buffer( filepath_or_buffer, encoding, compression, storage_options=storage_options ) - kwds["compression"] = compression + kwds["compression"] = ioargs.compression if kwds.get("date_parser", None) is not None: if isinstance(kwds["parse_dates"], bool): @@ -450,7 +450,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): _validate_names(kwds.get("names", None)) # Create the parser. 
- parser = TextFileReader(fp_or_buf, **kwds) + parser = TextFileReader(ioargs.filepath_or_buffer, **kwds) if chunksize or iterator: return parser @@ -460,9 +460,10 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): finally: parser.close() - if should_close: + if ioargs.should_close: + assert not isinstance(ioargs.filepath_or_buffer, str) try: - fp_or_buf.close() + ioargs.filepath_or_buffer.close() except ValueError: pass diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 06e371fa1a011..857a2d1b69be4 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -86,15 +86,18 @@ def to_pickle( >>> import os >>> os.remove("./dummy.pkl") """ - fp_or_buf, _, compression, should_close, _ = get_filepath_or_buffer( + ioargs = get_filepath_or_buffer( filepath_or_buffer, compression=compression, mode="wb", storage_options=storage_options, ) - if not isinstance(fp_or_buf, str) and compression == "infer": + compression = ioargs.compression + if not isinstance(ioargs.filepath_or_buffer, str) and compression == "infer": compression = None - f, fh = get_handle(fp_or_buf, "wb", compression=compression, is_text=False) + f, fh = get_handle( + ioargs.filepath_or_buffer, "wb", compression=compression, is_text=False + ) if protocol < 0: protocol = pickle.HIGHEST_PROTOCOL try: @@ -105,9 +108,10 @@ def to_pickle( f.close() for _f in fh: _f.close() - if should_close: + if ioargs.should_close: + assert not isinstance(ioargs.filepath_or_buffer, str) try: - fp_or_buf.close() + ioargs.filepath_or_buffer.close() except ValueError: pass @@ -189,12 +193,15 @@ def read_pickle( >>> import os >>> os.remove("./dummy.pkl") """ - fp_or_buf, _, compression, should_close, _ = get_filepath_or_buffer( + ioargs = get_filepath_or_buffer( filepath_or_buffer, compression=compression, storage_options=storage_options ) - if not isinstance(fp_or_buf, str) and compression == "infer": + compression = ioargs.compression + if not isinstance(ioargs.filepath_or_buffer, str) and compression == "infer": compression = None - f, fh = get_handle(fp_or_buf, "rb", compression=compression, is_text=False) + f, fh = get_handle( + ioargs.filepath_or_buffer, "rb", compression=compression, is_text=False + ) # 1) try standard library Pickle # 2) try pickle_compat (older pandas version) to handle subclass changes @@ -222,8 +229,9 @@ def read_pickle( f.close() for _f in fh: _f.close() - if should_close: + if ioargs.should_close: + assert not isinstance(ioargs.filepath_or_buffer, str) try: - fp_or_buf.close() # type: ignore + ioargs.filepath_or_buffer.close() except ValueError: pass diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 4292016edf689..76dac39d1889f 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -137,7 +137,7 @@ def __init__( self._current_row_on_page_index = 0 self._current_row_in_file_index = 0 - self._path_or_buf = get_filepath_or_buffer(path_or_buf)[0] + self._path_or_buf = get_filepath_or_buffer(path_or_buf).filepath_or_buffer if isinstance(self._path_or_buf, str): self._path_or_buf = open(self._path_or_buf, "rb") self.handle = self._path_or_buf diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 5a8b20ed2742a..e4d9324ce5130 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -253,13 +253,9 @@ def __init__( self._chunksize = chunksize if isinstance(filepath_or_buffer, str): - ( - filepath_or_buffer, - encoding, - compression, - should_close, - _, - ) = get_filepath_or_buffer(filepath_or_buffer, encoding=encoding) + 
filepath_or_buffer = get_filepath_or_buffer( + filepath_or_buffer, encoding=encoding + ).filepath_or_buffer if isinstance(filepath_or_buffer, (str, bytes)): self.filepath_or_buffer = open(filepath_or_buffer, "rb") diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 8292d2d0fe8b0..ae9457a8e3147 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -109,22 +109,26 @@ def read_sas( else: raise ValueError("unable to infer format of SAS file") - filepath_or_buffer, _, _, should_close, _ = get_filepath_or_buffer( - filepath_or_buffer, encoding - ) + ioargs = get_filepath_or_buffer(filepath_or_buffer, encoding) reader: ReaderBase if format.lower() == "xport": from pandas.io.sas.sas_xport import XportReader reader = XportReader( - filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize + ioargs.filepath_or_buffer, + index=index, + encoding=ioargs.encoding, + chunksize=chunksize, ) elif format.lower() == "sas7bdat": from pandas.io.sas.sas7bdat import SAS7BDATReader reader = SAS7BDATReader( - filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize + ioargs.filepath_or_buffer, + index=index, + encoding=ioargs.encoding, + chunksize=chunksize, ) else: raise ValueError("unknown SAS format") @@ -134,6 +138,6 @@ def read_sas( data = reader.read() - if should_close: + if ioargs.should_close: reader.close() return data diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 24cb92c8a26bb..0074ebc4decb0 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1069,9 +1069,9 @@ def __init__( self._native_byteorder = _set_endianness(sys.byteorder) path_or_buf = stringify_path(path_or_buf) if isinstance(path_or_buf, str): - path_or_buf, encoding, _, should_close, _ = get_filepath_or_buffer( + path_or_buf = get_filepath_or_buffer( path_or_buf, storage_options=storage_options - ) + ).filepath_or_buffer if isinstance(path_or_buf, (str, bytes)): self.path_or_buf = open(path_or_buf, "rb") @@ -1979,11 +1979,16 @@ def _open_file_binary_write( compression_typ, compression_args = get_compression_method(compression) compression_typ = infer_compression(fname, compression_typ) compression = dict(compression_args, method=compression_typ) - path_or_buf, _, compression, _, _ = get_filepath_or_buffer( + ioargs = get_filepath_or_buffer( fname, mode="wb", compression=compression, storage_options=storage_options, ) - f, _ = get_handle(path_or_buf, "wb", compression=compression, is_text=False) - return f, True, compression + f, _ = get_handle( + ioargs.filepath_or_buffer, + "wb", + compression=ioargs.compression, + is_text=False, + ) + return f, True, ioargs.compression else: raise TypeError("fname must be a binary file, buffer or path-like.") diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index e4d5db444e186..85a12a13d19fb 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -105,23 +105,21 @@ def test_infer_compression_from_path(self, extension, expected, path_type): compression = icom.infer_compression(path, compression="infer") assert compression == expected - def test_get_filepath_or_buffer_with_path(self): - filename = "~/sometest" - filepath_or_buffer, _, _, should_close, _ = icom.get_filepath_or_buffer( - filename - ) - assert filepath_or_buffer != filename - assert os.path.isabs(filepath_or_buffer) - assert os.path.expanduser(filename) == filepath_or_buffer - assert not should_close + @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path]) + def 
test_get_filepath_or_buffer_with_path(self, path_type): + # ignore LocalPath: it creates strange paths: /absolute/~/sometest + filename = path_type("~/sometest") + ioargs = icom.get_filepath_or_buffer(filename) + assert ioargs.filepath_or_buffer != filename + assert os.path.isabs(ioargs.filepath_or_buffer) + assert os.path.expanduser(filename) == ioargs.filepath_or_buffer + assert not ioargs.should_close def test_get_filepath_or_buffer_with_buffer(self): input_buffer = StringIO() - filepath_or_buffer, _, _, should_close, _ = icom.get_filepath_or_buffer( - input_buffer - ) - assert filepath_or_buffer == input_buffer - assert not should_close + ioargs = icom.get_filepath_or_buffer(input_buffer) + assert ioargs.filepath_or_buffer == input_buffer + assert not ioargs.should_close def test_iterator(self): reader = pd.read_csv(StringIO(self.data1), chunksize=1)
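
Taken together, the three patches change two user-visible behaviors: zip compression works when writing to a filename-less binary buffer, and compression/encoding are honored when a path-like object is internally converted to a file-like object. A hedged sketch of the expected behavior on the patched branch (the fallback archive name "zip" comes from the _BytesZipFile change in patch 1; the fsspec line is commented out because it additionally assumes gcsfs and a reachable bucket):

    from io import BytesIO
    import zipfile

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})

    # GH 35058: zip compression into a binary buffer that has no filename.
    # _BytesZipFile falls back to the archive name "zip" when neither the
    # target nor the archive_name compression argument supplies one.
    buffer = BytesIO()
    df.to_csv(buffer, mode="wb", compression={"method": "zip"})
    buffer.seek(0)
    with zipfile.ZipFile(buffer) as zf:
        assert zf.namelist() == ["zip"]

    # GH 35677 / GH 26124 / GH 32392: compression and encoding are honored
    # when a path-like object is internally converted to a file-like object:
    # df.to_csv("gs://bucket/data.csv.gz", encoding="cp1251")  # needs gcsfs
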