From 9a7b29ae7a2945ee07fcd956f6a54bd1089de6a7 Mon Sep 17 00:00:00 2001 From: NickFillot <40593450+NickFillot@users.noreply.github.com> Date: Sun, 3 Oct 2021 16:23:02 +0200 Subject: [PATCH 01/49] [ENH] to_orc pandas.io.orc.to_orc method definition --- pandas/io/orc.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index b02660c089382..06d9563aa080f 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -1,7 +1,9 @@ """ orc compat """ from __future__ import annotations +import os from typing import TYPE_CHECKING +from tempfile import gettempdir from pandas._typing import ( FilePath, @@ -11,8 +13,10 @@ from pandas.io.common import get_handle -if TYPE_CHECKING: - from pandas import DataFrame +from pandas.core import generic +from pandas.util._decorators import doc + +from pandas import DataFrame def read_orc( @@ -52,3 +56,78 @@ def read_orc( with get_handle(path, "rb", is_text=False) as handles: orc_file = orc.ORCFile(handles.handle) return orc_file.read(columns=columns, **kwargs).to_pandas() + + +def to_orc( + df: DataFrame, + path: FilePathOrBuffer = None, + engine: str = 'pyarrow', + index: bool = None, + **kwargs +) -> bytes: + """ + Write a DataFrame to the orc/arrow format. + Parameters + ---------- + df : DataFrame + path : str or file-like object, default None + If a string, it will be used as Root Directory path + when writing a partitioned dataset. By file-like object, + we refer to objects with a write() method, such as a file handle + (e.g. via builtin open function) or io.BytesIO. The engine + fastparquet does not accept file-like objects. If path is None, + a bytes object is returned. + engine : {{'pyarrow'}}, default 'pyarrow' + Parquet library to use, or library it self, checked with 'pyarrow' name + and version > 4.0.0 + index : bool, default None + If ``True``, include the dataframe's index(es) in the file output. If + ``False``, they will not be written to the file. + If ``None``, similar to ``infer`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. 
+ kwargs + Additional keyword arguments passed to the engine + Returns + ------- + bytes if no path argument is provided else None + """ + if index is None: + index = df.index.names[0] is not None + + if isinstance(engine, str): + engine = import_optional_dependency(engine, min_version='4.0.0') + else: + try: + assert engine.__name__ == 'pyarrow', "engine must be 'pyarrow' module" + assert hasattr(engine, 'orc'), "'pyarrow' module must have version > 4.0.0 with orc module" + except Exception as e: + raise ValueError("Wrong engine passed, %s" % ( + e, + )) + + if path is None: + # to bytes: tmp path, pyarrow auto closes buffers + path = os.path.join(gettempdir(), os.urandom(12).hex()) + try: + engine.orc.write_table( + engine.Table.from_pandas(df, preserve_index=index), + path, **kwargs + ) + with open(path, 'rb') as path: + return path.read() + except BaseException as e: + raise e + finally: + try: + os.remove(path) + except Exception as e: + pass + else: + engine.orc.write_table( + engine.Table.from_pandas(df, preserve_index=index), + path, **kwargs + ) + return From d11026f0a310a24a09f0357407835a03ccd2a7bf Mon Sep 17 00:00:00 2001 From: NickFillot <40593450+NickFillot@users.noreply.github.com> Date: Sun, 3 Oct 2021 16:34:37 +0200 Subject: [PATCH 02/49] pandas.DataFrame.to_orc set to_orc to pandas.DataFrame --- pandas/core/frame.py | 74 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 54ee5ed2f35d1..694cfdf9f8e82 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2900,7 +2900,81 @@ def to_parquet( storage_options=storage_options, **kwargs, ) + + def to_orc( + self, + path: FilePathOrBuffer = None, + engine: str = 'pyarrow', + index: bool = None, + **kwargs + ) -> bytes: + """ + Write a DataFrame to the orc/arrow format. + Parameters + ---------- + df : DataFrame + path : str or file-like object, default None + If a string, it will be used as Root Directory path + when writing a partitioned dataset. By file-like object, + we refer to objects with a write() method, such as a file handle + (e.g. via builtin open function) or io.BytesIO. The engine + fastparquet does not accept file-like objects. If path is None, + a bytes object is returned. + engine : {{'pyarrow'}}, default 'pyarrow' + Parquet library to use, or library it self, checked with 'pyarrow' name + and version > 4.0.0 + index : bool, default None + If ``True``, include the dataframe's index(es) in the file output. If + ``False``, they will not be written to the file. + If ``None``, similar to ``infer`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. + kwargs + Additional keyword arguments passed to the engine + Returns + ------- + bytes if no path argument is provided else None + + See Also + -------- + read_orc : Read a ORC file. + DataFrame.to_parquet : Write a parquet file. + DataFrame.to_csv : Write a csv file. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_hdf : Write to hdf. + Notes + ----- + This function requires `pyarrow `_ library. 
+ + Examples + -------- + >>> df = pd.DataFrame(data={{'col1': [1, 2], 'col2': [3, 4]}}) + >>> df.to_orc('df.orc', compression='gzip') # doctest: +SKIP + >>> pd.read_orc('df.orc') # doctest: +SKIP + col1 col2 + 0 1 3 + 1 2 4 + + If you want to get a buffer to the orc content you can write it to io.BytesIO + >>> import io + >>> b = io.BytesIO(df.to_orc()) + >>> b.seek(0) + 0 + >>> content = b.read() + """ + from pandas.io.orc import to_orc + + return to_orc( + self, + path, + engine, + index=index, + **kwargs + ) + @Substitution( header_type="bool", header="Whether to print column labels, default True", From 0146ac3aea9f87cb0e053af0784a17efd8230e3f Mon Sep 17 00:00:00 2001 From: NickFillot <40593450+NickFillot@users.noreply.github.com> Date: Sun, 3 Oct 2021 16:47:11 +0200 Subject: [PATCH 03/49] Cleaning --- pandas/io/orc.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 06d9563aa080f..15161ae202ad3 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -2,6 +2,8 @@ from __future__ import annotations import os +import pandas._testing as tm + from typing import TYPE_CHECKING from tempfile import gettempdir @@ -13,10 +15,8 @@ from pandas.io.common import get_handle -from pandas.core import generic -from pandas.util._decorators import doc - -from pandas import DataFrame +if TYPE_CHECKING: + from pandas import DataFrame def read_orc( @@ -102,29 +102,19 @@ def to_orc( else: try: assert engine.__name__ == 'pyarrow', "engine must be 'pyarrow' module" - assert hasattr(engine, 'orc'), "'pyarrow' module must have version > 4.0.0 with orc module" + assert hasattr(engine, 'orc'), "'pyarrow' module must have orc module" except Exception as e: - raise ValueError("Wrong engine passed, %s" % ( - e, - )) + raise ValueError("Wrong engine passed, %s" % e) if path is None: # to bytes: tmp path, pyarrow auto closes buffers - path = os.path.join(gettempdir(), os.urandom(12).hex()) - try: + with tm.ensure_clean(os.path.join(gettempdir(), os.urandom(12).hex())) as path: engine.orc.write_table( engine.Table.from_pandas(df, preserve_index=index), path, **kwargs ) with open(path, 'rb') as path: return path.read() - except BaseException as e: - raise e - finally: - try: - os.remove(path) - except Exception as e: - pass else: engine.orc.write_table( engine.Table.from_pandas(df, preserve_index=index), From 057160250d4038e3cd35f883e7505c57b4c28fc5 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sun, 21 Nov 2021 04:29:12 -0500 Subject: [PATCH 04/49] Fix style & edit comments & change min dependency version to 5.0.0 --- pandas/core/frame.py | 16 ++++++++-------- pandas/io/orc.py | 45 +++++++++++++++++++++++--------------------- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 694cfdf9f8e82..24991bd09e118 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2900,7 +2900,7 @@ def to_parquet( storage_options=storage_options, **kwargs, ) - + def to_orc( self, path: FilePathOrBuffer = None, @@ -2909,7 +2909,7 @@ def to_orc( **kwargs ) -> bytes: """ - Write a DataFrame to the orc/arrow format. + Write a DataFrame to the ORC format. Parameters ---------- df : DataFrame @@ -2917,12 +2917,12 @@ def to_orc( If a string, it will be used as Root Directory path when writing a partitioned dataset. By file-like object, we refer to objects with a write() method, such as a file handle - (e.g. via builtin open function) or io.BytesIO. 
The engine - fastparquet does not accept file-like objects. If path is None, - a bytes object is returned. + (e.g. via builtin open function). If path is None, + a bytes object is returned. Note that currently the pyarrow + engine doesn't work with io.BytesIO. engine : {{'pyarrow'}}, default 'pyarrow' Parquet library to use, or library it self, checked with 'pyarrow' name - and version > 4.0.0 + and version >= 5.0.0 index : bool, default None If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. @@ -2952,7 +2952,7 @@ def to_orc( Examples -------- >>> df = pd.DataFrame(data={{'col1': [1, 2], 'col2': [3, 4]}}) - >>> df.to_orc('df.orc', compression='gzip') # doctest: +SKIP + >>> df.to_orc('df.orc') # doctest: +SKIP >>> pd.read_orc('df.orc') # doctest: +SKIP col1 col2 0 1 3 @@ -2974,7 +2974,7 @@ def to_orc( index=index, **kwargs ) - + @Substitution( header_type="bool", header="Whether to print column labels, default True", diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 15161ae202ad3..6664348656c84 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -1,9 +1,6 @@ """ orc compat """ from __future__ import annotations -import os -import pandas._testing as tm - from typing import TYPE_CHECKING from tempfile import gettempdir @@ -66,7 +63,7 @@ def to_orc( **kwargs ) -> bytes: """ - Write a DataFrame to the orc/arrow format. + Write a DataFrame to the ORC format. Parameters ---------- df : DataFrame @@ -74,12 +71,12 @@ def to_orc( If a string, it will be used as Root Directory path when writing a partitioned dataset. By file-like object, we refer to objects with a write() method, such as a file handle - (e.g. via builtin open function) or io.BytesIO. The engine - fastparquet does not accept file-like objects. If path is None, - a bytes object is returned. + (e.g. via builtin open function). If path is None, + a bytes object is returned. Note that currently the pyarrow + engine doesn't work with io.BytesIO. engine : {{'pyarrow'}}, default 'pyarrow' Parquet library to use, or library it self, checked with 'pyarrow' name - and version > 4.0.0 + and version >= 5.0.0 index : bool, default None If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. 
@@ -96,25 +93,31 @@ def to_orc( """ if index is None: index = df.index.names[0] is not None - + if isinstance(engine, str): - engine = import_optional_dependency(engine, min_version='4.0.0') + engine = import_optional_dependency(engine, min_version='5.0.0') else: try: assert engine.__name__ == 'pyarrow', "engine must be 'pyarrow' module" assert hasattr(engine, 'orc'), "'pyarrow' module must have orc module" - except Exception as e: - raise ValueError("Wrong engine passed, %s" % e) - + except ImportError as e: + raise ValueError ( + "Unable to find a usable engine; " + "tried using: 'pyarrow'.\n" + "A suitable version of " + "pyarrow is required for ORC support.\n" + "Trying to import the above resulted in these errors:" + f"\n - {e}" + ) + if path is None: - # to bytes: tmp path, pyarrow auto closes buffers - with tm.ensure_clean(os.path.join(gettempdir(), os.urandom(12).hex())) as path: - engine.orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), - path, **kwargs - ) - with open(path, 'rb') as path: - return path.read() + # to bytes: pyarrow auto closes buffers hence we read a pyarrow buffer + stream = engine.BufferOutputStream() + engine.orc.write_table( + engine.Table.from_pandas(df, preserve_index=index), + stream, **kwargs + ) + return stream.getvalue().to_pybytes() else: engine.orc.write_table( engine.Table.from_pandas(df, preserve_index=index), From d970b5832d73f682dcddc63646cf55669d4d2a0e Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sun, 21 Nov 2021 04:32:15 -0500 Subject: [PATCH 05/49] Fix style & add to see also --- pandas/core/frame.py | 4 +++- pandas/io/orc.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 24991bd09e118..255cd2388dc1b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2858,6 +2858,7 @@ def to_parquet( See Also -------- read_parquet : Read a parquet file. + DataFrame.to_orc : Write an orc file. DataFrame.to_csv : Write a csv file. DataFrame.to_sql : Write to a sql table. DataFrame.to_hdf : Write to hdf. @@ -2947,7 +2948,8 @@ def to_orc( Notes ----- - This function requires `pyarrow `_ library. + This function requires `pyarrow ` + _ library. 
Examples -------- diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 6664348656c84..06a41912a73fa 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -101,14 +101,14 @@ def to_orc( assert engine.__name__ == 'pyarrow', "engine must be 'pyarrow' module" assert hasattr(engine, 'orc'), "'pyarrow' module must have orc module" except ImportError as e: - raise ValueError ( - "Unable to find a usable engine; " - "tried using: 'pyarrow'.\n" - "A suitable version of " - "pyarrow is required for ORC support.\n" - "Trying to import the above resulted in these errors:" - f"\n - {e}" - ) + raise ValueError( + "Unable to find a usable engine; " + "tried using: 'pyarrow'.\n" + "A suitable version of " + "pyarrow is required for ORC support.\n" + "Trying to import the above resulted in these errors:" + f"\n - {e}" + ) if path is None: # to bytes: pyarrow auto closes buffers hence we read a pyarrow buffer From 8b12e9f82e70e805881c9e39bccfba06370982a7 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sun, 21 Nov 2021 04:53:23 -0500 Subject: [PATCH 06/49] Add ORC to documentation --- doc/source/reference/frame.rst | 1 + doc/source/reference/io.rst | 1 + doc/source/user_guide/io.rst | 59 +++++++++++++++++++++++++++++++-- doc/source/user_guide/scale.rst | 17 ++++++++++ pandas/core/generic.py | 1 + 5 files changed, 76 insertions(+), 3 deletions(-) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index ea27d1efbb235..e71ee80767d29 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -373,6 +373,7 @@ Serialization / IO / conversion DataFrame.from_dict DataFrame.from_records + DataFrame.to_orc DataFrame.to_parquet DataFrame.to_pickle DataFrame.to_csv diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst index 70fd381bffd2c..425b5f81be966 100644 --- a/doc/source/reference/io.rst +++ b/doc/source/reference/io.rst @@ -159,6 +159,7 @@ ORC :toctree: api/ read_orc + DataFrame.to_orc SAS ~~~ diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 4e19deb84487f..f3e712197f9c5 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -30,7 +30,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf` binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather` binary;`Parquet Format `__;:ref:`read_parquet`;:ref:`to_parquet` - binary;`ORC Format `__;:ref:`read_orc`; + binary;`ORC Format `__;:ref:`read_orc`;:ref:`to_orc` binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` binary;`SAS `__;:ref:`read_sas`; binary;`SPSS `__;:ref:`read_spss`; @@ -5562,14 +5562,67 @@ ORC .. versionadded:: 1.0.0 Similar to the :ref:`parquet ` format, the `ORC Format `__ is a binary columnar serialization -for data frames. It is designed to make reading data frames efficient. pandas provides *only* a reader for the -ORC format, :func:`~pandas.read_orc`. This requires the `pyarrow `__ library. +for data frames. It is designed to make reading data frames efficient. pandas provides both the reader and the writer for the +ORC format, :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc`. This requires the `pyarrow `__ library. .. warning:: * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow. * :func:`~pandas.read_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies `. +.. 
ipython:: python + + df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("20130101", periods=3), + "g": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "h": pd.Categorical(list("abc")), + "i": pd.Categorical(list("abc"), ordered=True), + } + ) + + df + df.dtypes + +Write to an orc file. + +.. ipython:: python + :okwarning: + + df.to_orc("example_pa.orc", engine="pyarrow") + +Read from an orc file. + +.. ipython:: python + :okwarning: + + result = pd.read_orc("example_pa.orc", engine="pyarrow") + + result.dtypes + +Read only certain columns of an orc file. + +.. ipython:: python + + result = pd.read_orc( + "example_pa.orc", + engine="pyarrow", + columns=["a", "b"], + ) + result.dtypes + + +.. ipython:: python + :suppress: + + os.remove("example_pa.orc") + + .. _io.sql: SQL queries diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 129f43dd36930..cf8a0c9845e62 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -18,6 +18,23 @@ tool for all situations. If you're working with very large datasets and a tool like PostgreSQL fits your needs, then you should probably be using that. Assuming you want or need the expressiveness and power of pandas, let's carry on. +.. ipython:: python + + import pandas as pd + import numpy as np + +.. ipython:: python + :suppress: + + from pandas._testing import _make_timeseries + + # Make a random in-memory dataset + ts = _make_timeseries(freq="30S", seed=0) + ts.to_csv("timeseries.csv") + ts.to_orc("timeseries.orc") + ts.to_parquet("timeseries.parquet") + + Load less data -------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 89a590f291356..78edaf15fe7ce 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2629,6 +2629,7 @@ def to_hdf( See Also -------- read_hdf : Read from HDF file. + DataFrame.to_orc : Write a DataFrame to the binary orc format. DataFrame.to_parquet : Write a DataFrame to the binary parquet format. DataFrame.to_sql : Write to a SQL table. DataFrame.to_feather : Write out feather-format for DataFrames. 
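The documentation patch above wires :func:`~pandas.DataFrame.to_orc` into the I/O guides next to the existing :func:`~pandas.read_orc`. A minimal, self-contained sketch of that documented round trip, assuming pandas >= 1.5.0 with pyarrow >= 7.0.0 installed; the file name is illustrative only.

import os

import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "a": list("abc"),
        "b": list(range(1, 4)),
        "c": np.arange(4.0, 7.0, dtype="float64"),
        "d": [True, False, True],
        "e": pd.date_range("20130101", periods=3),
    }
)

# Write with the pyarrow engine, then read the file back in full
# and with a column projection.
df.to_orc("example_pa.orc")
full = pd.read_orc("example_pa.orc")
subset = pd.read_orc("example_pa.orc", columns=["a", "b"])
print(full.dtypes)
print(subset.dtypes)

os.remove("example_pa.orc")
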
From 65e6b7a0d1ff00ffe7dd9cdac3420f874eacea82 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sun, 21 Nov 2021 21:28:35 -0500 Subject: [PATCH 07/49] Changes according to review --- pandas/core/frame.py | 4 ++-- pandas/io/orc.py | 29 +++++++++++++++++++---------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 255cd2388dc1b..fc078cd29cf9d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2904,8 +2904,8 @@ def to_parquet( def to_orc( self, - path: FilePathOrBuffer = None, - engine: str = 'pyarrow', + path: FilePath | WriteBuffer[bytes] | None = None, + engine: Literal['pyarrow'] = 'pyarrow', index: bool = None, **kwargs ) -> bytes: diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 06a41912a73fa..f352a54b1fc2a 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -1,12 +1,17 @@ """ orc compat """ from __future__ import annotations -from typing import TYPE_CHECKING +from types import ModuleType +from typing import ( + TYPE_CHECKING, + Literal, +) from tempfile import gettempdir from pandas._typing import ( FilePath, ReadBuffer, + WriteBuffer, ) from pandas.compat._optional import import_optional_dependency @@ -57,8 +62,8 @@ def read_orc( def to_orc( df: DataFrame, - path: FilePathOrBuffer = None, - engine: str = 'pyarrow', + path: FilePath | WriteBuffer[bytes] | None = None, + engine: Literal['pyarrow'] = 'pyarrow', # type: ignore[arg-type] index: bool = None, **kwargs ) -> bytes: @@ -96,7 +101,7 @@ def to_orc( if isinstance(engine, str): engine = import_optional_dependency(engine, min_version='5.0.0') - else: + elif isinstance(engine, ModuleType): try: assert engine.__name__ == 'pyarrow', "engine must be 'pyarrow' module" assert hasattr(engine, 'orc'), "'pyarrow' module must have orc module" @@ -109,18 +114,22 @@ def to_orc( "Trying to import the above resulted in these errors:" f"\n - {e}" ) + else: + raise TypeError( + f"unsuported type for engine: {type(engine)}" + ) - if path is None: - # to bytes: pyarrow auto closes buffers hence we read a pyarrow buffer - stream = engine.BufferOutputStream() + if hasattr(path, "write"): engine.orc.write_table( engine.Table.from_pandas(df, preserve_index=index), - stream, **kwargs + path, **kwargs ) - return stream.getvalue().to_pybytes() else: + # to bytes: pyarrow auto closes buffers hence we read a pyarrow buffer + stream = engine.BufferOutputStream() engine.orc.write_table( engine.Table.from_pandas(df, preserve_index=index), - path, **kwargs + stream, **kwargs ) + return stream.getvalue().to_pybytes() return From 2114616e4313a86c43761500253d4171d9282a64 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Tue, 23 Nov 2021 21:48:33 -0500 Subject: [PATCH 08/49] Fix problems mentioned in comment --- pandas/core/frame.py | 2 +- pandas/io/orc.py | 44 +++++++++++++++++--------------------------- 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fc078cd29cf9d..49ba0f4cbba5f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2922,7 +2922,7 @@ def to_orc( a bytes object is returned. Note that currently the pyarrow engine doesn't work with io.BytesIO. engine : {{'pyarrow'}}, default 'pyarrow' - Parquet library to use, or library it self, checked with 'pyarrow' name + ORC library to use, or library itself, checked with 'pyarrow' name and version >= 5.0.0 index : bool, default None If ``True``, include the dataframe's index(es) in the file output. 
If diff --git a/pandas/io/orc.py b/pandas/io/orc.py index f352a54b1fc2a..c919867811752 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -1,7 +1,6 @@ """ orc compat """ from __future__ import annotations -from types import ModuleType from typing import ( TYPE_CHECKING, Literal, @@ -63,7 +62,7 @@ def read_orc( def to_orc( df: DataFrame, path: FilePath | WriteBuffer[bytes] | None = None, - engine: Literal['pyarrow'] = 'pyarrow', # type: ignore[arg-type] + engine: Literal['pyarrow'] = 'pyarrow', index: bool = None, **kwargs ) -> bytes: @@ -99,37 +98,28 @@ def to_orc( if index is None: index = df.index.names[0] is not None - if isinstance(engine, str): - engine = import_optional_dependency(engine, min_version='5.0.0') - elif isinstance(engine, ModuleType): - try: - assert engine.__name__ == 'pyarrow', "engine must be 'pyarrow' module" - assert hasattr(engine, 'orc'), "'pyarrow' module must have orc module" - except ImportError as e: - raise ValueError( - "Unable to find a usable engine; " - "tried using: 'pyarrow'.\n" - "A suitable version of " - "pyarrow is required for ORC support.\n" - "Trying to import the above resulted in these errors:" - f"\n - {e}" - ) + if engine == "pyarrow": + engine = import_optional_dependency(engine, min_version='5.0.0') ) else: - raise TypeError( - f"unsuported type for engine: {type(engine)}" + raise ValueError( + f"engine must be 'pyarrow'" ) - if hasattr(path, "write"): + if not hasattr(path, "write"): engine.orc.write_table( engine.Table.from_pandas(df, preserve_index=index), path, **kwargs ) else: # to bytes: pyarrow auto closes buffers hence we read a pyarrow buffer - stream = engine.BufferOutputStream() - engine.orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), - stream, **kwargs - ) - return stream.getvalue().to_pybytes() - return + with engine.BufferOutputStream() as stream: # if that is possible + engine.orc.write_table( + engine.Table.from_pandas(df, preserve_index=index), + stream, **kwargs + ) + # allows writing to any (fsspec) URL + with get_handle(path, "wb", is_text=False) as handles: + orc_bytes = stream.getvalue().to_pybytes() + handles.handle.write(orc_bytes) + if path is None: + return orc_bytes From e4b40ef861dbccbfa31eb2c9ba277f766b764ad9 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Tue, 23 Nov 2021 22:11:03 -0500 Subject: [PATCH 09/49] Linter compliance --- pandas/io/orc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index c919867811752..81721c8b02c80 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -99,7 +99,7 @@ def to_orc( index = df.index.names[0] is not None if engine == "pyarrow": - engine = import_optional_dependency(engine, min_version='5.0.0') ) + engine = import_optional_dependency(engine, min_version='5.0.0') else: raise ValueError( f"engine must be 'pyarrow'" From a7aa3e0d409cadce7f3c1f325e142ddc57e03e68 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Wed, 24 Nov 2021 05:54:12 -0500 Subject: [PATCH 10/49] Address comments --- pandas/io/orc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 81721c8b02c80..bedc7580d698a 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -105,7 +105,7 @@ def to_orc( f"engine must be 'pyarrow'" ) - if not hasattr(path, "write"): + if hasattr(path, "write"): engine.orc.write_table( engine.Table.from_pandas(df, preserve_index=index), path, **kwargs @@ -117,9 +117,9 @@ def to_orc( engine.Table.from_pandas(df, 
preserve_index=index), stream, **kwargs ) + orc_bytes = stream.getvalue().to_pybytes() + if path is None: + return orc_bytes # allows writing to any (fsspec) URL with get_handle(path, "wb", is_text=False) as handles: - orc_bytes = stream.getvalue().to_pybytes() handles.handle.write(orc_bytes) - if path is None: - return orc_bytes From 1ab9b6c836a44e73c3ccf348d93f1a99652b134b Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Thu, 2 Dec 2021 06:36:29 -0500 Subject: [PATCH 11/49] Add orc test --- pandas/io/orc.py | 6 +++--- pandas/tests/io/test_orc.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index bedc7580d698a..02bf9f70406dc 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -98,12 +98,12 @@ def to_orc( if index is None: index = df.index.names[0] is not None - if engine == "pyarrow": - engine = import_optional_dependency(engine, min_version='5.0.0') - else: + if engine != "pyarrow": raise ValueError( f"engine must be 'pyarrow'" ) + engine = import_optional_dependency(engine, min_version='5.0.0') + if hasattr(path, "write"): engine.orc.write_table( diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index f34e9b940317d..211352cebcb73 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -224,3 +224,24 @@ def test_orc_reader_snappy_compressed(dirpath): got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) + + +def test_orc_roundtrip(dirpath): + data = { + "boolean1": np.array([False, True], dtype="bool"), + "byte1": np.array([1, 100], dtype="int8"), + "short1": np.array([1024, 2048], dtype="int16"), + "int1": np.array([65536, 65536], dtype="int32"), + "long1": np.array([9223372036854775807, 9223372036854775807], dtype="int64"), + "float1": np.array([1.0, 2.0], dtype="float32"), + "double1": np.array([-15.0, -5.0], dtype="float64"), + "bytes1": np.array([b"\x00\x01\x02\x03\x04", b""], dtype="object"), + "string1": np.array(["hi", "bye"], dtype="object"), + } + expected = pd.DataFrame.from_dict(data) + + outputfile = os.path.join(dirpath, "TestOrcFile.testReadWrite.orc") + expected.to_orc(outputfile) + got = read_orc(outputfile) + + tm.assert_equal(expected, got) From 96969d50bf12f35f368065062e5719c88e05568a Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Fri, 3 Dec 2021 07:58:47 +0000 Subject: [PATCH 12/49] Fixes from pre-commit [automated commit] --- pandas/core/frame.py | 12 +++--------- pandas/io/orc.py | 19 +++++++------------ 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 49ba0f4cbba5f..9a3e2ddc6b463 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2905,9 +2905,9 @@ def to_parquet( def to_orc( self, path: FilePath | WriteBuffer[bytes] | None = None, - engine: Literal['pyarrow'] = 'pyarrow', + engine: Literal["pyarrow"] = "pyarrow", index: bool = None, - **kwargs + **kwargs, ) -> bytes: """ Write a DataFrame to the ORC format. 
@@ -2969,13 +2969,7 @@ def to_orc( """ from pandas.io.orc import to_orc - return to_orc( - self, - path, - engine, - index=index, - **kwargs - ) + return to_orc(self, path, engine, index=index, **kwargs) @Substitution( header_type="bool", diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 02bf9f70406dc..526124e209fa7 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -1,11 +1,11 @@ """ orc compat """ from __future__ import annotations +from tempfile import gettempdir from typing import ( TYPE_CHECKING, Literal, ) -from tempfile import gettempdir from pandas._typing import ( FilePath, @@ -62,9 +62,9 @@ def read_orc( def to_orc( df: DataFrame, path: FilePath | WriteBuffer[bytes] | None = None, - engine: Literal['pyarrow'] = 'pyarrow', + engine: Literal["pyarrow"] = "pyarrow", index: bool = None, - **kwargs + **kwargs, ) -> bytes: """ Write a DataFrame to the ORC format. @@ -99,23 +99,18 @@ def to_orc( index = df.index.names[0] is not None if engine != "pyarrow": - raise ValueError( - f"engine must be 'pyarrow'" - ) - engine = import_optional_dependency(engine, min_version='5.0.0') - + raise ValueError(f"engine must be 'pyarrow'") + engine = import_optional_dependency(engine, min_version="5.0.0") if hasattr(path, "write"): engine.orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), - path, **kwargs + engine.Table.from_pandas(df, preserve_index=index), path, **kwargs ) else: # to bytes: pyarrow auto closes buffers hence we read a pyarrow buffer with engine.BufferOutputStream() as stream: # if that is possible engine.orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), - stream, **kwargs + engine.Table.from_pandas(df, preserve_index=index), stream, **kwargs ) orc_bytes = stream.getvalue().to_pybytes() if path is None: From 2a54b8c11beb956c8e59095aecb7608cb002d095 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sun, 20 Mar 2022 00:44:02 -0400 Subject: [PATCH 13/49] Fix issues according to comments --- pandas/core/frame.py | 3 +-- pandas/io/orc.py | 9 ++++----- pandas/tests/io/test_orc.py | 3 +++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a3e2ddc6b463..b300b8c714a1c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2919,8 +2919,7 @@ def to_orc( when writing a partitioned dataset. By file-like object, we refer to objects with a write() method, such as a file handle (e.g. via builtin open function). If path is None, - a bytes object is returned. Note that currently the pyarrow - engine doesn't work with io.BytesIO. + a bytes object is returned. engine : {{'pyarrow'}}, default 'pyarrow' ORC library to use, or library itself, checked with 'pyarrow' name and version >= 5.0.0 diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 526124e209fa7..2d89573982b39 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -76,8 +76,7 @@ def to_orc( when writing a partitioned dataset. By file-like object, we refer to objects with a write() method, such as a file handle (e.g. via builtin open function). If path is None, - a bytes object is returned. Note that currently the pyarrow - engine doesn't work with io.BytesIO. + a bytes object is returned. 
engine : {{'pyarrow'}}, default 'pyarrow' Parquet library to use, or library it self, checked with 'pyarrow' name and version >= 5.0.0 @@ -100,7 +99,7 @@ def to_orc( if engine != "pyarrow": raise ValueError(f"engine must be 'pyarrow'") - engine = import_optional_dependency(engine, min_version="5.0.0") + engine = import_optional_dependency(engine, min_version="4.0.1") if hasattr(path, "write"): engine.orc.write_table( @@ -112,9 +111,9 @@ def to_orc( engine.orc.write_table( engine.Table.from_pandas(df, preserve_index=index), stream, **kwargs ) - orc_bytes = stream.getvalue().to_pybytes() + orc_bytes = stream.getvalue() if path is None: - return orc_bytes + return orc_bytes.to_pybytes() # allows writing to any (fsspec) URL with get_handle(path, "wb", is_text=False) as handles: handles.handle.write(orc_bytes) diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 211352cebcb73..986f02fb9a215 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -227,6 +227,9 @@ def test_orc_reader_snappy_compressed(dirpath): def test_orc_roundtrip(dirpath): + # GH44554 + # PyArrow gained ORC write support with the current argument order + pytest.importorskip("pyarrow", minversion="7.0.0") data = { "boolean1": np.array([False, True], dtype="bool"), "byte1": np.array([1, 100], dtype="int8"), From 1caec9ee5661d8f7d1afaea81c88dc6ef89ba493 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Mon, 21 Mar 2022 04:57:49 -0400 Subject: [PATCH 14/49] Simplify the code base after raising Arrow version to 7.0.0 --- pandas/io/orc.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 2d89573982b39..21af6fe9fb84b 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -1,7 +1,6 @@ """ orc compat """ from __future__ import annotations -from tempfile import gettempdir from typing import ( TYPE_CHECKING, Literal, @@ -79,7 +78,7 @@ def to_orc( a bytes object is returned. engine : {{'pyarrow'}}, default 'pyarrow' Parquet library to use, or library it self, checked with 'pyarrow' name - and version >= 5.0.0 + and version >= 7.0.0 index : bool, default None If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. 
@@ -98,22 +97,9 @@ def to_orc( index = df.index.names[0] is not None if engine != "pyarrow": - raise ValueError(f"engine must be 'pyarrow'") - engine = import_optional_dependency(engine, min_version="4.0.1") - - if hasattr(path, "write"): - engine.orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), path, **kwargs - ) - else: - # to bytes: pyarrow auto closes buffers hence we read a pyarrow buffer - with engine.BufferOutputStream() as stream: # if that is possible - engine.orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), stream, **kwargs - ) - orc_bytes = stream.getvalue() - if path is None: - return orc_bytes.to_pybytes() - # allows writing to any (fsspec) URL - with get_handle(path, "wb", is_text=False) as handles: - handles.handle.write(orc_bytes) + raise ValueError("engine must be 'pyarrow'") + engine = import_optional_dependency(engine, min_version="7.0.0") + + engine.orc.write_table( + engine.Table.from_pandas(df, preserve_index=index), path, **kwargs + ) From 6f0a5380c08c6972bf6c7213bf22fcce3463f6bd Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Mon, 21 Mar 2022 05:36:01 -0400 Subject: [PATCH 15/49] Fix min arrow version in to_orc --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b300b8c714a1c..e95ca119e6057 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2922,7 +2922,7 @@ def to_orc( a bytes object is returned. engine : {{'pyarrow'}}, default 'pyarrow' ORC library to use, or library itself, checked with 'pyarrow' name - and version >= 5.0.0 + and version >= 7.0.0 index : bool, default None If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. 
@@ -2952,7 +2952,7 @@ def to_orc( Examples -------- - >>> df = pd.DataFrame(data={{'col1': [1, 2], 'col2': [3, 4]}}) + >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) >>> df.to_orc('df.orc') # doctest: +SKIP >>> pd.read_orc('df.orc') # doctest: +SKIP col1 col2 From ae65214a58f8eef63166119dbc5c990e8f1e7119 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Mon, 21 Mar 2022 05:44:43 -0400 Subject: [PATCH 16/49] Add to_orc test in line with other formats --- pandas/tests/io/test_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index fc605637dbc11..66905d7b7112f 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -222,6 +222,7 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext): (pd.DataFrame.to_html, "os", OSError, "html"), (pd.DataFrame.to_excel, "xlrd", OSError, "xlsx"), (pd.DataFrame.to_feather, "pyarrow", OSError, "feather"), + (pd.DataFrame.to_orc, "pyarrow", OSError, "orc"), (pd.DataFrame.to_parquet, "pyarrow", OSError, "parquet"), (pd.DataFrame.to_stata, "os", OSError, "dta"), (pd.DataFrame.to_json, "os", OSError, "json"), From 045c411d8640a002e2463c1df1b0ced498ca3bd9 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Tue, 22 Mar 2022 02:27:27 -0400 Subject: [PATCH 17/49] Add BytesIO support & test --- doc/source/user_guide/scale.rst | 17 ----------------- pandas/io/orc.py | 11 ++++++++++- pandas/tests/io/test_orc.py | 25 ++++++++++++++++++++++++- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index cf8a0c9845e62..129f43dd36930 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -18,23 +18,6 @@ tool for all situations. If you're working with very large datasets and a tool like PostgreSQL fits your needs, then you should probably be using that. Assuming you want or need the expressiveness and power of pandas, let's carry on. -.. ipython:: python - - import pandas as pd - import numpy as np - -.. 
ipython:: python - :suppress: - - from pandas._testing import _make_timeseries - - # Make a random in-memory dataset - ts = _make_timeseries(freq="30S", seed=0) - ts.to_csv("timeseries.csv") - ts.to_orc("timeseries.orc") - ts.to_parquet("timeseries.parquet") - - Load less data -------------- diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 21af6fe9fb84b..08645a87f09dd 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -1,6 +1,7 @@ """ orc compat """ from __future__ import annotations +import io from typing import ( TYPE_CHECKING, Literal, @@ -100,6 +101,14 @@ def to_orc( raise ValueError("engine must be 'pyarrow'") engine = import_optional_dependency(engine, min_version="7.0.0") + path_or_buf: FilePath | WriteBuffer[bytes] = io.BytesIO() if path is None else path engine.orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), path, **kwargs + engine.Table.from_pandas(df, preserve_index=index), path_or_buf, **kwargs ) + + if path is None: + assert isinstance(path_or_buf, io.BytesIO) + return path_or_buf.getvalue() + else: + return None + diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 986f02fb9a215..2eeed0adc379c 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -226,7 +226,7 @@ def test_orc_reader_snappy_compressed(dirpath): tm.assert_equal(expected, got) -def test_orc_roundtrip(dirpath): +def test_orc_roundtrip_file(dirpath): # GH44554 # PyArrow gained ORC write support with the current argument order pytest.importorskip("pyarrow", minversion="7.0.0") @@ -248,3 +248,26 @@ def test_orc_roundtrip(dirpath): got = read_orc(outputfile) tm.assert_equal(expected, got) + + +def test_orc_roundtrip_bytesio(): + # GH44554 + # PyArrow gained ORC write support with the current argument order + pytest.importorskip("pyarrow", minversion="7.0.0") + data = { + "boolean1": np.array([False, True], dtype="bool"), + "byte1": np.array([1, 100], dtype="int8"), + "short1": np.array([1024, 2048], dtype="int16"), + "int1": np.array([65536, 65536], dtype="int32"), + "long1": np.array([9223372036854775807, 9223372036854775807], dtype="int64"), + "float1": np.array([1.0, 2.0], dtype="float32"), + "double1": np.array([-15.0, -5.0], dtype="float64"), + "bytes1": np.array([b"\x00\x01\x02\x03\x04", b""], dtype="object"), + "string1": np.array(["hi", "bye"], dtype="object"), + } + expected = pd.DataFrame.from_dict(data) + + bytesio = expected.to_orc() + got = read_orc(bytesio) + + tm.assert_equal(expected, got) From c00ed0f039594d48fe80243afed27882b9dbf33e Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Tue, 22 Mar 2022 03:16:12 -0400 Subject: [PATCH 18/49] Fix some docs issues --- pandas/core/frame.py | 8 ++++---- pandas/io/orc.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e95ca119e6057..14d0e052a0f8f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2906,15 +2906,15 @@ def to_orc( self, path: FilePath | WriteBuffer[bytes] | None = None, engine: Literal["pyarrow"] = "pyarrow", - index: bool = None, + index: bool | None = None, **kwargs, - ) -> bytes: + ) -> bytes | None: """ Write a DataFrame to the ORC format. Parameters ---------- df : DataFrame - path : str or file-like object, default None + path : str, file-like object or None, default None If a string, it will be used as Root Directory path when writing a partitioned dataset. 
By file-like object, we refer to objects with a write() method, such as a file handle @@ -2923,7 +2923,7 @@ def to_orc( engine : {{'pyarrow'}}, default 'pyarrow' ORC library to use, or library itself, checked with 'pyarrow' name and version >= 7.0.0 - index : bool, default None + index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. If ``None``, similar to ``infer`` the dataframe's index(es) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 08645a87f09dd..61d7cdbccd53a 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -63,15 +63,15 @@ def to_orc( df: DataFrame, path: FilePath | WriteBuffer[bytes] | None = None, engine: Literal["pyarrow"] = "pyarrow", - index: bool = None, + index: bool | None = None, **kwargs, -) -> bytes: +) -> bytes | None: """ Write a DataFrame to the ORC format. Parameters ---------- df : DataFrame - path : str or file-like object, default None + path : str, file-like object or None, default None If a string, it will be used as Root Directory path when writing a partitioned dataset. By file-like object, we refer to objects with a write() method, such as a file handle @@ -80,7 +80,7 @@ def to_orc( engine : {{'pyarrow'}}, default 'pyarrow' Parquet library to use, or library it self, checked with 'pyarrow' name and version >= 7.0.0 - index : bool, default None + index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. If ``None``, similar to ``infer`` the dataframe's index(es) From fe275d7f21390127414905a1eb4c3791c6d98663 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Fri, 25 Mar 2022 16:29:56 -0400 Subject: [PATCH 19/49] Use keyword only arguments --- pandas/core/frame.py | 1 + pandas/io/orc.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 14d0e052a0f8f..97661df2cef61 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2905,6 +2905,7 @@ def to_parquet( def to_orc( self, path: FilePath | WriteBuffer[bytes] | None = None, + *, engine: Literal["pyarrow"] = "pyarrow", index: bool | None = None, **kwargs, diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 61d7cdbccd53a..f49579425b387 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -62,6 +62,7 @@ def read_orc( def to_orc( df: DataFrame, path: FilePath | WriteBuffer[bytes] | None = None, + *, engine: Literal["pyarrow"] = "pyarrow", index: bool | None = None, **kwargs, @@ -109,6 +110,5 @@ def to_orc( if path is None: assert isinstance(path_or_buf, io.BytesIO) return path_or_buf.getvalue() - else: - return None + return None From 9d3e0dfd464e41224f3a7a47d3b344b51d562f0d Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Thu, 12 May 2022 01:24:53 -0400 Subject: [PATCH 20/49] Fix bug --- pandas/io/orc.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index f49579425b387..d3a683ae93aa2 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -102,13 +102,14 @@ def to_orc( raise ValueError("engine must be 'pyarrow'") engine = import_optional_dependency(engine, min_version="7.0.0") - path_or_buf: FilePath | WriteBuffer[bytes] = io.BytesIO() if path is None else path - engine.orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), path_or_buf, **kwargs - ) - - if path is None: - assert isinstance(path_or_buf, io.BytesIO) - return 
path_or_buf.getvalue() + was_none = path is None + if was_none: + path = io.BytesIO() + with get_handle(path, "wb") as handles: + engine.orc.write_table( + engine.Table.from_pandas(df, preserve_index=index), handles.handle, **kwargs + ) + + if was_none: + return path.getvalue() return None - From 971f31c14abce5fdd03e813619fc07b2bbe2f4d8 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sat, 28 May 2022 22:24:18 -0400 Subject: [PATCH 21/49] Fix param issue --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 97661df2cef61..aed78178ffbfd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2969,7 +2969,7 @@ def to_orc( """ from pandas.io.orc import to_orc - return to_orc(self, path, engine, index=index, **kwargs) + return to_orc(self, path, engine=engine, index=index, **kwargs) @Substitution( header_type="bool", From 52b68a0f8eeaa1cbbc50d92cea8b6baf765e0171 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sat, 28 May 2022 23:09:31 -0400 Subject: [PATCH 22/49] Doctest skipping due to minimal versions --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aed78178ffbfd..14a0b52308e59 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2962,7 +2962,7 @@ def to_orc( If you want to get a buffer to the orc content you can write it to io.BytesIO >>> import io - >>> b = io.BytesIO(df.to_orc()) + >>> b = io.BytesIO(df.to_orc()) # doctest: +SKIP >>> b.seek(0) 0 >>> content = b.read() From 76437ba361b014dd998d9ae1d33b40a72f19b538 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sat, 28 May 2022 23:28:12 -0400 Subject: [PATCH 23/49] Doctest skipping due to minimal versions --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 14a0b52308e59..7f17df9b9580f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2963,9 +2963,9 @@ def to_orc( If you want to get a buffer to the orc content you can write it to io.BytesIO >>> import io >>> b = io.BytesIO(df.to_orc()) # doctest: +SKIP - >>> b.seek(0) + >>> b.seek(0) # doctest: +SKIP 0 - >>> content = b.read() + >>> content = b.read() # doctest: +SKIP """ from pandas.io.orc import to_orc From c5d585267f2bcd76e894e1134a34e494867cea76 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sun, 29 May 2022 00:40:41 -0400 Subject: [PATCH 24/49] Improve spacing in docstring & remove orc test in test_common that has unusual pyarrow version requirement and is with a lot of other tests --- pandas/core/frame.py | 2 ++ pandas/tests/io/test_common.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7f17df9b9580f..2d00857a14895 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2912,6 +2912,7 @@ def to_orc( ) -> bytes | None: """ Write a DataFrame to the ORC format. + Parameters ---------- df : DataFrame @@ -2934,6 +2935,7 @@ def to_orc( be included as columns in the file output. 
kwargs Additional keyword arguments passed to the engine + Returns ------- bytes if no path argument is provided else None diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 66905d7b7112f..fc605637dbc11 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -222,7 +222,6 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext): (pd.DataFrame.to_html, "os", OSError, "html"), (pd.DataFrame.to_excel, "xlrd", OSError, "xlsx"), (pd.DataFrame.to_feather, "pyarrow", OSError, "feather"), - (pd.DataFrame.to_orc, "pyarrow", OSError, "orc"), (pd.DataFrame.to_parquet, "pyarrow", OSError, "parquet"), (pd.DataFrame.to_stata, "os", OSError, "dta"), (pd.DataFrame.to_json, "os", OSError, "json"), From b5cd02212be25297c0bcb9e8b114bd21c80ce99e Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Sun, 29 May 2022 01:28:37 -0400 Subject: [PATCH 25/49] Fix docstring syntax --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2d00857a14895..b7492d9a31bb1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2924,7 +2924,7 @@ def to_orc( a bytes object is returned. engine : {{'pyarrow'}}, default 'pyarrow' ORC library to use, or library itself, checked with 'pyarrow' name - and version >= 7.0.0 + and version >= 7.0.0. index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. @@ -2933,8 +2933,8 @@ def to_orc( the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. - kwargs - Additional keyword arguments passed to the engine + **kwargs + Additional keyword arguments passed to the engine. Returns ------- From 7ad3df937c872849806d43428792884beca6aed5 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 02:21:56 -0400 Subject: [PATCH 26/49] ORC is not text --- pandas/io/orc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index d3a683ae93aa2..635d81a112dd4 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -105,7 +105,7 @@ def to_orc( was_none = path is None if was_none: path = io.BytesIO() - with get_handle(path, "wb") as handles: + with get_handle(path, "wb", is_text=False) as handles: engine.orc.write_table( engine.Table.from_pandas(df, preserve_index=index), handles.handle, **kwargs ) From a73bb706e190eebfd9e6e4274064503cb2d6f8c0 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 04:48:10 -0400 Subject: [PATCH 27/49] Fix BytesIO bug && do not require orc to be explicitly imported before usage && all pytest tests have passed --- pandas/io/orc.py | 7 +++++-- pandas/tests/io/test_orc.py | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 635d81a112dd4..e2b63eaaedadf 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -69,6 +69,7 @@ def to_orc( ) -> bytes | None: """ Write a DataFrame to the ORC format. + Parameters ---------- df : DataFrame @@ -89,8 +90,9 @@ def to_orc( the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. 
- kwargs + **kwargs Additional keyword arguments passed to the engine + Returns ------- bytes if no path argument is provided else None @@ -101,12 +103,13 @@ def to_orc( if engine != "pyarrow": raise ValueError("engine must be 'pyarrow'") engine = import_optional_dependency(engine, min_version="7.0.0") + orc = import_optional_dependency("pyarrow.orc") was_none = path is None if was_none: path = io.BytesIO() with get_handle(path, "wb", is_text=False) as handles: - engine.orc.write_table( + orc.write_table( engine.Table.from_pandas(df, preserve_index=index), handles.handle, **kwargs ) diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 2eeed0adc379c..826514d2615a8 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -1,5 +1,6 @@ """ test orc compat """ import datetime +from io import BytesIO import os import numpy as np @@ -267,7 +268,7 @@ def test_orc_roundtrip_bytesio(): } expected = pd.DataFrame.from_dict(data) - bytesio = expected.to_orc() - got = read_orc(bytesio) + bytes = expected.to_orc() + got = read_orc(BytesIO(bytes)) tm.assert_equal(expected, got) From 20aefe79ed4bf2fb4a77f2858fbb5b678895ebc7 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 10:43:25 -0400 Subject: [PATCH 28/49] ORC writer does not work for categorical columns yet --- doc/source/user_guide/io.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index f3e712197f9c5..e0999d1ef85ce 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5569,6 +5569,7 @@ ORC format, :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc`. This * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow. * :func:`~pandas.read_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies `. + * Categorical columns are not supported yet. .. ipython:: python @@ -5581,8 +5582,6 @@ ORC format, :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc`. 
This "e": [True, False, True], "f": pd.date_range("20130101", periods=3), "g": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "h": pd.Categorical(list("abc")), - "i": pd.Categorical(list("abc"), ordered=True), } ) From e7e81fee7a23f30946169613139880aa84b104ee Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 10:49:34 -0400 Subject: [PATCH 29/49] Appease mypy --- pandas/io/orc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index e2b63eaaedadf..356a82d2947ab 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -108,11 +108,13 @@ def to_orc( was_none = path is None if was_none: path = io.BytesIO() + assert path is not None # For mypy with get_handle(path, "wb", is_text=False) as handles: orc.write_table( engine.Table.from_pandas(df, preserve_index=index), handles.handle, **kwargs ) if was_none: + assert isinstance(path, io.BytesIO) # For mypy return path.getvalue() return None From 6b659f7007d10a2ecab925988fb6e5b6cf8a446e Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 15:18:55 -0400 Subject: [PATCH 30/49] Appease mypy --- pandas/io/orc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 356a82d2947ab..918f75de00c58 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -2,6 +2,7 @@ from __future__ import annotations import io +from types import ModuleType from typing import ( TYPE_CHECKING, Literal, @@ -110,6 +111,7 @@ def to_orc( path = io.BytesIO() assert path is not None # For mypy with get_handle(path, "wb", is_text=False) as handles: + assert isinstance(engine, ModuleType) # For mypy orc.write_table( engine.Table.from_pandas(df, preserve_index=index), handles.handle, **kwargs ) From 18e5429968c7e7ad653cdf46c13ae863efaaa203 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 20:23:11 -0400 Subject: [PATCH 31/49] Edit according to reviews --- doc/source/user_guide/io.rst | 11 ++--- pandas/core/frame.py | 30 +++++++++--- pandas/io/orc.py | 45 +++++++++++++++++- .../io/data/orc/TestOrcFile.testReadWrite.orc | Bin 0 -> 1344 bytes pandas/tests/io/test_orc.py | 41 ++++++++++++++++ 5 files changed, 112 insertions(+), 15 deletions(-) create mode 100644 pandas/tests/io/data/orc/TestOrcFile.testReadWrite.orc diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index e0999d1ef85ce..a7f26e53620f8 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5569,7 +5569,8 @@ ORC format, :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc`. This * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow. * :func:`~pandas.read_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies `. - * Categorical columns are not supported yet. + * Unsigned integers, intervals, periods, sparse and categorical Dtypes are not supported yet. + * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. .. ipython:: python @@ -5577,11 +5578,9 @@ ORC format, :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc`. 
This { "a": list("abc"), "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("20130101", periods=3), - "g": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "c": np.arange(4.0, 7.0, dtype="float64"), + "d": [True, False, True], + "e": pd.date_range("20130101", periods=3), } ) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b7492d9a31bb1..20b130191e0b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2913,9 +2913,10 @@ def to_orc( """ Write a DataFrame to the ORC format. + .. versionadded:: 1.5.0 + Parameters ---------- - df : DataFrame path : str, file-like object or None, default None If a string, it will be used as Root Directory path when writing a partitioned dataset. By file-like object, @@ -2923,23 +2924,32 @@ def to_orc( (e.g. via builtin open function). If path is None, a bytes object is returned. engine : {{'pyarrow'}}, default 'pyarrow' - ORC library to use, or library itself, checked with 'pyarrow' name - and version >= 7.0.0. + ORC library to use, or library it self, checked with 'pyarrow' name + and version >= 7.0.0. Raises ValueError if it is anything but + 'pyarrow'. index : bool, optional - If ``True``, include the dataframe's index(es) in the file output. If - ``False``, they will not be written to the file. + If ``True``, include the dataframe's index(es) in the file output. + If ``False``, they will not be written to the file. If ``None``, similar to ``infer`` the dataframe's index(es) will be saved. However, instead of being saved as values, the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. **kwargs - Additional keyword arguments passed to the engine. + Additional keyword arguments passed to the engine Returns ------- bytes if no path argument is provided else None + Raises + ------ + NotImplementedError + * Dtype of one or more columns is unsigned integers, intervals, + periods, sparse or categorical. + ValueError + * engine is not pyarrow. + See Also -------- read_orc : Read a ORC file. @@ -2950,8 +2960,14 @@ def to_orc( Notes ----- - This function requires `pyarrow ` + * Before using this function you should read the :ref:`user guide about + ORC ` and :ref:`install optional dependencies `. + * This function requires `pyarrow ` _ library. + * Unsigned integers, intervals, periods, sparse and categorical Dtypes + are not supported yet. + * Currently timezones in datetime columns are not preserved when a + dataframe is converted into ORC files. Examples -------- diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 918f75de00c58..bc14a90d463cf 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -71,9 +71,14 @@ def to_orc( """ Write a DataFrame to the ORC format. + .. versionadded:: 1.5.0 + Parameters ---------- df : DataFrame + The dataframe to be written to ORC. Raises NotImplementedError + if dtype of one or more columns is category, unsigned integers, + intervals, periods or sparse. path : str, file-like object or None, default None If a string, it will be used as Root Directory path when writing a partitioned dataset. By file-like object, @@ -81,8 +86,9 @@ def to_orc( (e.g. via builtin open function). If path is None, a bytes object is returned. 
engine : {{'pyarrow'}}, default 'pyarrow' - Parquet library to use, or library it self, checked with 'pyarrow' name - and version >= 7.0.0 + ORC library to use, or library it self, checked with 'pyarrow' name + and version >= 7.0.0. Raises ValueError if it is anything but + 'pyarrow'. index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. @@ -97,10 +103,45 @@ def to_orc( Returns ------- bytes if no path argument is provided else None + + Raises + ------ + NotImplementedError + * Dtype of one or more columns is unsigned integers, intervals, + periods, sparse or categorical. + ValueError + * engine is not pyarrow. + + Notes + ----- + * Before using this function you should read the + :ref:`user guide about ORC ` and + :ref:`install optional dependencies `. + * This function requires `pyarrow ` + _ library. + * Unsigned integers, intervals, periods, sparse and categorical Dtypes + are not supported yet. + * Currently timezones in datetime columns are not preserved when a + dataframe is converted into ORC files. """ if index is None: index = df.index.names[0] is not None + # If unsupported dtypes are found raise NotImplementedError + for dtype in df.dtypes: + dtype_str = dtype.__str__().lower() + if ( + "category" in dtype_str + or "interval" in dtype_str + or "sparse" in dtype_str + or "period" in dtype_str + or "uint" in dtype_str + ): + raise NotImplementedError( + """The dtype of one or more columns is unsigned integers, +intervals, periods, sparse or categorical which is not supported yet.""" + ) + if engine != "pyarrow": raise ValueError("engine must be 'pyarrow'") engine = import_optional_dependency(engine, min_version="7.0.0") diff --git a/pandas/tests/io/data/orc/TestOrcFile.testReadWrite.orc b/pandas/tests/io/data/orc/TestOrcFile.testReadWrite.orc new file mode 100644 index 0000000000000000000000000000000000000000..852360ecad74cb3545fd160f895fca67d68ef276 GIT binary patch literal 1344 zcma)6&5qJg6uv*b?P*25Q&BHP6C{}oYC<5Im~=B_CQ&y!X5!Ao1TrWjgeC(nW@8$k zz&FrGaI35CH1jkrTvHFXmkRfFi(;;QU=Mge zDVnzfQFzyYg#b+cqUwAes6}uc10PZ;)0#?Km{b=@h)D_36ePY?no10J7vUWYZ^0}# z4=R@w%kBq`?&Kb@sOswu$nX7~f{;$7Akev>w3(UGa%M^&XIL8AE5s`~U=W~JwcZY} zI^K}pSwCchp~5iF@Y%_;89m9fw-ks(YfKp-_{y`7)HJ_neU@j0r2E($eyC`%X|SB- z*;Bl4lyN7|4(PYR5WgsJ!IeZ^)kxJ%jZ9TEausQG)Lo52?P+w?Lyew6P-U%sU57m? 
zhMGvKks2hD8O;>eIX9XsS8K;;M}gMDXrb`F;rU#nU6Xr8drImYv}O|Y`6;5%zHfZr z_`VXjaoFeDk!ff3xLRZVh(CMrI`~P0Vry4+b$vxQHgnrTUcQI_v45*27wp|~+ctL$ zxcnia62e!ey0!1D!Rl|I6#DZ`5v~V*1id5~^}FMCBzkAlemim}gXDDDj-25bk7AUJ z-=q0%lyp&gC&`E2XkIlho}e+Ao({31MjE6=>Hx$tY8kVPTjp3MEORXb%e;cMSgc19 zFGmuujU-+lNxWqw@uee)SMT@szl48EwvQyXBZ)Uh()s-IIqivd^GWj&eBtiuK SNU3p7TkfYnoF5$@R{jEaEZds^ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 826514d2615a8..5f075a118017c 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -272,3 +272,44 @@ def test_orc_roundtrip_bytesio(): got = read_orc(BytesIO(bytes)) tm.assert_equal(expected, got) + + +testdata = [ + (pd.DataFrame({"unimpl": np.array([1, 20], dtype="uint64")}), dirpath), + (pd.DataFrame({"unimpl": pd.Series(["a", "b", "a"], dtype="category")}), dirpath), + ( + pd.DataFrame( + {"unimpl": [pd.Interval(left=0, right=2), pd.Interval(left=0, right=5)]} + ), + dirpath, + ), + ( + pd.DataFrame( + { + "unimpl": [ + pd.Period("2022-01-03", freq="D"), + pd.Period("2022-01-04", freq="D"), + ] + } + ), + dirpath, + ), + ( + pd.DataFrame({"unimpl": [np.nan] * 100}).astype( + pd.SparseDtype("float", np.nan) + ), + dirpath, + ), +] + + +@pytest.mark.parametrize("unimplemented, dirpath", testdata) +def test_orc_writer_unimplemented_dtypes(unimplemented, dirpath): + # GH44554 + # PyArrow gained ORC write support with the current argument order + pytest.importorskip("pyarrow", minversion="7.0.0") + outputfile = os.path.join(dirpath, "TestOrcFile.testReadWrite.orc") + msg = """The dtype of one or more columns is unsigned integers, +intervals, periods, sparse or categorical which is not supported yet.""" + with pytest.raises(NotImplementedError, match=msg): + unimplemented.to_orc(outputfile) From 21cba6ed7e9196dd24cf2e6509ab6f44ad47e8eb Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 20:30:06 -0400 Subject: [PATCH 32/49] Fix path bug in test_orc --- pandas/tests/io/test_orc.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 5f075a118017c..d0fe5325a1c44 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -275,13 +275,12 @@ def test_orc_roundtrip_bytesio(): testdata = [ - (pd.DataFrame({"unimpl": np.array([1, 20], dtype="uint64")}), dirpath), - (pd.DataFrame({"unimpl": pd.Series(["a", "b", "a"], dtype="category")}), dirpath), + (pd.DataFrame({"unimpl": np.array([1, 20], dtype="uint64")})), + (pd.DataFrame({"unimpl": pd.Series(["a", "b", "a"], dtype="category")})), ( pd.DataFrame( {"unimpl": [pd.Interval(left=0, right=2), pd.Interval(left=0, right=5)]} ), - dirpath, ), ( pd.DataFrame( @@ -292,24 +291,17 @@ def test_orc_roundtrip_bytesio(): ] } ), - dirpath, - ), - ( - pd.DataFrame({"unimpl": [np.nan] * 100}).astype( - pd.SparseDtype("float", np.nan) - ), - dirpath, ), + (pd.DataFrame({"unimpl": [np.nan] * 100}).astype(pd.SparseDtype("float", np.nan)),), ] -@pytest.mark.parametrize("unimplemented, dirpath", testdata) -def test_orc_writer_unimplemented_dtypes(unimplemented, dirpath): +@pytest.mark.parametrize("unimplemented", testdata) +def test_orc_writer_unimplemented_dtypes(unimplemented): # GH44554 # PyArrow gained ORC write support with the current argument order pytest.importorskip("pyarrow", minversion="7.0.0") - outputfile = os.path.join(dirpath, "TestOrcFile.testReadWrite.orc") msg = """The dtype of one or more columns is unsigned integers, intervals, periods, sparse or 
categorical which is not supported yet.""" with pytest.raises(NotImplementedError, match=msg): - unimplemented.to_orc(outputfile) + unimplemented.to_orc() From c7bf39ff2c400deecfd41b216e52dfd1321d1c58 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 20:59:25 -0400 Subject: [PATCH 33/49] Fix testdata tuple bug in test_orc --- pandas/tests/io/test_orc.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index d0fe5325a1c44..bac17c2f88584 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -275,24 +275,20 @@ def test_orc_roundtrip_bytesio(): testdata = [ - (pd.DataFrame({"unimpl": np.array([1, 20], dtype="uint64")})), - (pd.DataFrame({"unimpl": pd.Series(["a", "b", "a"], dtype="category")})), - ( - pd.DataFrame( - {"unimpl": [pd.Interval(left=0, right=2), pd.Interval(left=0, right=5)]} - ), + pd.DataFrame({"unimpl": np.array([1, 20], dtype="uint64")}), + pd.DataFrame({"unimpl": pd.Series(["a", "b", "a"], dtype="category")}), + pd.DataFrame( + {"unimpl": [pd.Interval(left=0, right=2), pd.Interval(left=0, right=5)]} ), - ( - pd.DataFrame( - { - "unimpl": [ - pd.Period("2022-01-03", freq="D"), - pd.Period("2022-01-04", freq="D"), - ] - } - ), + pd.DataFrame( + { + "unimpl": [ + pd.Period("2022-01-03", freq="D"), + pd.Period("2022-01-04", freq="D"), + ] + } ), - (pd.DataFrame({"unimpl": [np.nan] * 100}).astype(pd.SparseDtype("float", np.nan)),), + pd.DataFrame({"unimpl": [np.nan] * 100}).astype(pd.SparseDtype("float", np.nan)), ] From e43c6dd73bc2ac30aac771a4924dc0568ceccd28 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 21:28:39 -0400 Subject: [PATCH 34/49] Fix docstrings for check compliance --- pandas/core/frame.py | 2 +- pandas/io/orc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 20b130191e0b9..4682ac4878bca 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2936,7 +2936,7 @@ def to_orc( doesn't require much space and is faster. Other indexes will be included as columns in the file output. **kwargs - Additional keyword arguments passed to the engine + Additional keyword arguments passed to the engine. Returns ------- diff --git a/pandas/io/orc.py b/pandas/io/orc.py index bc14a90d463cf..51b29ce8144e3 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -98,7 +98,7 @@ def to_orc( doesn't require much space and is faster. Other indexes will be included as columns in the file output. **kwargs - Additional keyword arguments passed to the engine + Additional keyword arguments passed to the engine. Returns ------- From afa0a8a3735c082c8855e9e721caafc7e751922e Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 22:14:37 -0400 Subject: [PATCH 35/49] read_orc does not have engine as a param --- doc/source/user_guide/io.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index a7f26e53620f8..973e978a1453f 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5599,7 +5599,7 @@ Read from an orc file. .. ipython:: python :okwarning: - result = pd.read_orc("example_pa.orc", engine="pyarrow") + result = pd.read_orc("example_pa.orc") result.dtypes @@ -5609,7 +5609,6 @@ Read only certain columns of an orc file. 
result = pd.read_orc( "example_pa.orc", - engine="pyarrow", columns=["a", "b"], ) result.dtypes From cd585e678432c5359fab1bd07dd5a0277fdf0e6b Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 29 May 2022 23:54:54 -0400 Subject: [PATCH 36/49] Fix sphinx warnings --- pandas/core/frame.py | 16 ++++++++-------- pandas/io/orc.py | 18 +++++++++--------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4682ac4878bca..51c5c4a7d802a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2945,10 +2945,10 @@ def to_orc( Raises ------ NotImplementedError - * Dtype of one or more columns is unsigned integers, intervals, - periods, sparse or categorical. + Dtype of one or more columns is category, unsigned integers, interval, + period or sparse. ValueError - * engine is not pyarrow. + engine is not pyarrow. See Also -------- @@ -2961,13 +2961,13 @@ def to_orc( Notes ----- * Before using this function you should read the :ref:`user guide about - ORC ` and :ref:`install optional dependencies `. - * This function requires `pyarrow ` - _ library. + ORC ` and :ref:`install optional dependencies `. + * This function requires `pyarrow `_ + library. * Unsigned integers, intervals, periods, sparse and categorical Dtypes - are not supported yet. + are not supported yet. * Currently timezones in datetime columns are not preserved when a - dataframe is converted into ORC files. + dataframe is converted into ORC files. Examples -------- diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 51b29ce8144e3..02f43855c4340 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -107,22 +107,22 @@ def to_orc( Raises ------ NotImplementedError - * Dtype of one or more columns is unsigned integers, intervals, - periods, sparse or categorical. + Dtype of one or more columns is category, unsigned integers, interval, + period or sparse. ValueError - * engine is not pyarrow. + engine is not pyarrow. Notes ----- * Before using this function you should read the - :ref:`user guide about ORC ` and - :ref:`install optional dependencies `. - * This function requires `pyarrow ` - _ library. + :ref:`user guide about ORC ` and + :ref:`install optional dependencies `. + * This function requires `pyarrow `_ + library. * Unsigned integers, intervals, periods, sparse and categorical Dtypes - are not supported yet. + are not supported yet. * Currently timezones in datetime columns are not preserved when a - dataframe is converted into ORC files. + dataframe is converted into ORC files. """ if index is None: index = df.index.names[0] is not None From b509c3c22c5be1eaba6af400cc585b71c4939d26 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Mon, 30 May 2022 17:56:30 -0400 Subject: [PATCH 37/49] Improve docs & rerun tests --- pandas/core/frame.py | 2 +- pandas/io/orc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 51c5c4a7d802a..38ee793f283cd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2964,7 +2964,7 @@ def to_orc( ORC ` and :ref:`install optional dependencies `. * This function requires `pyarrow `_ library. - * Unsigned integers, intervals, periods, sparse and categorical Dtypes + * Category, unsigned integers, interval, period and sparse Dtypes are not supported yet. * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. 
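For reference, a minimal round-trip sketch of the usage these documentation hunks describe (not part of the diff itself; assumes pyarrow >= 7.0.0 is installed, and the file name ``example_pa.orc`` is only illustrative):

.. code-block:: python

    import numpy as np
    import pandas as pd

    # Only ORC-writable dtypes here: no categorical, unsigned integer, interval,
    # period or sparse columns, which the writer rejects at this point in the series.
    df = pd.DataFrame(
        {
            "a": list("abc"),
            "b": list(range(1, 4)),
            "c": np.arange(4.0, 7.0, dtype="float64"),
            "d": [True, False, True],
            "e": pd.date_range("20130101", periods=3),
        }
    )

    df.to_orc("example_pa.orc")  # written via the pyarrow engine

    # read_orc takes no engine argument; a column subset can be requested directly.
    result = pd.read_orc("example_pa.orc", columns=["a", "b"])
    print(result.dtypes)

As noted in the docstrings above, the round trip does not preserve timezone information in datetime columns.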
diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 02f43855c4340..655753e22cd05 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -119,7 +119,7 @@ def to_orc( :ref:`install optional dependencies `. * This function requires `pyarrow `_ library. - * Unsigned integers, intervals, periods, sparse and categorical Dtypes + * Category, unsigned integers, interval, period and sparse Dtypes are not supported yet. * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. From 1001002907fa8892d65f3ab7fe7df5a1cd8d7d00 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Mon, 30 May 2022 19:37:22 -0400 Subject: [PATCH 38/49] Force retrigger --- pandas/tests/io/test_orc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index bac17c2f88584..d09ecf6207926 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -288,7 +288,7 @@ def test_orc_roundtrip_bytesio(): ] } ), - pd.DataFrame({"unimpl": [np.nan] * 100}).astype(pd.SparseDtype("float", np.nan)), + pd.DataFrame({"unimpl": [np.nan] * 50}).astype(pd.SparseDtype("float", np.nan)), ] From 55cab6ee3551eb3efd68783dab267299e889b993 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Tue, 7 Jun 2022 02:07:30 -0400 Subject: [PATCH 39/49] Fix test_orc according to review --- pandas/tests/io/test_orc.py | 48 ++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index d09ecf6207926..5364b25b4a61f 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -6,6 +6,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import read_orc import pandas._testing as tm @@ -22,6 +24,26 @@ def dirpath(datapath): return datapath("io", "data", "orc") +# Examples of dataframes with dtypes for which conversion to ORC +# hasn't been implemented yet. 
+orc_writer_not_implemented = [ + pd.DataFrame({"unimpl": np.array([1, 20], dtype="uint64")}), + pd.DataFrame({"unimpl": pd.Series(["a", "b", "a"], dtype="category")}), + pd.DataFrame( + {"unimpl": [pd.Interval(left=0, right=2), pd.Interval(left=0, right=5)]} + ), + pd.DataFrame( + { + "unimpl": [ + pd.Period("2022-01-03", freq="D"), + pd.Period("2022-01-04", freq="D"), + ] + } + ), + pd.DataFrame({"unimpl": [np.nan] * 50}).astype(pd.SparseDtype("float", np.nan)), +] + + def test_orc_reader_empty(dirpath): columns = [ "boolean1", @@ -227,10 +249,10 @@ def test_orc_reader_snappy_compressed(dirpath): tm.assert_equal(expected, got) +@td.skip_if_no("pyarrow", min_version="7.0.0") def test_orc_roundtrip_file(dirpath): # GH44554 # PyArrow gained ORC write support with the current argument order - pytest.importorskip("pyarrow", minversion="7.0.0") data = { "boolean1": np.array([False, True], dtype="bool"), "byte1": np.array([1, 100], dtype="int8"), @@ -251,10 +273,10 @@ def test_orc_roundtrip_file(dirpath): tm.assert_equal(expected, got) +@td.skip_if_no("pyarrow", min_version="7.0.0") def test_orc_roundtrip_bytesio(): # GH44554 # PyArrow gained ORC write support with the current argument order - pytest.importorskip("pyarrow", minversion="7.0.0") data = { "boolean1": np.array([False, True], dtype="bool"), "byte1": np.array([1, 100], dtype="int8"), @@ -274,29 +296,11 @@ def test_orc_roundtrip_bytesio(): tm.assert_equal(expected, got) -testdata = [ - pd.DataFrame({"unimpl": np.array([1, 20], dtype="uint64")}), - pd.DataFrame({"unimpl": pd.Series(["a", "b", "a"], dtype="category")}), - pd.DataFrame( - {"unimpl": [pd.Interval(left=0, right=2), pd.Interval(left=0, right=5)]} - ), - pd.DataFrame( - { - "unimpl": [ - pd.Period("2022-01-03", freq="D"), - pd.Period("2022-01-04", freq="D"), - ] - } - ), - pd.DataFrame({"unimpl": [np.nan] * 50}).astype(pd.SparseDtype("float", np.nan)), -] - - -@pytest.mark.parametrize("unimplemented", testdata) +@td.skip_if_no("pyarrow", min_version="7.0.0") +@pytest.mark.parametrize("unimplemented", orc_writer_not_implemented) def test_orc_writer_unimplemented_dtypes(unimplemented): # GH44554 # PyArrow gained ORC write support with the current argument order - pytest.importorskip("pyarrow", minversion="7.0.0") msg = """The dtype of one or more columns is unsigned integers, intervals, periods, sparse or categorical which is not supported yet.""" with pytest.raises(NotImplementedError, match=msg): From 89283e0d2e7a9bf80894e0edd07b6d3aeeafe6c8 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Tue, 7 Jun 2022 08:46:02 -0400 Subject: [PATCH 40/49] Rename some variables and func --- pandas/tests/io/test_orc.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 5364b25b4a61f..932918c75dec5 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -25,8 +25,9 @@ def dirpath(datapath): # Examples of dataframes with dtypes for which conversion to ORC -# hasn't been implemented yet. -orc_writer_not_implemented = [ +# hasn't been implemented yet, that is, Category, unsigned integers, +# interval, period and sparse. 
+orc_writer_dtypes_not_supported = [ pd.DataFrame({"unimpl": np.array([1, 20], dtype="uint64")}), pd.DataFrame({"unimpl": pd.Series(["a", "b", "a"], dtype="category")}), pd.DataFrame( @@ -297,11 +298,11 @@ def test_orc_roundtrip_bytesio(): @td.skip_if_no("pyarrow", min_version="7.0.0") -@pytest.mark.parametrize("unimplemented", orc_writer_not_implemented) -def test_orc_writer_unimplemented_dtypes(unimplemented): +@pytest.mark.parametrize("df_not_supported", orc_writer_dtypes_not_supported) +def test_orc_writer_dtypes_not_supported(df_not_supported): # GH44554 # PyArrow gained ORC write support with the current argument order msg = """The dtype of one or more columns is unsigned integers, intervals, periods, sparse or categorical which is not supported yet.""" with pytest.raises(NotImplementedError, match=msg): - unimplemented.to_orc() + df_not_supported.to_orc() From 989468a4b66637df12337ae0846d858fab06a0d0 Mon Sep 17 00:00:00 2001 From: Ian Alexander Joiner Date: Tue, 7 Jun 2022 12:11:35 -0400 Subject: [PATCH 41/49] Update pandas/core/frame.py Co-authored-by: Matthew Roeschke --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 38ee793f283cd..364fda763c718 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2923,7 +2923,7 @@ def to_orc( we refer to objects with a write() method, such as a file handle (e.g. via builtin open function). If path is None, a bytes object is returned. - engine : {{'pyarrow'}}, default 'pyarrow' + engine : str, default 'pyarrow' ORC library to use, or library it self, checked with 'pyarrow' name and version >= 7.0.0. Raises ValueError if it is anything but 'pyarrow'. From a7fca36785c8e7a747f0d4d6e6031706b5e43e58 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sat, 11 Jun 2022 23:58:24 -0400 Subject: [PATCH 42/49] Fix issues according to review --- doc/source/user_guide/io.rst | 5 +++-- doc/source/whatsnew/v1.5.0.rst | 22 ++++++++++++++++++++++ pandas/core/frame.py | 7 +++---- pandas/io/orc.py | 4 ++-- pandas/tests/io/test_orc.py | 8 ++++---- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 973e978a1453f..4c5d189e1bba3 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5568,8 +5568,9 @@ ORC format, :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc`. This .. warning:: * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow. - * :func:`~pandas.read_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies `. - * Unsigned integers, intervals, periods, sparse and categorical Dtypes are not supported yet. + * :func:`~pandas.DataFrame.to_orc` requires pyarrow>=7.0.0. + * :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc` are not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies `. + * For supported dtypes please refer to `supported ORC features in Arrow `__. * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. .. ipython:: python diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8a7ad077c2a90..2719d415dedc0 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -100,6 +100,28 @@ as seen in the following example. 1 2021-01-02 08:00:00 4 2 2021-01-02 16:00:00 5 +.. 
_whatsnew_150.enhancements.orc: + +Writing to ORC files +^^^^^^^^^^^^^^^^^^^^ + +The new method :meth:`DataFrame.to_orc` allows writing to ORC files (:issue:`43864`). + +This functionality depends the `pyarrow `__ library. For more details, see :ref:`the IO docs on ORC `. + +.. warning:: + + * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow. + * :func:`~pandas.DataFrame.to_orc` requires pyarrow>=7.0.0. + * :func:`~pandas.DataFrame.to_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies `. + * For supported dtypes please refer to `supported ORC features in Arrow `__. + * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. + +.. code-block:: python + + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.to_orc("./out.orc") + .. _whatsnew_150.enhancements.tar: Reading directly from TAR archives diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 364fda763c718..6626b7dcad24d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2925,8 +2925,7 @@ def to_orc( a bytes object is returned. engine : str, default 'pyarrow' ORC library to use, or library it self, checked with 'pyarrow' name - and version >= 7.0.0. Raises ValueError if it is anything but - 'pyarrow'. + and version >= 7.0.0. index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. @@ -2964,8 +2963,8 @@ def to_orc( ORC ` and :ref:`install optional dependencies `. * This function requires `pyarrow `_ library. - * Category, unsigned integers, interval, period and sparse Dtypes - are not supported yet. + * For supported dtypes please refer to + `this article `__. * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 655753e22cd05..e679097ec3600 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -119,8 +119,8 @@ def to_orc( :ref:`install optional dependencies `. * This function requires `pyarrow `_ library. - * Category, unsigned integers, interval, period and sparse Dtypes - are not supported yet. + * For supported dtypes please refer to + `this article `__. * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. 
""" diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 932918c75dec5..d5a0d4cc4fff0 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -267,11 +267,11 @@ def test_orc_roundtrip_file(dirpath): } expected = pd.DataFrame.from_dict(data) - outputfile = os.path.join(dirpath, "TestOrcFile.testReadWrite.orc") - expected.to_orc(outputfile) - got = read_orc(outputfile) + with tm.ensure_clean() as path: + expected.to_orc(path) + got = read_orc(path) - tm.assert_equal(expected, got) + tm.assert_equal(expected, got) @td.skip_if_no("pyarrow", min_version="7.0.0") From 7fc338c6ab8434651cf70b6e4821a0accd832e7c Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Sun, 12 Jun 2022 01:13:29 -0400 Subject: [PATCH 43/49] Forced reruns --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6626b7dcad24d..8d6357cc9ad57 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2970,12 +2970,12 @@ def to_orc( Examples -------- - >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) + >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [4, 3]}) >>> df.to_orc('df.orc') # doctest: +SKIP >>> pd.read_orc('df.orc') # doctest: +SKIP col1 col2 - 0 1 3 - 1 2 4 + 0 1 4 + 1 2 3 If you want to get a buffer to the orc content you can write it to io.BytesIO >>> import io From 91d15560330c00a0e6253d8ca76bb6f94f230fa4 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Mon, 13 Jun 2022 05:12:33 -0400 Subject: [PATCH 44/49] Fix issues according to review --- pandas/core/frame.py | 15 +++--- pandas/io/orc.py | 44 +++++++++++------- .../io/data/orc/TestOrcFile.testReadWrite.orc | Bin 1344 -> 0 bytes 3 files changed, 33 insertions(+), 26 deletions(-) delete mode 100644 pandas/tests/io/data/orc/TestOrcFile.testReadWrite.orc diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8d6357cc9ad57..183a45e8dca05 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2908,7 +2908,7 @@ def to_orc( *, engine: Literal["pyarrow"] = "pyarrow", index: bool | None = None, - **kwargs, + engine_kwargs: dict[str, Any] | None = None, ) -> bytes | None: """ Write a DataFrame to the ORC format. @@ -2924,8 +2924,7 @@ def to_orc( (e.g. via builtin open function). If path is None, a bytes object is returned. engine : str, default 'pyarrow' - ORC library to use, or library it self, checked with 'pyarrow' name - and version >= 7.0.0. + ORC library to use. Pyarrow must be >= 7.0.0. index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. @@ -2934,8 +2933,8 @@ def to_orc( the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. - **kwargs - Additional keyword arguments passed to the engine. + engine_kwargs: dict[str, Any], optional + Additional keyword arguments passed to :func:`pyarrow.orc.write_table`. Returns ------- @@ -2963,8 +2962,8 @@ def to_orc( ORC ` and :ref:`install optional dependencies `. * This function requires `pyarrow `_ library. - * For supported dtypes please refer to - `this article `__. + * For supported dtypes please refer to `supported ORC features in Arrow + `__. * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. 
@@ -2986,7 +2985,7 @@ def to_orc( """ from pandas.io.orc import to_orc - return to_orc(self, path, engine=engine, index=index, **kwargs) + return to_orc(self, path, engine=engine, index=index, **engine_kwargs) @Substitution( header_type="bool", diff --git a/pandas/io/orc.py b/pandas/io/orc.py index e679097ec3600..793c0356894fb 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -5,6 +5,7 @@ from types import ModuleType from typing import ( TYPE_CHECKING, + Any, Literal, ) @@ -15,6 +16,14 @@ ) from pandas.compat._optional import import_optional_dependency +from pandas.core.dtypes.common import ( + is_categorical, + is_interval_dtype, + is_period_dtype, + is_sparse, + is_unsigned_integer_dtype, +) + from pandas.io.common import get_handle if TYPE_CHECKING: @@ -66,7 +75,7 @@ def to_orc( *, engine: Literal["pyarrow"] = "pyarrow", index: bool | None = None, - **kwargs, + engine_kwargs: dict[str, Any] | None = None, ) -> bytes | None: """ Write a DataFrame to the ORC format. @@ -85,10 +94,8 @@ def to_orc( we refer to objects with a write() method, such as a file handle (e.g. via builtin open function). If path is None, a bytes object is returned. - engine : {{'pyarrow'}}, default 'pyarrow' - ORC library to use, or library it self, checked with 'pyarrow' name - and version >= 7.0.0. Raises ValueError if it is anything but - 'pyarrow'. + engine : str, default 'pyarrow' + ORC library to use. Pyarrow must be >= 7.0.0. index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. @@ -97,8 +104,8 @@ def to_orc( the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. - **kwargs - Additional keyword arguments passed to the engine. + engine_kwargs: dict[str, Any], optional + Additional keyword arguments passed to :func:`pyarrow.orc.write_table`. Returns ------- @@ -119,8 +126,8 @@ def to_orc( :ref:`install optional dependencies `. * This function requires `pyarrow `_ library. - * For supported dtypes please refer to - `this article `__. + * For supported dtypes please refer to `supported ORC features in Arrow + `__. * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. 
""" @@ -128,18 +135,17 @@ def to_orc( index = df.index.names[0] is not None # If unsupported dtypes are found raise NotImplementedError + # In Pyarrow 9.0.0 this check will no longer be needed for dtype in df.dtypes: - dtype_str = dtype.__str__().lower() if ( - "category" in dtype_str - or "interval" in dtype_str - or "sparse" in dtype_str - or "period" in dtype_str - or "uint" in dtype_str + is_categorical(dtype) + or is_interval_dtype(dtype) + or is_period_dtype(dtype) + or is_sparse(dtype) + or is_unsigned_integer_dtype(dtype) ): raise NotImplementedError( - """The dtype of one or more columns is unsigned integers, -intervals, periods, sparse or categorical which is not supported yet.""" + """The dtype of one or more columns is not supported yet.""" ) if engine != "pyarrow": @@ -154,7 +160,9 @@ def to_orc( with get_handle(path, "wb", is_text=False) as handles: assert isinstance(engine, ModuleType) # For mypy orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), handles.handle, **kwargs + engine.Table.from_pandas(df, preserve_index=index), + handles.handle, + **engine_kwargs, ) if was_none: diff --git a/pandas/tests/io/data/orc/TestOrcFile.testReadWrite.orc b/pandas/tests/io/data/orc/TestOrcFile.testReadWrite.orc deleted file mode 100644 index 852360ecad74cb3545fd160f895fca67d68ef276..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1344 zcma)6&5qJg6uv*b?P*25Q&BHP6C{}oYC<5Im~=B_CQ&y!X5!Ao1TrWjgeC(nW@8$k zz&FrGaI35CH1jkrTvHFXmkRfFi(;;QU=Mge zDVnzfQFzyYg#b+cqUwAes6}uc10PZ;)0#?Km{b=@h)D_36ePY?no10J7vUWYZ^0}# z4=R@w%kBq`?&Kb@sOswu$nX7~f{;$7Akev>w3(UGa%M^&XIL8AE5s`~U=W~JwcZY} zI^K}pSwCchp~5iF@Y%_;89m9fw-ks(YfKp-_{y`7)HJ_neU@j0r2E($eyC`%X|SB- z*;Bl4lyN7|4(PYR5WgsJ!IeZ^)kxJ%jZ9TEausQG)Lo52?P+w?Lyew6P-U%sU57m? 
zhMGvKks2hD8O;>eIX9XsS8K;;M}gMDXrb`F;rU#nU6Xr8drImYv}O|Y`6;5%zHfZr z_`VXjaoFeDk!ff3xLRZVh(CMrI`~P0Vry4+b$vxQHgnrTUcQI_v45*27wp|~+ctL$ zxcnia62e!ey0!1D!Rl|I6#DZ`5v~V*1id5~^}FMCBzkAlemim}gXDDDj-25bk7AUJ z-=q0%lyp&gC&`E2XkIlho}e+Ao({31MjE6=>Hx$tY8kVPTjp3MEORXb%e;cMSgc19 zFGmuujU-+lNxWqw@uee)SMT@szl48EwvQyXBZ)Uh()s-IIqivd^GWj&eBtiuK SNU3p7TkfYnoF5$@R{jEaEZds^ From a28c5a8786697f990612efaae8beb46e00871944 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Mon, 13 Jun 2022 05:26:51 -0400 Subject: [PATCH 45/49] Reraise Pyarrow TypeError as NotImplementedError --- pandas/io/orc.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 793c0356894fb..078b9c7a9af84 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -20,7 +20,6 @@ is_categorical, is_interval_dtype, is_period_dtype, - is_sparse, is_unsigned_integer_dtype, ) @@ -141,7 +140,6 @@ def to_orc( is_categorical(dtype) or is_interval_dtype(dtype) or is_period_dtype(dtype) - or is_sparse(dtype) or is_unsigned_integer_dtype(dtype) ): raise NotImplementedError( @@ -159,11 +157,16 @@ def to_orc( assert path is not None # For mypy with get_handle(path, "wb", is_text=False) as handles: assert isinstance(engine, ModuleType) # For mypy - orc.write_table( - engine.Table.from_pandas(df, preserve_index=index), - handles.handle, - **engine_kwargs, - ) + try: + orc.write_table( + engine.Table.from_pandas(df, preserve_index=index), + handles.handle, + **engine_kwargs, + ) + except TypeError as e: + raise NotImplementedError( + """The dtype of one or more columns is not supported yet.""" + ) from e if was_none: assert isinstance(path, io.BytesIO) # For mypy From 162e5bb36461b47248b4a8d47555e05994658b29 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Mon, 13 Jun 2022 06:02:05 -0400 Subject: [PATCH 46/49] Fix bugs --- pandas/core/frame.py | 4 ++-- pandas/io/orc.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 183a45e8dca05..fd853d6603a2c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2908,7 +2908,7 @@ def to_orc( *, engine: Literal["pyarrow"] = "pyarrow", index: bool | None = None, - engine_kwargs: dict[str, Any] | None = None, + engine_kwargs: dict[str, Any] = {}, ) -> bytes | None: """ Write a DataFrame to the ORC format. @@ -2933,7 +2933,7 @@ def to_orc( the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. - engine_kwargs: dict[str, Any], optional + engine_kwargs : dict[str, Any], default {} Additional keyword arguments passed to :func:`pyarrow.orc.write_table`. Returns diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 078b9c7a9af84..b80c6635776c9 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -74,7 +74,7 @@ def to_orc( *, engine: Literal["pyarrow"] = "pyarrow", index: bool | None = None, - engine_kwargs: dict[str, Any] | None = None, + engine_kwargs: dict[str, Any] = {}, ) -> bytes | None: """ Write a DataFrame to the ORC format. @@ -103,7 +103,7 @@ def to_orc( the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. - engine_kwargs: dict[str, Any], optional + engine_kwargs : dict[str, Any], default {} Additional keyword arguments passed to :func:`pyarrow.orc.write_table`. 
Returns From b23058350fe42a239c5d58cd47fa92069be82c03 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Mon, 13 Jun 2022 06:12:57 -0400 Subject: [PATCH 47/49] Fix expected error msg in orc tests --- pandas/io/orc.py | 4 ++-- pandas/tests/io/test_orc.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index b80c6635776c9..36bc72fa4c936 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -143,7 +143,7 @@ def to_orc( or is_unsigned_integer_dtype(dtype) ): raise NotImplementedError( - """The dtype of one or more columns is not supported yet.""" + "The dtype of one or more columns is not supported yet." ) if engine != "pyarrow": @@ -165,7 +165,7 @@ def to_orc( ) except TypeError as e: raise NotImplementedError( - """The dtype of one or more columns is not supported yet.""" + "The dtype of one or more columns is not supported yet." ) from e if was_none: diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index d5a0d4cc4fff0..0bb320907b813 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -302,7 +302,6 @@ def test_orc_roundtrip_bytesio(): def test_orc_writer_dtypes_not_supported(df_not_supported): # GH44554 # PyArrow gained ORC write support with the current argument order - msg = """The dtype of one or more columns is unsigned integers, -intervals, periods, sparse or categorical which is not supported yet.""" + msg = "The dtype of one or more columns is not supported yet." with pytest.raises(NotImplementedError, match=msg): df_not_supported.to_orc() From e16edabf733255f23991017be58e98db036f4204 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Mon, 13 Jun 2022 06:22:13 -0400 Subject: [PATCH 48/49] Avoid deprecated functions --- pandas/io/orc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 36bc72fa4c936..5e1d3f7c86b23 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -17,7 +17,7 @@ from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import ( - is_categorical, + is_categorical_dtype, is_interval_dtype, is_period_dtype, is_unsigned_integer_dtype, @@ -137,7 +137,7 @@ def to_orc( # In Pyarrow 9.0.0 this check will no longer be needed for dtype in df.dtypes: if ( - is_categorical(dtype) + is_categorical_dtype(dtype) or is_interval_dtype(dtype) or is_period_dtype(dtype) or is_unsigned_integer_dtype(dtype) From e4770b8cd6c49cf88c63931adbec40a747a55b84 Mon Sep 17 00:00:00 2001 From: Ian Joiner Date: Mon, 13 Jun 2022 17:34:47 -0400 Subject: [PATCH 49/49] Replace {} with None in arg --- pandas/core/frame.py | 8 +++++--- pandas/io/orc.py | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fd853d6603a2c..00cfd0e0f8fd7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2908,7 +2908,7 @@ def to_orc( *, engine: Literal["pyarrow"] = "pyarrow", index: bool | None = None, - engine_kwargs: dict[str, Any] = {}, + engine_kwargs: dict[str, Any] | None = None, ) -> bytes | None: """ Write a DataFrame to the ORC format. @@ -2933,7 +2933,7 @@ def to_orc( the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. - engine_kwargs : dict[str, Any], default {} + engine_kwargs : dict[str, Any] or None, default None Additional keyword arguments passed to :func:`pyarrow.orc.write_table`. 
Returns @@ -2985,7 +2985,9 @@ def to_orc( """ from pandas.io.orc import to_orc - return to_orc(self, path, engine=engine, index=index, **engine_kwargs) + return to_orc( + self, path, engine=engine, index=index, engine_kwargs=engine_kwargs + ) @Substitution( header_type="bool", diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 5e1d3f7c86b23..40754a56bbe8b 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -74,7 +74,7 @@ def to_orc( *, engine: Literal["pyarrow"] = "pyarrow", index: bool | None = None, - engine_kwargs: dict[str, Any] = {}, + engine_kwargs: dict[str, Any] | None = None, ) -> bytes | None: """ Write a DataFrame to the ORC format. @@ -103,7 +103,7 @@ def to_orc( the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. - engine_kwargs : dict[str, Any], default {} + engine_kwargs : dict[str, Any] or None, default None Additional keyword arguments passed to :func:`pyarrow.orc.write_table`. Returns @@ -132,6 +132,8 @@ def to_orc( """ if index is None: index = df.index.names[0] is not None + if engine_kwargs is None: + engine_kwargs = {} # If unsupported dtypes are found raise NotImplementedError # In Pyarrow 9.0.0 this check will no longer be needed