Skip to content

Commit ebaec28

Browse files
committed
fix doc error & make simpler
1 parent b846bff commit ebaec28

File tree

3 files changed

+18
-31
lines changed

3 files changed

+18
-31
lines changed

doc/source/user_guide/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -4866,7 +4866,7 @@ ORC
48664866

48674867
.. versionadded:: 1.0.0
48684868

4869-
Similar to the `parquet <io.parquet>` format, the `ORC Format <//https://orc.apache.org/>`__ binary columnar serialization
4869+
Similar to the :ref:`parquet <io.parquet>` format, the `ORC Format <https://orc.apache.org/>`__ is a binary columnar serialization
48704870
for data frames. It is designed to make reading data frames efficient. Pandas provides *only* a reader for the
48714871
ORC format, :func:`~pandas.read_orc`. This requires the `pyarrow <https://arrow.apache.org/docs/python/>`__ library.
48724872

pandas/io/orc.py

+13-30
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import distutils
44
from typing import TYPE_CHECKING, List, Optional
55

6-
from pandas.compat._optional import import_optional_dependency
7-
86
from pandas._typing import FilePathOrBuffer
97

108
from pandas.io.common import get_filepath_or_buffer
@@ -13,34 +11,9 @@
1311
from pandas import DataFrame
1412

1513

16-
class PyArrowImpl:
17-
def __init__(self):
18-
pyarrow = import_optional_dependency(
19-
"pyarrow", extra="pyarrow is required for orc support."
20-
)
21-
22-
# we require a newer version of pyarrow thaN we support for parquet
23-
import pyarrow
24-
25-
if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0":
26-
raise ImportError("pyarrow must be >= 0.13.0 for read_orc")
27-
28-
import pyarrow.orc
29-
30-
self.api = pyarrow
31-
32-
def read(
33-
self, path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs
34-
) -> "DataFrame":
35-
path, _, _, _ = get_filepath_or_buffer(path)
36-
orc_file = self.api.orc.ORCFile(path)
37-
result = orc_file.read(columns=columns, **kwargs).to_pandas()
38-
return result
39-
40-
4114
def read_orc(
4215
path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs,
43-
):
16+
) -> "DataFrame":
4417
"""
4518
Load an ORC object from the file path, returning a DataFrame.
4619
@@ -70,5 +43,15 @@ def read_orc(
7043
DataFrame
7144
"""
7245

73-
impl = PyArrowImpl()
74-
return impl.read(path, columns=columns, **kwargs)
46+
# we require a newer version of pyarrow than we support for parquet
47+
import pyarrow
48+
49+
if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0":
50+
raise ImportError("pyarrow must be >= 0.13.0 for read_orc")
51+
52+
import pyarrow.orc
53+
54+
path, _, _, _ = get_filepath_or_buffer(path)
55+
orc_file = pyarrow.orc.ORCFile(path)
56+
result = orc_file.read(columns=columns, **kwargs).to_pandas()
57+
return result

pandas/tests/io/test_orc.py

+4
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@
55
import numpy as np
66
import pytest
77

8+
from pandas.compat import is_platform_windows
9+
810
import pandas as pd
911
from pandas import read_orc
1012
import pandas.util.testing as tm
1113

1214
pytest.importorskip("pyarrow", minversion="0.13.0")
1315

16+
# Skip this module on Windows. The condition must be the *result* of
# is_platform_windows() (the bare function object is always truthy), and the
# message must be passed as ``reason=`` because extra positional arguments to
# skipif are treated as additional conditions.
# NOTE(review): a later ``pytestmark = pytest.mark.filterwarnings(...)``
# assignment in this module rebinds the name; combine both marks in a list
# (``pytestmark = [skipif_mark, filterwarnings_mark]``) for this skip to apply.
pytestmark = pytest.mark.skipif(is_platform_windows(), reason="skipping on windows")
17+
1418
pytestmark = pytest.mark.filterwarnings(
1519
"ignore:RangeIndex.* is deprecated:DeprecationWarning"
1620
)

0 commit comments

Comments
 (0)