|
3 | 3 | import distutils
|
4 | 4 | from typing import TYPE_CHECKING, List, Optional
|
5 | 5 |
|
6 |
| -from pandas.compat._optional import import_optional_dependency |
7 |
| - |
8 | 6 | from pandas._typing import FilePathOrBuffer
|
9 | 7 |
|
10 | 8 | from pandas.io.common import get_filepath_or_buffer
|
|
13 | 11 | from pandas import DataFrame
|
14 | 12 |
|
15 | 13 |
|
16 |
| -class PyArrowImpl: |
17 |
| - def __init__(self): |
18 |
| - pyarrow = import_optional_dependency( |
19 |
| - "pyarrow", extra="pyarrow is required for orc support." |
20 |
| - ) |
21 |
| - |
22 |
| - # we require a newer version of pyarrow than we support for parquet |
23 |
| - import pyarrow |
24 |
| - |
25 |
| - if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0": |
26 |
| - raise ImportError("pyarrow must be >= 0.13.0 for read_orc") |
27 |
| - |
28 |
| - import pyarrow.orc |
29 |
| - |
30 |
| - self.api = pyarrow |
31 |
| - |
32 |
| - def read( |
33 |
| - self, path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs |
34 |
| - ) -> "DataFrame": |
35 |
| - path, _, _, _ = get_filepath_or_buffer(path) |
36 |
| - orc_file = self.api.orc.ORCFile(path) |
37 |
| - result = orc_file.read(columns=columns, **kwargs).to_pandas() |
38 |
| - return result |
39 |
| - |
40 |
| - |
41 | 14 | def read_orc(
|
42 | 15 | path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs,
|
43 |
| -): |
| 16 | +) -> "DataFrame": |
44 | 17 | """
|
45 | 18 | Load an ORC object from the file path, returning a DataFrame.
|
46 | 19 |
|
@@ -70,5 +43,15 @@ def read_orc(
|
70 | 43 | DataFrame
|
71 | 44 | """
|
72 | 45 |
|
73 |
| - impl = PyArrowImpl() |
74 |
| - return impl.read(path, columns=columns, **kwargs) |
| 46 | + # we require a newer version of pyarrow than we support for parquet |
| 47 | + import pyarrow |
| 48 | + |
| 49 | + if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0": |
| 50 | + raise ImportError("pyarrow must be >= 0.13.0 for read_orc") |
| 51 | + |
| 52 | + import pyarrow.orc |
| 53 | + |
| 54 | + path, _, _, _ = get_filepath_or_buffer(path) |
| 55 | + orc_file = pyarrow.orc.ORCFile(path) |
| 56 | + result = orc_file.read(columns=columns, **kwargs).to_pandas() |
| 57 | + return result |
0 commit comments