Skip to content

Commit ad1bade

Browse files
committed
Revert "simplify imports"
This reverts commit 6919a70.
1 parent bf4f013 commit ad1bade

File tree

2 files changed

+55
-20
lines changed

2 files changed

+55
-20
lines changed

pandas/io/orc.py

+26-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" orc compat """
22

3+
import distutils
34
from typing import List, Optional
45

56
from pandas.compat._optional import import_optional_dependency
@@ -11,13 +12,28 @@
1112

1213

1314
def get_engine(engine: str) -> "PyArrowImpl":
14-
""" return our implementation; we only support a pyarrow impl """
15+
""" return our implementation """
1516

1617
if engine == "auto":
1718
engine = get_option("io.orc.engine")
1819

20+
if engine == "auto":
21+
# try engines in this order
22+
try:
23+
return PyArrowImpl()
24+
except ImportError:
25+
pass
26+
27+
raise ImportError(
28+
"Unable to find a usable engine; "
29+
"tried using: 'pyarrow'.\n"
30+
"pyarrow is required for orc "
31+
"support"
32+
)
33+
1934
if engine not in ["pyarrow"]:
2035
raise ValueError("engine must be 'pyarrow'")
36+
2137
return PyArrowImpl()
2238

2339

@@ -27,15 +43,12 @@ def __init__(self):
2743
"pyarrow", extra="pyarrow is required for orc support."
2844
)
2945

30-
try:
31-
import pyarrow
32-
except ImportError:
33-
raise ImportError(
34-
"Unable to find a usable engine; "
35-
"tried using: 'pyarrow'.\n"
36-
"pyarrow is required for orc "
37-
"support"
38-
)
46+
# we require a newer version of pyarrow than we support for parquet
47+
import pyarrow
48+
49+
if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0":
50+
raise ImportError("pyarrow must be >= 0.13.0 for read_orc")
51+
3952
import pyarrow.orc
4053

4154
self.api = pyarrow
@@ -44,7 +57,9 @@ def read(
4457
self, path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs
4558
) -> DataFrame:
4659
path, _, _, _ = get_filepath_or_buffer(path)
47-
orc_file = self.api.orc.ORCFile(path)
60+
61+
py_file = self.api.input_stream(path)
62+
orc_file = self.api.orc.ORCFile(py_file)
4863

4964
result = orc_file.read(columns=columns, **kwargs).to_pandas()
5065

pandas/tests/io/test_orc.py

+29-9
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,27 @@
11
""" test orc compat """
22
import datetime
3-
from decimal import Decimal
3+
import distutils
44
import os
55

66
import numpy as np
77
import pytest
88

9+
from pandas.compat import is_platform_windows
10+
911
import pandas as pd
1012
import pandas.util.testing as tm
1113

1214
from pandas.io.orc import PyArrowImpl, get_engine, read_orc
1315

14-
pyarrow = pytest.importorskip("pyarrow")
16+
try:
17+
import pyarrow # noqa
18+
19+
if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0":
20+
raise ImportError("pyarrow must be >= 0.13.0 for read_orc")
21+
22+
_HAVE_PYARROW = True
23+
except ImportError:
24+
_HAVE_PYARROW = False
1525

1626
pytestmark = pytest.mark.filterwarnings(
1727
"ignore:RangeIndex.* is deprecated:DeprecationWarning"
@@ -23,7 +33,16 @@ def dirpath(datapath):
2333
return datapath("io", "data", "orc")
2434

2535

26-
def test_options_get_engine():
36+
@pytest.fixture
37+
def pa():
38+
if not _HAVE_PYARROW:
39+
pytest.skip("pyarrow is not installed")
40+
if is_platform_windows():
41+
pytest.skip("pyarrow orc not available by default on windows")
42+
return "pyarrow"
43+
44+
45+
def test_options_get_engine(pa):
2746
assert isinstance(get_engine("pyarrow"), PyArrowImpl)
2847

2948
with pd.option_context("io.orc.engine", "pyarrow"):
@@ -42,7 +61,7 @@ def test_invalid_engine(dirpath):
4261
read_orc(inputfile, engine=engine, columns=["boolean1"])
4362

4463

45-
def test_orc_reader_empty(dirpath):
64+
def test_orc_reader_empty(dirpath, pa):
4665
columns = [
4766
"boolean1",
4867
"byte1",
@@ -75,7 +94,7 @@ def test_orc_reader_empty(dirpath):
7594
tm.assert_equal(expected, got)
7695

7796

78-
def test_orc_reader_basic(dirpath):
97+
def test_orc_reader_basic(dirpath, pa):
7998
data = {
8099
"boolean1": np.array([False, True], dtype="bool"),
81100
"byte1": np.array([1, 100], dtype="int8"),
@@ -95,7 +114,8 @@ def test_orc_reader_basic(dirpath):
95114
tm.assert_equal(expected, got)
96115

97116

98-
def test_orc_reader_decimal(dirpath):
117+
def test_orc_reader_decimal(dirpath, pa):
118+
from decimal import Decimal
99119

100120
# Only testing the first 10 rows of data
101121
data = {
@@ -123,7 +143,7 @@ def test_orc_reader_decimal(dirpath):
123143
tm.assert_equal(expected, got)
124144

125145

126-
def test_orc_reader_date_low(dirpath):
146+
def test_orc_reader_date_low(dirpath, pa):
127147
data = {
128148
"time": np.array(
129149
[
@@ -164,7 +184,7 @@ def test_orc_reader_date_low(dirpath):
164184
tm.assert_equal(expected, got)
165185

166186

167-
def test_orc_reader_date_high(dirpath):
187+
def test_orc_reader_date_high(dirpath, pa):
168188
data = {
169189
"time": np.array(
170190
[
@@ -205,7 +225,7 @@ def test_orc_reader_date_high(dirpath):
205225
tm.assert_equal(expected, got)
206226

207227

208-
def test_orc_reader_snappy_compressed(dirpath):
228+
def test_orc_reader_snappy_compressed(dirpath, pa):
209229
data = {
210230
"int1": np.array(
211231
[

0 commit comments

Comments
 (0)