Skip to content

Commit fa60ce4

Browse files
committed
simplify imports
1 parent 65add0f commit fa60ce4

File tree

2 files changed

+20
-55
lines changed

2 files changed

+20
-55
lines changed

pandas/io/orc.py

+11-26
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
""" orc compat """
22

3-
import distutils
43
from typing import List, Optional
54

65
from pandas.compat._optional import import_optional_dependency
@@ -12,28 +11,13 @@
1211

1312

1413
def get_engine(engine: str) -> "PyArrowImpl":
15-
""" return our implementation """
14+
""" return our implementation; we only support a pyarrow impl """
1615

1716
if engine == "auto":
1817
engine = get_option("io.orc.engine")
1918

20-
if engine == "auto":
21-
# try engines in this order
22-
try:
23-
return PyArrowImpl()
24-
except ImportError:
25-
pass
26-
27-
raise ImportError(
28-
"Unable to find a usable engine; "
29-
"tried using: 'pyarrow'.\n"
30-
"pyarrow is required for orc "
31-
"support"
32-
)
33-
3419
if engine not in ["pyarrow"]:
3520
raise ValueError("engine must be 'pyarrow'")
36-
3721
return PyArrowImpl()
3822

3923

@@ -43,12 +27,15 @@ def __init__(self):
4327
"pyarrow", extra="pyarrow is required for orc support."
4428
)
4529

46-
# we require a newer version of pyarrow than we support for parquet
47-
import pyarrow
48-
49-
if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0":
50-
raise ImportError("pyarrow must be >= 0.13.0 for read_orc")
51-
30+
try:
31+
import pyarrow
32+
except ImportError:
33+
raise ImportError(
34+
"Unable to find a usable engine; "
35+
"tried using: 'pyarrow'.\n"
36+
"pyarrow is required for orc "
37+
"support"
38+
)
5239
import pyarrow.orc
5340

5441
self.api = pyarrow
@@ -57,9 +44,7 @@ def read(
5744
self, path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs
5845
) -> DataFrame:
5946
path, _, _, _ = get_filepath_or_buffer(path)
60-
61-
py_file = self.api.input_stream(path)
62-
orc_file = self.api.orc.ORCFile(py_file)
47+
orc_file = self.api.orc.ORCFile(path)
6348

6449
result = orc_file.read(columns=columns, **kwargs).to_pandas()
6550

pandas/tests/io/test_orc.py

+9-29
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,17 @@
11
""" test orc compat """
22
import datetime
3-
import distutils
3+
from decimal import Decimal
44
import os
55

66
import numpy as np
77
import pytest
88

9-
from pandas.compat import is_platform_windows
10-
119
import pandas as pd
1210
import pandas.util.testing as tm
1311

1412
from pandas.io.orc import PyArrowImpl, get_engine, read_orc
1513

16-
try:
17-
import pyarrow # noqa
18-
19-
if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0":
20-
raise ImportError("pyarrow must be >= 0.13.0 for read_orc")
21-
22-
_HAVE_PYARROW = True
23-
except ImportError:
24-
_HAVE_PYARROW = False
14+
pyarrow = pytest.importorskip("pyarrow")
2515

2616
pytestmark = pytest.mark.filterwarnings(
2717
"ignore:RangeIndex.* is deprecated:DeprecationWarning"
@@ -33,16 +23,7 @@ def dirpath(datapath):
3323
return datapath("io", "data", "orc")
3424

3525

36-
@pytest.fixture
37-
def pa():
38-
if not _HAVE_PYARROW:
39-
pytest.skip("pyarrow is not installed")
40-
if is_platform_windows():
41-
pytest.skip("pyarrow orc not available by default on windows")
42-
return "pyarrow"
43-
44-
45-
def test_options_get_engine(pa):
26+
def test_options_get_engine():
4627
assert isinstance(get_engine("pyarrow"), PyArrowImpl)
4728

4829
with pd.option_context("io.orc.engine", "pyarrow"):
@@ -61,7 +42,7 @@ def test_invalid_engine(dirpath):
6142
read_orc(inputfile, engine=engine, columns=["boolean1"])
6243

6344

64-
def test_orc_reader_empty(dirpath, pa):
45+
def test_orc_reader_empty(dirpath):
6546
columns = [
6647
"boolean1",
6748
"byte1",
@@ -94,7 +75,7 @@ def test_orc_reader_empty(dirpath, pa):
9475
tm.assert_equal(expected, got)
9576

9677

97-
def test_orc_reader_basic(dirpath, pa):
78+
def test_orc_reader_basic(dirpath):
9879
data = {
9980
"boolean1": np.array([False, True], dtype="bool"),
10081
"byte1": np.array([1, 100], dtype="int8"),
@@ -114,8 +95,7 @@ def test_orc_reader_basic(dirpath, pa):
11495
tm.assert_equal(expected, got)
11596

11697

117-
def test_orc_reader_decimal(dirpath, pa):
118-
from decimal import Decimal
98+
def test_orc_reader_decimal(dirpath):
11999

120100
# Only testing the first 10 rows of data
121101
data = {
@@ -143,7 +123,7 @@ def test_orc_reader_decimal(dirpath, pa):
143123
tm.assert_equal(expected, got)
144124

145125

146-
def test_orc_reader_date_low(dirpath, pa):
126+
def test_orc_reader_date_low(dirpath):
147127
data = {
148128
"time": np.array(
149129
[
@@ -184,7 +164,7 @@ def test_orc_reader_date_low(dirpath, pa):
184164
tm.assert_equal(expected, got)
185165

186166

187-
def test_orc_reader_date_high(dirpath, pa):
167+
def test_orc_reader_date_high(dirpath):
188168
data = {
189169
"time": np.array(
190170
[
@@ -225,7 +205,7 @@ def test_orc_reader_date_high(dirpath, pa):
225205
tm.assert_equal(expected, got)
226206

227207

228-
def test_orc_reader_snappy_compressed(dirpath, pa):
208+
def test_orc_reader_snappy_compressed(dirpath):
229209
data = {
230210
"int1": np.array(
231211
[

0 commit comments

Comments
 (0)