Skip to content

Commit ca016ef

Browse files
committed
remove option for multiple backends & simplify tests
1 parent ad1bade commit ca016ef

File tree

4 files changed

+17
-107
lines changed

4 files changed

+17
-107
lines changed

doc/source/getting_started/install.rst

+1 -1
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,7 @@ matplotlib 2.2.2 Visualization
258258
openpyxl 2.4.8 Reading / writing for xlsx files
259259
pandas-gbq 0.8.0 Google Big Query access
260260
psycopg2 PostgreSQL engine for sqlalchemy
261-
pyarrow 0.12.0 Parquet, ORC, and feather reading / writing
261+
pyarrow 0.12.0 Parquet, ORC (requires 0.13.0), and feather reading / writing
262262
pymysql 0.7.11 MySQL engine for sqlalchemy
263263
pyreadstat SPSS files (.sav) reading
264264
pytables 3.4.2 HDF5 reading / writing

pandas/core/config_init.py

-16
Original file line number | Diff line number | Diff line change
@@ -568,22 +568,6 @@ def use_inf_as_na_cb(key):
568568
validator=is_one_of_factory(["auto", "pyarrow", "fastparquet"]),
569569
)
570570

571-
572-
# Set up the io.orc specific configuration.
573-
orc_engine_doc = """
574-
: string
575-
The default orc reader/writer engine. Available options:
576-
'auto', 'pyarrow', the default is 'auto'
577-
"""
578-
579-
with cf.config_prefix("io.orc"):
580-
cf.register_option(
581-
"engine",
582-
"auto",
583-
orc_engine_doc,
584-
validator=is_one_of_factory(["auto", "pyarrow"]),
585-
)
586-
587571
# --------
588572
# Plotting
589573
# ---------

pandas/io/orc.py

+8 -42
Original file line number | Diff line number | Diff line change
@@ -1,40 +1,16 @@
11
""" orc compat """
22

33
import distutils
4-
from typing import List, Optional
4+
from typing import TYPE_CHECKING, List, Optional
55

66
from pandas.compat._optional import import_optional_dependency
77

8-
from pandas import DataFrame, get_option
98
from pandas._typing import FilePathOrBuffer
109

1110
from pandas.io.common import get_filepath_or_buffer
1211

13-
14-
def get_engine(engine: str) -> "PyArrowImpl":
15-
""" return our implementation """
16-
17-
if engine == "auto":
18-
engine = get_option("io.orc.engine")
19-
20-
if engine == "auto":
21-
# try engines in this order
22-
try:
23-
return PyArrowImpl()
24-
except ImportError:
25-
pass
26-
27-
raise ImportError(
28-
"Unable to find a usable engine; "
29-
"tried using: 'pyarrow'.\n"
30-
"pyarrow is required for orc "
31-
"support"
32-
)
33-
34-
if engine not in ["pyarrow"]:
35-
raise ValueError("engine must be 'pyarrow'")
36-
37-
return PyArrowImpl()
12+
if TYPE_CHECKING:
13+
from pandas import DataFrame
3814

3915

4016
class PyArrowImpl:
@@ -55,22 +31,15 @@ def __init__(self):
5531

5632
def read(
5733
self, path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs
58-
) -> DataFrame:
34+
) -> "DataFrame":
5935
path, _, _, _ = get_filepath_or_buffer(path)
60-
61-
py_file = self.api.input_stream(path)
62-
orc_file = self.api.orc.ORCFile(py_file)
63-
36+
orc_file = self.api.orc.ORCFile(path)
6437
result = orc_file.read(columns=columns, **kwargs).to_pandas()
65-
6638
return result
6739

6840

6941
def read_orc(
70-
path: FilePathOrBuffer,
71-
engine: str = "auto",
72-
columns: Optional[List[str]] = None,
73-
**kwargs,
42+
path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs,
7443
):
7544
"""
7645
Load an ORC object from the file path, returning a DataFrame.
@@ -91,18 +60,15 @@ def read_orc(
9160
By file-like object, we refer to objects with a ``read()`` method,
9261
such as a file handler (e.g. via builtin ``open`` function)
9362
or ``StringIO``.
94-
engine : {'auto', 'pyarrow'}, default 'auto'
95-
ORC library to use. If 'auto', then the option ``io.orc.engine`` is
96-
used. The default ``io.orc.engine`` behavior is to try 'pyarrow'.
9763
columns : list, default=None
9864
If not None, only these columns will be read from the file.
9965
**kwargs
100-
Any additional kwargs are passed to the engine.
66+
Any additional kwargs are passed to pyarrow.
10167
10268
Returns
10369
-------
10470
DataFrame
10571
"""
10672

107-
impl = get_engine(engine)
73+
impl = PyArrowImpl()
10874
return impl.read(path, columns=columns, **kwargs)

pandas/tests/io/test_orc.py

+8 -48
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,15 @@
11
""" test orc compat """
22
import datetime
3-
import distutils
43
import os
54

65
import numpy as np
76
import pytest
87

9-
from pandas.compat import is_platform_windows
10-
118
import pandas as pd
9+
from pandas import read_orc
1210
import pandas.util.testing as tm
1311

14-
from pandas.io.orc import PyArrowImpl, get_engine, read_orc
15-
16-
try:
17-
import pyarrow # noqa
18-
19-
if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0":
20-
raise ImportError("pyarrow must be >= 0.13.0 for read_orc")
21-
22-
_HAVE_PYARROW = True
23-
except ImportError:
24-
_HAVE_PYARROW = False
12+
pytest.importorskip("pyarrow", minversion="0.13.0")
2513

2614
pytestmark = pytest.mark.filterwarnings(
2715
"ignore:RangeIndex.* is deprecated:DeprecationWarning"
@@ -33,35 +21,7 @@ def dirpath(datapath):
3321
return datapath("io", "data", "orc")
3422

3523

36-
@pytest.fixture
37-
def pa():
38-
if not _HAVE_PYARROW:
39-
pytest.skip("pyarrow is not installed")
40-
if is_platform_windows():
41-
pytest.skip("pyarrow orc not available by default on windows")
42-
return "pyarrow"
43-
44-
45-
def test_options_get_engine(pa):
46-
assert isinstance(get_engine("pyarrow"), PyArrowImpl)
47-
48-
with pd.option_context("io.orc.engine", "pyarrow"):
49-
assert isinstance(get_engine("auto"), PyArrowImpl)
50-
assert isinstance(get_engine("pyarrow"), PyArrowImpl)
51-
52-
with pd.option_context("io.orc.engine", "auto"):
53-
assert isinstance(get_engine("auto"), PyArrowImpl)
54-
assert isinstance(get_engine("pyarrow"), PyArrowImpl)
55-
56-
57-
def test_invalid_engine(dirpath):
58-
inputfile = os.path.join(dirpath, "TestOrcFile.emptyFile.orc")
59-
engine = "foo"
60-
with pytest.raises(ValueError):
61-
read_orc(inputfile, engine=engine, columns=["boolean1"])
62-
63-
64-
def test_orc_reader_empty(dirpath, pa):
24+
def test_orc_reader_empty(dirpath):
6525
columns = [
6626
"boolean1",
6727
"byte1",
@@ -94,7 +54,7 @@ def test_orc_reader_empty(dirpath, pa):
9454
tm.assert_equal(expected, got)
9555

9656

97-
def test_orc_reader_basic(dirpath, pa):
57+
def test_orc_reader_basic(dirpath):
9858
data = {
9959
"boolean1": np.array([False, True], dtype="bool"),
10060
"byte1": np.array([1, 100], dtype="int8"),
@@ -114,7 +74,7 @@ def test_orc_reader_basic(dirpath, pa):
11474
tm.assert_equal(expected, got)
11575

11676

117-
def test_orc_reader_decimal(dirpath, pa):
77+
def test_orc_reader_decimal(dirpath):
11878
from decimal import Decimal
11979

12080
# Only testing the first 10 rows of data
@@ -143,7 +103,7 @@ def test_orc_reader_decimal(dirpath, pa):
143103
tm.assert_equal(expected, got)
144104

145105

146-
def test_orc_reader_date_low(dirpath, pa):
106+
def test_orc_reader_date_low(dirpath):
147107
data = {
148108
"time": np.array(
149109
[
@@ -184,7 +144,7 @@ def test_orc_reader_date_low(dirpath, pa):
184144
tm.assert_equal(expected, got)
185145

186146

187-
def test_orc_reader_date_high(dirpath, pa):
147+
def test_orc_reader_date_high(dirpath):
188148
data = {
189149
"time": np.array(
190150
[
@@ -225,7 +185,7 @@ def test_orc_reader_date_high(dirpath, pa):
225185
tm.assert_equal(expected, got)
226186

227187

228-
def test_orc_reader_snappy_compressed(dirpath, pa):
188+
def test_orc_reader_snappy_compressed(dirpath):
229189
data = {
230190
"int1": np.array(
231191
[

0 commit comments

Comments
 (0)