
Commit 5b76edf

update doc-links & add typing
1 parent: a5be781

File tree

3 files changed: +38 -6 lines

doc/source/reference/io.rst (+7)

@@ -98,6 +98,13 @@ Parquet
 
    read_parquet
 
+ORC
+~~~
+.. autosummary::
+   :toctree: api/
+
+   read_orc
+
 SAS
 ~~~
 .. autosummary::

doc/source/user_guide/io.rst (+14)

@@ -4856,6 +4856,20 @@ The above example creates a partitioned dataset that may look like:
    except OSError:
        pass
 
+.. _io.orc:
+
+ORC
+---
+
+.. versionadded:: 1.0.0
+
+Similar to the :ref:`parquet <io.parquet>` format, the `ORC Format <https://orc.apache.org/>`__ is a binary columnar
+serialization for data frames. It is designed to make reading data frames efficient. Pandas provides *only* a reader
+for the ORC format, :func:`~pandas.read_orc`.
+
+See the documentation for `pyarrow <https://arrow.apache.org/docs/python/>`__ for more details.
+
+
 .. _io.sql:
 
 SQL queries
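
For context, a minimal usage sketch of the reader documented above. The file name example.orc and the column names are placeholders, and read_orc requires the optional pyarrow dependency to be installed:

import pandas as pd

# Read a whole ORC file into a DataFrame (pyarrow does the deserialization).
df = pd.read_orc("example.orc")

# Read only a subset of columns; "a" and "b" are placeholder column names.
subset = pd.read_orc("example.orc", columns=["a", "b"])
print(subset.dtypes)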

pandas/io/orc.py (+17 -6)

@@ -1,16 +1,18 @@
 """ orc compat """
 
 import distutils
+from typing import List, Optional
 
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import AbstractMethodError
 
 from pandas import DataFrame, get_option
+from pandas._typing import FilePathOrBuffer
 
 from pandas.io.common import get_filepath_or_buffer
 
 
-def get_engine(engine):
+def get_engine(engine: str) -> "PyArrowImpl":
     """ return our implementation """
 
     if engine == "auto":
@@ -42,7 +44,7 @@ class BaseImpl:
     api = None  # module
 
     @staticmethod
-    def validate_dataframe(df):
+    def validate_dataframe(df: DataFrame):
 
         if not isinstance(df, DataFrame):
             raise ValueError("to_orc only supports IO with DataFrames")
@@ -58,10 +60,12 @@ def validate_dataframe(df):
         if not valid_names:
             raise ValueError("Index level names must be strings")
 
-    def write(self, df, path, compression, **kwargs):
+    def write(self, df: DataFrame, path: FilePathOrBuffer, compression: str, **kwargs):
         raise AbstractMethodError(self)
 
-    def read(self, path, columns=None, **kwargs):
+    def read(
+        self, path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs
+    ):
         raise AbstractMethodError(self)
 
 
@@ -81,7 +85,9 @@ def __init__(self):
 
         self.api = pyarrow
 
-    def read(self, path, columns=None, **kwargs):
+    def read(
+        self, path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs
+    ) -> DataFrame:
         path, _, _, _ = get_filepath_or_buffer(path)
 
         py_file = self.api.input_stream(path)
@@ -92,7 +98,12 @@ def read(self, path, columns=None, **kwargs):
         return result
 
 
-def read_orc(path, engine="auto", columns=None, **kwargs):
+def read_orc(
+    path: FilePathOrBuffer,
+    engine: str = "auto",
+    columns: Optional[List[str]] = None,
+    **kwargs,
+):
     """
     Load an ORC object from the file path, returning a DataFrame.
 