
Commit 5b76edf

update doc-links & add typing
1 parent: a5be781

File tree

3 files changed: +38 -6 lines

doc/source/reference/io.rst (+7)

@@ -98,6 +98,13 @@ Parquet
 
    read_parquet
 
+ORC
+~~~
+.. autosummary::
+   :toctree: api/
+
+   read_orc
+
 SAS
 ~~~
 .. autosummary::

doc/source/user_guide/io.rst (+14)

@@ -4856,6 +4856,20 @@ The above example creates a partitioned dataset that may look like:
    except OSError:
        pass
 
+.. _io.orc:
+
+ORC
+---
+
+.. versionadded:: 1.0.0
+
+Similar to the :ref:`parquet <io.parquet>` format, the `ORC Format <https://orc.apache.org/>`__ is a binary columnar
+serialization for data frames. It is designed to make reading data frames efficient. Pandas provides *only* a reader
+for the ORC format, :func:`~pandas.read_orc`.
+
+See the documentation for `pyarrow <https://arrow.apache.org/docs/python/>`__ for more details.
+
+
 .. _io.sql:
 
 SQL queries
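
For context, a minimal usage sketch of the reader documented above. The file name example.orc and the column names are placeholders, and read_orc requires the optional pyarrow dependency to be installed:

import pandas as pd

# Read a whole ORC file into a DataFrame (pyarrow does the deserialization).
df = pd.read_orc("example.orc")

# Read only a subset of columns; "a" and "b" are placeholder column names.
subset = pd.read_orc("example.orc", columns=["a", "b"])
print(subset.dtypes)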

pandas/io/orc.py (+17 -6)

@@ -1,16 +1,18 @@
 """ orc compat """
 
 import distutils
+from typing import List, Optional
 
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import AbstractMethodError
 
 from pandas import DataFrame, get_option
+from pandas._typing import FilePathOrBuffer
 
 from pandas.io.common import get_filepath_or_buffer
 
 
-def get_engine(engine):
+def get_engine(engine: str) -> "PyArrowImpl":
     """ return our implementation """
 
     if engine == "auto":
@@ -42,7 +44,7 @@ class BaseImpl:
     api = None  # module
 
     @staticmethod
-    def validate_dataframe(df):
+    def validate_dataframe(df: DataFrame):
 
         if not isinstance(df, DataFrame):
             raise ValueError("to_orc only supports IO with DataFrames")
@@ -58,10 +60,12 @@ def validate_dataframe(df):
         if not valid_names:
             raise ValueError("Index level names must be strings")
 
-    def write(self, df, path, compression, **kwargs):
+    def write(self, df: DataFrame, path: FilePathOrBuffer, compression: str, **kwargs):
         raise AbstractMethodError(self)
 
-    def read(self, path, columns=None, **kwargs):
+    def read(
+        self, path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs
+    ):
         raise AbstractMethodError(self)
 
 
@@ -81,7 +85,9 @@ def __init__(self):
 
         self.api = pyarrow
 
-    def read(self, path, columns=None, **kwargs):
+    def read(
+        self, path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs
+    ) -> DataFrame:
         path, _, _, _ = get_filepath_or_buffer(path)
 
         py_file = self.api.input_stream(path)
@@ -92,7 +98,12 @@ def read(self, path, columns=None, **kwargs):
         return result
 
 
-def read_orc(path, engine="auto", columns=None, **kwargs):
+def read_orc(
+    path: FilePathOrBuffer,
+    engine: str = "auto",
+    columns: Optional[List[str]] = None,
+    **kwargs,
+):
     """
     Load an ORC object from the file path, returning a DataFrame.
 