forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfeather_format.py
136 lines (105 loc) · 3.76 KB
/
feather_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
""" feather-format compat """
from typing import AnyStr
from pandas._typing import (
FilePathOrBuffer,
StorageOptions,
)
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc
from pandas import (
DataFrame,
Int64Index,
RangeIndex,
)
from pandas.core import generic
from pandas.io.common import get_handle
@doc(storage_options=generic._shared_docs["storage_options"])
def to_feather(
    df: DataFrame,
    path: FilePathOrBuffer[AnyStr],
    storage_options: StorageOptions = None,
    **kwargs,
):
    """
    Write a DataFrame to the binary Feather format.

    The frame is validated first: the index is not serialized, so only an
    unnamed default index is accepted, and all column labels must be strings.

    Parameters
    ----------
    df : DataFrame
        Frame to write.
    path : string file path, or file-like object
        Destination; it is opened for binary writing.
    {storage_options}

        .. versionadded:: 1.2.0

    **kwargs :
        Additional keywords passed to `pyarrow.feather.write_feather`.

        .. versionadded:: 1.1.0

    Raises
    ------
    ValueError
        If `df` is not a DataFrame, if its index is not an unnamed default
        index, or if its column names are not strings.
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    # Index validation: the index is not written out, so anything other than
    # an unnamed default index is rejected rather than dropped.
    index = df.index
    if not isinstance(index, (Int64Index, RangeIndex)):
        index_type = type(index)
        raise ValueError(
            f"feather does not support serializing {index_type} "
            "for the index; you can .reset_index() to make the index into column(s)"
        )

    default_index = RangeIndex.from_range(range(len(df)))
    if not index.equals(default_index):
        raise ValueError(
            "feather does not support serializing a non-default index for the index; "
            "you can .reset_index() to make the index into column(s)"
        )

    if index.name is not None:
        raise ValueError(
            "feather does not serialize index meta-data on a default index"
        )

    # Column validation: only string column names are accepted.
    string_kinds = {"string", "unicode"}
    column_kind = df.columns.inferred_type
    if column_kind not in string_kinds:
        raise ValueError("feather must have string column names")

    with get_handle(
        path, "wb", storage_options=storage_options, is_text=False
    ) as io_handles:
        feather.write_feather(df, io_handles.handle, **kwargs)
@doc(storage_options=generic._shared_docs["storage_options"])
def read_feather(
    path, columns=None, use_threads: bool = True, storage_options: StorageOptions = None
):
    """
    Load a feather-format object from the file path.

    Parameters
    ----------
    path : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.feather``.

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a binary ``read()``
        method, such as a file handle opened in binary mode (e.g. via the
        builtin ``open`` function) or ``BytesIO``.
    columns : sequence, default None
        If not provided, all columns are read.

        .. versionadded:: 0.24.0
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

        .. versionadded:: 0.24.0
    {storage_options}

        .. versionadded:: 1.2.0

    Returns
    -------
    type of object stored in file
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    with get_handle(
        path, "rb", storage_options=storage_options, is_text=False
    ) as io_handles:
        source = io_handles.handle
        # Coerce to a plain bool before handing the flag to pyarrow.
        threaded = bool(use_threads)
        return feather.read_feather(source, columns=columns, use_threads=threaded)