Skip to content

Commit 0710646

Browse files
committed
Allow non-default indexes in to_parquet.
...when supported by the underlying engine. Fixes pandas-dev#18581
1 parent fdba133 commit 0710646

File tree

1 file changed

+25
-23
lines changed

1 file changed

+25
-23
lines changed

pandas/io/parquet.py

+25-23
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,9 @@ def __init__(self):
5858
"\nor via pip\n"
5959
"pip install -U pyarrow\n")
6060

61-
self._pyarrow_lt_050 = (LooseVersion(pyarrow.__version__) <
62-
LooseVersion('0.5.0'))
63-
self._pyarrow_lt_060 = (LooseVersion(pyarrow.__version__) <
64-
LooseVersion('0.6.0'))
61+
self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0'
62+
self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0'
63+
self._pyarrow_lt_070 = LooseVersion(pyarrow.__version__) < '0.7.0'
6564
self.api = pyarrow
6665

6766
def write(self, df, path, compression='snappy',
@@ -149,27 +148,30 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
149148

150149
valid_types = {'string', 'unicode'}
151150

152-
# validate index
153-
# --------------
154-
155151
# validate that we have only a default index
156152
# raise on anything else as we don't serialize the index
157-
158-
if not isinstance(df.index, Int64Index):
159-
raise ValueError("parquet does not support serializing {} "
160-
"for the index; you can .reset_index()"
161-
"to make the index into column(s)".format(
162-
type(df.index)))
163-
164-
if not df.index.equals(RangeIndex.from_range(range(len(df)))):
165-
raise ValueError("parquet does not support serializing a "
166-
"non-default index for the index; you "
167-
"can .reset_index() to make the index "
168-
"into column(s)")
169-
170-
if df.index.name is not None:
171-
raise ValueError("parquet does not serialize index meta-data on a "
172-
"default index")
153+
# *unless* we're using pyarrow >= 0.7.0, which does support non-default indexes (including multi-indexes)
154+
if impl.api.__name__ == 'pyarrow' and not impl._pyarrow_lt_070:
155+
validate_index = False
156+
else:
157+
validate_index = True
158+
159+
if validate_index:
160+
if not isinstance(df.index, Int64Index):
161+
raise ValueError("parquet does not support serializing {} "
162+
"for the index; you can .reset_index()"
163+
"to make the index into column(s)".format(
164+
type(df.index)))
165+
166+
if not df.index.equals(RangeIndex.from_range(range(len(df)))):
167+
raise ValueError("parquet does not support serializing a "
168+
"non-default index for the index; you "
169+
"can .reset_index() to make the index "
170+
"into column(s)")
171+
172+
if df.index.name is not None:
173+
raise ValueError("parquet does not serialize index meta-data on a "
174+
"default index")
173175

174176
# validate columns
175177
# ----------------

0 commit comments

Comments
 (0)