Skip to content

Commit 506fe53

Browse files
phofl and mroeschke
authored
ERR: Raise ValueError when non-default index is given for orc format (#51828)
* ERR: Raise ValueError when non-default index is given for orc format
* Use default index
* Update doc/source/whatsnew/v2.1.0.rst

Co-authored-by: Matthew Roeschke <[email protected]>

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 73dfc30 commit 506fe53

File tree

3 files changed

+35
-1
lines changed

3 files changed

+35
-1
lines changed

doc/source/whatsnew/v2.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ MultiIndex
184184

185185
I/O
186186
^^^
187-
-
187+
- :meth:`DataFrame.to_orc` now raises ``ValueError`` when a non-default :class:`Index` is given (:issue:`51828`)
188188
-
189189

190190
Period

pandas/io/orc.py

+16
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
)
2222

2323
import pandas as pd
24+
from pandas.core.indexes.api import default_index
2425

2526
from pandas.io.common import (
2627
get_handle,
@@ -190,6 +191,21 @@ def to_orc(
190191
if engine_kwargs is None:
191192
engine_kwargs = {}
192193

194+
# validate index
195+
# --------------
196+
197+
# validate that we have only a default index
198+
# raise on anything else as we don't serialize the index
199+
200+
if not df.index.equals(default_index(len(df))):
201+
raise ValueError(
202+
"orc does not support serializing a non-default index for the index; "
203+
"you can .reset_index() to make the index into column(s)"
204+
)
205+
206+
if df.index.name is not None:
207+
raise ValueError("orc does not serialize index meta-data on a default index")
208+
193209
# If unsupported dtypes are found raise NotImplementedError
194210
# In Pyarrow 8.0.0 this check will no longer be needed
195211
if pa_version_under8p0:

pandas/tests/io/test_orc.py

+18
Original file line numberDiff line numberDiff line change
@@ -391,3 +391,21 @@ def test_orc_uri_path():
391391
uri = pathlib.Path(path).as_uri()
392392
result = read_orc(uri)
393393
tm.assert_frame_equal(result, expected)
394+
395+
396+
@pytest.mark.parametrize(
    "index, msg",
    [
        # Index values do not start at 0, so the frame's index differs from
        # the default one and the "non-default index" error is expected.
        (
            pd.RangeIndex(start=2, stop=5, step=1),
            "orc does not support serializing a non-default index",
        ),
        # Same values as a 0..2 range; only the name is set. Index.equals
        # does not compare names, so this case is expected to reach the
        # separate index meta-data check instead.
        (
            pd.RangeIndex(start=0, stop=3, step=1, name="non-default"),
            "orc does not serialize index meta-data",
        ),
        # Integer index whose values are 1..3 rather than 0..2.
        (
            pd.Index([1, 2, 3]),
            "orc does not support serializing a non-default index",
        ),
    ],
)
def test_to_orc_non_default_index(index, msg):
    # Each index is paired with the one message it should trigger, so a
    # swapped or regressed validation branch in to_orc fails the test
    # instead of being masked by an either/or regex.
    df = pd.DataFrame({"a": [1, 2, 3]}, index=index)
    with pytest.raises(ValueError, match=msg):
        df.to_orc()

0 commit comments

Comments
 (0)