From 2114a3c437953d7a375d9462a87cbea4188c490c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 7 Mar 2023 14:56:46 +0100 Subject: [PATCH 1/3] ERR: Raise ValueError when non-default index is given for orc format --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/io/orc.py | 16 ++++++++++++++++ pandas/tests/io/test_orc.py | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 12d35288d1ee6..35718f1f3b1ed 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -174,7 +174,7 @@ MultiIndex I/O ^^^ -- +- :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`0`) - Period diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 1b9be9adc1196..3ce8fc690700c 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -29,6 +29,7 @@ is_unsigned_integer_dtype, ) +from pandas import RangeIndex from pandas.core.arrays import ArrowExtensionArray from pandas.core.frame import DataFrame @@ -209,6 +210,21 @@ def to_orc( if engine_kwargs is None: engine_kwargs = {} + # validate index + # -------------- + + # validate that we have only a default index + # raise on anything else as we don't serialize the index + + if not df.index.equals(RangeIndex.from_range(range(len(df)))): + raise ValueError( + "orc does not support serializing a non-default index for the index; " + "you can .reset_index() to make the index into column(s)" + ) + + if df.index.name is not None: + raise ValueError("orc does not serialize index meta-data on a default index") + # If unsupported dtypes are found raise NotImplementedError # In Pyarrow 8.0.0 this check will no longer be needed if pa_version_under8p0: diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 9db19d4eb8448..e7a3d173976e5 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -406,3 +406,21 @@ def test_orc_uri_path(): uri = pathlib.Path(path).as_uri() result = read_orc(uri) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "index", + [ + pd.RangeIndex(start=2, stop=5, step=1), + pd.RangeIndex(start=0, stop=3, step=1, name="non-default"), + pd.Index([1, 2, 3]), + ], +) +def test_to_orc_non_default_index(index): + df = pd.DataFrame({"a": [1, 2, 3]}, index=index) + msg = ( + "orc does not support serializing a non-default index|" + "orc does not serialize index meta-data" + ) + with pytest.raises(ValueError, match=msg): + df.to_orc() From 6e7b7aad65408cd688c7799ab26d0cebab8fcbd7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 7 Mar 2023 23:53:23 +0000 Subject: [PATCH 2/3] Use default index --- pandas/io/orc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 3ce8fc690700c..a948011a046dc 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -29,9 +29,9 @@ is_unsigned_integer_dtype, ) -from pandas import RangeIndex from pandas.core.arrays import ArrowExtensionArray from pandas.core.frame import DataFrame +from pandas.core.indexes.api import default_index from pandas.io.common import ( get_handle, @@ -216,7 +216,7 @@ def to_orc( # validate that we have only a default index # raise on anything else as we don't serialize the index - if not df.index.equals(RangeIndex.from_range(range(len(df)))): + if not df.index.equals(default_index(len(df))): raise ValueError( "orc does not support serializing a non-default index for the index; " "you can .reset_index() to make the index into column(s)" From 040a1a2531099703f008055dc6bf6915d89f0e8e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 13 Mar 2023 23:41:09 +0100 Subject: [PATCH 3/3] Update doc/source/whatsnew/v2.1.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 35718f1f3b1ed..a0cf6410d3329 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -174,7 +174,7 @@ MultiIndex I/O ^^^ -- :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`0`) +- :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`) - Period