diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 4d4b9e086e9e5..3865089703c71 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5464,6 +5464,25 @@ The above example creates a partitioned dataset that may look like: except OSError: pass +.. note:: + + * The parquet representation of ``StringDtype`` is the same regardless of the storage backend (``"python"`` or ``"pyarrow"``). + * When reading, the resulting storage is determined by the ``string_storage`` option, not by the storage used when writing. + +.. ipython:: python + + df1 = pd.DataFrame({"A": pd.array(['a', 'b'], dtype=pd.StringDtype("pyarrow"))}) + df2 = pd.DataFrame({"A": pd.array(['a', 'b'], dtype=pd.StringDtype("python"))}) + df1.to_parquet("test.parquet") + with pd.option_context("string_storage", "pyarrow"): + df3 = pd.read_parquet("test.parquet") + pd.testing.assert_frame_equal(df3, df1) + df2.to_parquet("test.parquet") + with pd.option_context("string_storage", "pyarrow"): + df4 = pd.read_parquet("test.parquet") + pd.testing.assert_frame_equal(df4, df1) + + .. _io.orc: ORC diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index c193df5118926..fa128ce983041 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -81,6 +81,11 @@ or convert from existing pandas data: s2 type(s2[0]) +.. note:: + + * The parquet representation of ``StringDtype`` is the same regardless of the storage backend (``"python"`` or ``"pyarrow"``). + * When reading, the resulting storage is determined by the ``string_storage`` option, not by the storage used when writing. + .. _text.differences: