From e3f1687011d4de8f607906c23556cf1227519e08 Mon Sep 17 00:00:00 2001
From: Judah Rand <17158624+judahrand@users.noreply.github.com>
Date: Tue, 21 Sep 2021 22:20:03 +0100
Subject: [PATCH 1/6] Write compliant Parquet with `pyarrow` if supported

---
 pandas/io/parquet.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index e92afd4e35ca1..0dfb6b6678074 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -13,6 +13,7 @@
     FilePathOrBuffer,
     StorageOptions,
 )
+from pandas.compat import pa_version_under4p0
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import doc
@@ -179,6 +180,12 @@ def write(
             mode="wb",
             is_dir=partition_cols is not None,
         )
+
+        # Output compliant Parquet if PyArrow supports it and the user hasn't
+        # explicitly set the desired behavior
+        if not pa_version_under4p0 and "use_compliant_nested_type" not in kwargs:
+            kwargs["use_compliant_nested_type"] = True
+
         try:
             if partition_cols is not None:
                 # writes to multiple files under the given path
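Note (illustration, not part of the patch series): the `use_compliant_nested_type` keyword that this patch turns on by default controls how pyarrow names the inner field of a LIST column when writing Parquet. A minimal sketch of the difference it makes, assuming PyArrow >= 4.0.0 and an illustrative file name:

    import pandas as pd
    import pyarrow.parquet as pq

    df = pd.DataFrame({"a": [[1, 2], [3]]})

    # pyarrow's historical default names the inner list field "item".
    df.to_parquet("example.parquet", engine="pyarrow", use_compliant_nested_type=False)
    print(pq.read_schema("example.parquet").field("a").type)  # list<item: int64>

    # The Parquet LogicalTypes spec expects "element"; this is what the patch
    # now requests whenever the caller does not pass the keyword themselves.
    df.to_parquet("example.parquet", engine="pyarrow", use_compliant_nested_type=True)
    print(pq.read_schema("example.parquet").field("a").type)  # list<element: int64>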
From 39010eb97661e2fec782a8a82eff685b48c3ebb1 Mon Sep 17 00:00:00 2001
From: Judah Rand <17158624+judahrand@users.noreply.github.com>
Date: Wed, 22 Sep 2021 09:56:08 +0100
Subject: [PATCH 2/6] Import `pa_version_under5p0` in `compat` submodule

---
 pandas/compat/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 3233de8e3b6d1..c1fee9ad4121e 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -24,6 +24,7 @@
     pa_version_under2p0,
     pa_version_under3p0,
     pa_version_under4p0,
+    pa_version_under5p0,
 )

 PY39 = sys.version_info >= (3, 9)
@@ -155,4 +156,5 @@ def get_lzma_file(lzma):
     "pa_version_under2p0",
     "pa_version_under3p0",
     "pa_version_under4p0",
+    "pa_version_under5p0",
 ]

From cc7e6a89603cd675adc09e7bb7db55a3ab36f1c3 Mon Sep 17 00:00:00 2001
From: Judah Rand <17158624+judahrand@users.noreply.github.com>
Date: Wed, 22 Sep 2021 11:26:33 +0100
Subject: [PATCH 3/6] Add whatsnew entry

---
 doc/source/whatsnew/v1.4.0.rst | 40 ++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 87f47dc65cea9..982312b278373 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -186,9 +186,45 @@ Now the float-dtype is respected. Since the common dtype for these DataFrames is

     res

-.. _whatsnew_140.notable_bug_fixes.notable_bug_fix3:
+.. _whatsnew_140.notable_bug_fixes.write_compliant_parquet_nested_type:

-notable_bug_fix3
+Write compliant Parquet nested types if possible
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When using :meth:`DataFrame.to_parquet` to write a DataFrame to Parquet, if any of the columns contained arrays
+of values the :mod:`pyarrow` engine would write a non-compliant format. This behavior is now fixed when the installed
+version of PyArrow is at least ``4.0.0``.
+
+https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types
+
+.. ipython:: python
+
+    import pandas as pd
+    import pyarrow.parquet as pq
+
+    df = pd.DataFrame({"int_array_col": [[1, 2, 3], [4, 5, 6]]})
+    df.to_parquet("/tmp/sample_df")
+    parquet_table = pq.read_table("/tmp/sample_df")
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [4]: parquet_table.schema.types
+    Out[4]:
+    [ListType(list<item: int64>)]
+
+*New behavior*:
+
+.. code-block:: ipython
+
+    In [4]: parquet_table.schema.types
+    Out[4]:
+    [ListType(list<element: int64>)]
+
+.. _whatsnew_140.notable_bug_fixes.notable_bug_fix4:
+
+notable_bug_fix4
 ^^^^^^^^^^^^^^^^

 .. ---------------------------------------------------------------------------

From 17b4d1794e60ae39127c663e5486b39067f1dc7a Mon Sep 17 00:00:00 2001
From: Judah Rand <17158624+judahrand@users.noreply.github.com>
Date: Wed, 22 Sep 2021 19:10:50 +0100
Subject: [PATCH 4/6] Add test for compliant Parquet

---
 pandas/tests/io/test_parquet.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 01715ee133e96..b9c225cc98542 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -931,6 +931,17 @@ def test_read_parquet_manager(self, pa, using_array_manager):
         else:
             assert isinstance(result._mgr, pd.core.internals.BlockManager)

+    @td.skip_if_no("pyarrow", min_version="4.0.0")
+    def test_list_column_results_in_compliant_parquet(self, pa):
+        # https://github.com/pandas-dev/pandas/issues/43689
+        df = pd.DataFrame({"a": [[1], [2]]})
+
+        with tm.ensure_clean() as path:
+            df.to_parquet(path, pa)
+            result = pyarrow.parquet.read_table(path)
+
+        assert str(result.schema.field_by_name("a").type) == "list<element: int64>"
+

 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full):

From 5f06acdc240b3c90c23294364afc1b75aade9a74 Mon Sep 17 00:00:00 2001
From: Judah Rand <17158624+judahrand@users.noreply.github.com>
Date: Wed, 22 Sep 2021 22:57:18 +0100
Subject: [PATCH 5/6] Add `check_round_trip` to test

---
 pandas/tests/io/test_parquet.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index b9c225cc98542..f263ca66b00a0 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -941,6 +941,7 @@ def test_list_column_results_in_compliant_parquet(self, pa):
             result = pyarrow.parquet.read_table(path)

         assert str(result.schema.field_by_name("a").type) == "list<element: int64>"
+        check_round_trip(df, pa)

 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full):
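Note (illustration, not part of the patch series): the new test asserts on the Arrow-level type name, but "compliant" ultimately refers to the physical Parquet schema described in the LogicalTypes specification, where a LIST column is a three-level group whose innermost field is named "element". A rough way to inspect that structure directly, reusing the test's DataFrame and an illustrative path, with this series applied:

    import pandas as pd
    import pyarrow.parquet as pq

    df = pd.DataFrame({"a": [[1], [2]]})
    df.to_parquet("compliant.parquet", engine="pyarrow")

    # Prints the Parquet (not Arrow) schema; with the compliant layout the
    # column appears as roughly:
    #   optional group a (List) {
    #     repeated group list {
    #       optional int64 element;
    #     }
    #   }
    print(pq.ParquetFile("compliant.parquet").schema)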
From 4025014cfc642573fad1c8ebf2dbffd065b2455a Mon Sep 17 00:00:00 2001
From: Judah Rand <17158624+judahrand@users.noreply.github.com>
Date: Thu, 30 Sep 2021 21:58:08 +0100
Subject: [PATCH 6/6] Replace long What's New entry with one liner

---
 doc/source/whatsnew/v1.4.0.rst | 41 +++-------------------------------
 1 file changed, 3 insertions(+), 38 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 982312b278373..a6eaa8f2f57f8 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -125,6 +125,7 @@ Other enhancements
 - Attempting to write into a file in missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions missing parent directory, the same is true for :class:`Series` counterparts (:issue:`24306`)
 - :meth:`IntegerArray.all` , :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`)
 - Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`)
+- :meth:`DataFrame.to_parquet` now writes Parquet compliant data for columns which contain lists or arrays when using PyArrow 4.0.0 or greater (:issue:`43689`)
 -

 .. ---------------------------------------------------------------------------
@@ -186,45 +187,9 @@ Now the float-dtype is respected. Since the common dtype for these DataFrames is

     res

-.. _whatsnew_140.notable_bug_fixes.write_compliant_parquet_nested_type:
+.. _whatsnew_140.notable_bug_fixes.notable_bug_fix3:

-Write compliant Parquet nested types if possible
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When using :meth:`DataFrame.to_parquet` to write a DataFrame to Parquet, if any of the columns contained arrays
-of values the :mod:`pyarrow` engine would write a non-compliant format. This behavior is now fixed when the installed
-version of PyArrow is at least ``4.0.0``.
-
-https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types
-
-.. ipython:: python
-
-    import pandas as pd
-    import pyarrow.parquet as pq
-
-    df = pd.DataFrame({"int_array_col": [[1, 2, 3], [4, 5, 6]]})
-    df.to_parquet("/tmp/sample_df")
-    parquet_table = pq.read_table("/tmp/sample_df")
-
-*Previous behavior*:
-
-.. code-block:: ipython
-
-    In [4]: parquet_table.schema.types
-    Out[4]:
-    [ListType(list<item: int64>)]
-
-*New behavior*:
-
-.. code-block:: ipython
-
-    In [4]: parquet_table.schema.types
-    Out[4]:
-    [ListType(list<element: int64>)]
-
-.. _whatsnew_140.notable_bug_fixes.notable_bug_fix4:
-
-notable_bug_fix4
+notable_bug_fix3
 ^^^^^^^^^^^^^^^^

 .. ---------------------------------------------------------------------------
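Note (illustration, not part of the patch series): because the default is only injected when "use_compliant_nested_type" is absent from the keyword arguments, callers who depend on the old field naming can still opt out by passing the option through :meth:`DataFrame.to_parquet`, whose extra keyword arguments are forwarded to pyarrow. A minimal sketch, with an illustrative file name:

    import pandas as pd

    df = pd.DataFrame({"a": [[1], [2]]})
    # Explicitly request the previous (non-compliant) "item" naming, e.g. for a
    # downstream reader that expects it; pandas will not override this value.
    df.to_parquet("legacy.parquet", engine="pyarrow", use_compliant_nested_type=False)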