pandas-dev · judahrand · Sep 21, 2021 · Sep 22, 2021 · Sep 22, 2021 · Sep 22, 2021
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -186,9 +186,45 @@ Now the float-dtype is respected. Since the common dtype for these DataFrames is
 
     res
 
-.. _whatsnew_140.notable_bug_fixes.notable_bug_fix3:
+.. _whatsnew_140.notable_bug_fixes.write_compliant_parquet_nested_type:
 
-notable_bug_fix3
+Write compliant Parquet nested types if possible
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously, when :meth:`DataFrame.to_parquet` wrote a DataFrame whose columns contained arrays of values,
+the :mod:`pyarrow` engine produced nested types that did not comply with the Parquet format specification.
+Compliant nested types are now written whenever the installed version of PyArrow is at least ``4.0.0``.
+
+See the Parquet format specification for nested types: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types
+
+.. ipython:: python
+
+    import pandas as pd
+    import pyarrow.parquet as pq
+
+    df = pd.DataFrame({"int_array_col": [[1, 2, 3], [4, 5, 6]]})
+    df.to_parquet("/tmp/sample_df")
+    parquet_table = pq.read_table("/tmp/sample_df")
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [4]: parquet_table.schema.types
+    Out[4]:
+    [ListType(list<item: int64>)]
+
+*New behavior*:
+
+.. code-block:: ipython
+
+    In [4]: parquet_table.schema.types
+    Out[4]:
+    [ListType(list<element: int64>)]
+
+.. _whatsnew_140.notable_bug_fixes.notable_bug_fix4:
+
+notable_bug_fix4
 ^^^^^^^^^^^^^^^^
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -24,6 +24,7 @@
     pa_version_under2p0,
     pa_version_under3p0,
     pa_version_under4p0,
+    pa_version_under5p0,
 )
 
 PY39 = sys.version_info >= (3, 9)
@@ -155,4 +156,5 @@ def get_lzma_file(lzma):
     "pa_version_under2p0",
     "pa_version_under3p0",
     "pa_version_under4p0",
+    "pa_version_under5p0",
 ]
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
@@ -13,6 +13,7 @@
     FilePathOrBuffer,
     StorageOptions,
 )
+from pandas.compat import pa_version_under4p0
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import doc
@@ -179,6 +180,12 @@ def write(
             mode="wb",
             is_dir=partition_cols is not None,
         )
+
+        # Default to compliant Parquet nested types when PyArrow supports the option
+        # (>= 4.0.0) and the caller has not set "use_compliant_nested_type" explicitly
+        if not pa_version_under4p0 and "use_compliant_nested_type" not in kwargs:
+            kwargs["use_compliant_nested_type"] = True
+
         try:
             if partition_cols is not None:
                 # writes to multiple files under the given path

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -931,6 +931,17 @@ def test_read_parquet_manager(self, pa, using_array_manager):
         else:
             assert isinstance(result._mgr, pd.core.internals.BlockManager)
 
+    @td.skip_if_no("pyarrow", min_version="4.0.0")
+    def test_list_column_results_in_compliant_parquet(self, pa):
+        # GH#43689: the written list type must use the compliant "element" field name
+        df = pd.DataFrame({"a": [[1], [2]]})
+
+        with tm.ensure_clean() as path:
+            df.to_parquet(path, pa)
+            result = pyarrow.parquet.read_table(path)
+
+        assert str(result.schema.field_by_name("a").type) == "list<element: int64>"
+
 
 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full):