Fix extension dtype index handling (#1333)

kukushking · jaidisido · web-flow · commit 598c2e1a998e · 2022-05-19T10:28:53.000+01:00
* Fix extension dtype index handling

* mypy

* always reset_index when inferring index dtypes

* reset &amp; use column schema if index is using extension dtype

Co-authored-by: jaidisido &lt;jaidisido@gmail.com&gt;
diff --git a/awswrangler/_data_types.py b/awswrangler/_data_types.py
@@ -419,7 +419,7 @@ def pyarrow2pandas_extension(  # pylint: disable=too-many-branches,too-many-retu
     return None
 
 
-def pyarrow_types_from_pandas(
+def pyarrow_types_from_pandas(  # pylint: disable=too-many-branches
     df: pd.DataFrame, index: bool, ignore_cols: Optional[List[str]] = None, index_left: bool = False
 ) -> Dict[str, pa.DataType]:
     """Extract the related Pyarrow data types from any Pandas DataFrame."""
@@ -469,7 +469,19 @@ def pyarrow_types_from_pandas(
     # Filling indexes
     indexes: List[str] = []
     if index is True:
-        for field in pa.Schema.from_pandas(df=df[[]], preserve_index=True):
+        # Get index columns
+        try:
+            fields = pa.Schema.from_pandas(df=df[[]], preserve_index=True)
+        except AttributeError as ae:
+            if "'Index' object has no attribute 'head'" not in str(ae):
+                raise ae
+            # Get index fields from a new df with only index columns
+            # Adding indexes as columns via .reset_index() because
+            # pa.Schema.from_pandas(.., preserve_index=True) fails with
+            # "'Index' object has no attribute 'head'" if using extension
+            # dtypes on pandas 1.4.x
+            fields = pa.Schema.from_pandas(df=df.reset_index().drop(columns=cols), preserve_index=False)
+        for field in fields:
             name = str(field.name)
             _logger.debug("Inferring PyArrow type from index: %s", name)
             cols_dtypes[name] = field.type