Skip to content

Commit 598c2e1

Browse files
Fix extension dtype index handling (#1333)
* Fix extension dtype index handling
* mypy
* always reset_index when inferring index dtypes
* reset & use column schema if index is using extension dtype

Co-authored-by: jaidisido <[email protected]>
1 parent b3c5676 commit 598c2e1

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

awswrangler/_data_types.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ def pyarrow2pandas_extension( # pylint: disable=too-many-branches,too-many-retu
419419
return None
420420

421421

422-
def pyarrow_types_from_pandas(
422+
def pyarrow_types_from_pandas( # pylint: disable=too-many-branches
423423
df: pd.DataFrame, index: bool, ignore_cols: Optional[List[str]] = None, index_left: bool = False
424424
) -> Dict[str, pa.DataType]:
425425
"""Extract the related Pyarrow data types from any Pandas DataFrame."""
@@ -469,7 +469,19 @@ def pyarrow_types_from_pandas(
469469
# Filling indexes
470470
indexes: List[str] = []
471471
if index is True:
472-
for field in pa.Schema.from_pandas(df=df[[]], preserve_index=True):
472+
# Get index columns
473+
try:
474+
fields = pa.Schema.from_pandas(df=df[[]], preserve_index=True)
475+
except AttributeError as ae:
476+
if "'Index' object has no attribute 'head'" not in str(ae):
477+
raise ae
478+
# Get index fields from a new df with only index columns
479+
# Adding indexes as columns via .reset_index() because
480+
# pa.Schema.from_pandas(.., preserve_index=True) fails with
481+
# "'Index' object has no attribute 'head'" if using extension
482+
# dtypes on pandas 1.4.x
483+
fields = pa.Schema.from_pandas(df=df.reset_index().drop(columns=cols), preserve_index=False)
484+
for field in fields:
473485
name = str(field.name)
474486
_logger.debug("Inferring PyArrow type from index: %s", name)
475487
cols_dtypes[name] = field.type

0 commit comments

Comments
 (0)