|
44 | 44 | pandas_dtype,
|
45 | 45 | )
|
46 | 46 | from pandas.core.dtypes.dtypes import (
|
| 47 | + ArrowDtype, |
47 | 48 | CategoricalDtype,
|
| 49 | + CategoricalDtypeType, |
48 | 50 | ExtensionDtype,
|
49 | 51 | )
|
50 | 52 | from pandas.core.dtypes.generic import (
|
@@ -443,24 +445,32 @@ def __init__(
|
443 | 445 | values = arr
|
444 | 446 |
|
445 | 447 | if dtype.categories is None:
|
446 |
| - if not isinstance(values, ABCIndex): |
447 |
| - # in particular RangeIndex xref test_index_equal_range_categories |
448 |
| - values = sanitize_array(values, None) |
449 |
| - try: |
450 |
| - codes, categories = factorize(values, sort=True) |
451 |
| - except TypeError as err: |
452 |
| - codes, categories = factorize(values, sort=False) |
453 |
| - if dtype.ordered: |
454 |
| - # raise, as we don't have a sortable data structure and so |
455 |
| - # the user should give us one by specifying categories |
456 |
| - raise TypeError( |
457 |
| - "'values' is not ordered, please " |
458 |
| - "explicitly specify the categories order " |
459 |
| - "by passing in a categories argument." |
460 |
| - ) from err |
461 |
| - |
462 |
| - # we're inferring from values |
463 |
| - dtype = CategoricalDtype(categories, dtype.ordered) |
| 448 | + if isinstance(values.dtype, ArrowDtype) and issubclass( |
| 449 | + values.dtype.type, CategoricalDtypeType |
| 450 | + ): |
| 451 | + arr = values._pa_array.combine_chunks() |
| 452 | + categories = arr.dictionary.to_pandas(types_mapper=ArrowDtype) |
| 453 | + codes = arr.indices.to_numpy() |
| 454 | + dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered) |
| 455 | + else: |
| 456 | + if not isinstance(values, ABCIndex): |
| 457 | + # in particular RangeIndex xref test_index_equal_range_categories |
| 458 | + values = sanitize_array(values, None) |
| 459 | + try: |
| 460 | + codes, categories = factorize(values, sort=True) |
| 461 | + except TypeError as err: |
| 462 | + codes, categories = factorize(values, sort=False) |
| 463 | + if dtype.ordered: |
| 464 | + # raise, as we don't have a sortable data structure and so |
| 465 | + # the user should give us one by specifying categories |
| 466 | + raise TypeError( |
| 467 | + "'values' is not ordered, please " |
| 468 | + "explicitly specify the categories order " |
| 469 | + "by passing in a categories argument." |
| 470 | + ) from err |
| 471 | + |
| 472 | + # we're inferring from values |
| 473 | + dtype = CategoricalDtype(categories, dtype.ordered) |
464 | 474 |
|
465 | 475 | elif isinstance(values.dtype, CategoricalDtype):
|
466 | 476 | old_codes = extract_array(values)._codes
|
|
0 commit comments