Skip to content

Commit 6c9a33e

Browse files
committed
add categories_dtype to dtype string
1 parent d4aa357 commit 6c9a33e

File tree

16 files changed

+158
-127
lines changed

16 files changed

+158
-127
lines changed

pandas/_libs/lib.pyx

+6
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,12 @@ cdef object _try_infer_map(object dtype):
13881388
val = getattr(dtype, attr, None)
13891389
if val in _TYPE_MAP:
13901390
return _TYPE_MAP[val]
1391+
1392+
# A CategoricalDtype's name may be "category[<dtype>]", which the _TYPE_MAP lookup above does not match
1393+
name = getattr(dtype, "name", None)
1394+
if name.startswith("category["):
1395+
return _TYPE_MAP["category"]
1396+
13911397
return None
13921398

13931399

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1999,7 +1999,7 @@ def _repr_categories_info(self) -> str:
19991999

20002000
def _repr_footer(self) -> str:
20012001
info = self._repr_categories_info()
2002-
return f"Length: {len(self)}\n{info}"
2002+
return f"Length: {len(self)}, dtype: {self.dtype}\n{info}"
20032003

20042004
def _get_repr(
20052005
self, length: bool = True, na_rep: str = "NaN", footer: bool = True

pandas/core/dtypes/common.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -502,9 +502,8 @@ def is_categorical_dtype(arr_or_dtype) -> bool:
502502
FutureWarning,
503503
stacklevel=find_stack_level(),
504504
)
505-
if isinstance(arr_or_dtype, ExtensionDtype):
506-
# GH#33400 fastpath for dtype object
507-
return arr_or_dtype.name == "category"
505+
if isinstance(arr_or_dtype, CategoricalDtype):
506+
return True
508507

509508
if arr_or_dtype is None:
510509
return False

pandas/core/dtypes/dtypes.py

+30-15
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,6 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
202202
"""
203203

204204
# TODO: Document public vs. private API
205-
name = "category"
206205
type: type[CategoricalDtypeType] = CategoricalDtypeType
207206
kind: str_type = "O"
208207
str = "|O08"
@@ -315,12 +314,12 @@ def _from_values_or_dtype(
315314
if dtype is not None:
316315
# The dtype argument takes precedence over values.dtype (if any)
317316
if isinstance(dtype, str):
318-
if dtype == "category":
317+
if dtype.startswith("category"):
319318
if ordered is None and cls.is_dtype(values):
320319
# GH#49309 preserve orderedness
321320
ordered = values.dtype.ordered
322-
323-
dtype = CategoricalDtype(categories, ordered)
321+
cat_dtype = cls._get_categories_dtype_from_string(dtype)
322+
dtype = CategoricalDtype(categories, ordered, cat_dtype)
324323
else:
325324
raise ValueError(f"Unknown dtype {repr(dtype)}")
326325
elif categories is not None or ordered is not None:
@@ -371,20 +370,27 @@ def construct_from_string(cls, string: str_type) -> CategoricalDtype:
371370
# need ordered=None to ensure that operations specifying dtype="category" don't
372371
# override the ordered value for existing categoricals
373372

374-
if string == cls.name:
373+
if string == "category":
375374
return cls(ordered=None)
376375

377376
msg = f"Cannot construct a '{cls.__name__}' from '{string}'"
377+
categories_dtype = cls._get_categories_dtype_from_string(string)
378+
if categories_dtype is None:
379+
raise TypeError(msg)
380+
try:
381+
return cls(categories_dtype=categories_dtype)
382+
except (KeyError, TypeError, ValueError) as err:
383+
# KeyError if the "categories_dtype" key is not found
384+
# TypeError if we pass a nonsense value
385+
raise TypeError(msg) from err
386+
387+
@classmethod
388+
def _get_categories_dtype_from_string(cls, string: str_type) -> str_type | None:
378389
match = cls._match.match(string)
379-
if match:
380-
d = match.groupdict()
381-
try:
382-
return cls(categories_dtype=d["categories_dtype"])
383-
except (KeyError, TypeError, ValueError) as err:
384-
# keyError is if "categories_dtype" key is not found
385-
# TypeError if we pass a nonsense;
386-
raise TypeError(msg) from err
387-
raise TypeError(msg)
390+
if match is None:
391+
return None
392+
d = match.groupdict()
393+
return d.get("categories_dtype")
388394

389395
@property
390396
def categories_dtype(self) -> Dtype:
@@ -435,7 +441,7 @@ def __eq__(self, other: Any) -> bool:
435441
6) Any other comparison returns False
436442
"""
437443
if isinstance(other, str):
438-
return other == self.name
444+
return other == self.name or other == "category"
439445
elif other is self:
440446
return True
441447
elif not (hasattr(other, "ordered") and hasattr(other, "categories")):
@@ -497,6 +503,15 @@ def __repr__(self) -> str_type:
497503
f"categories_dtype={self.categories_dtype})"
498504
)
499505

506+
@property
507+
def name(self) -> str_type:
508+
if self.categories is not None:
509+
return f"category[{self.categories.dtype}]"
510+
elif self.categories_dtype is not None:
511+
return f"category[{self.categories_dtype}]"
512+
else:
513+
return "category"
514+
500515
@cache_readonly
501516
def _hash_categories(self) -> int:
502517
from pandas.core.util.hashing import (

pandas/tests/arrays/categorical/test_operators.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def test_comparison_with_unknown_scalars(self):
169169
# for unequal comps, but not for equal/not equal
170170
cat = Categorical([1, 2, 3], ordered=True)
171171

172-
msg = "Invalid comparison between dtype=category and int"
172+
msg = r"Invalid comparison between dtype=category\[int64\] and int"
173173
with pytest.raises(TypeError, match=msg):
174174
cat < 4
175175
with pytest.raises(TypeError, match=msg):
@@ -398,6 +398,6 @@ def test_numeric_like_ops_series_arith(self, op, str_rep):
398398
def test_numeric_like_ops_series_invalid(self):
399399
# invalid ufunc
400400
s = Series(Categorical([1, 2, 3, 4]))
401-
msg = "Object with dtype category cannot perform the numpy op log"
401+
msg = r"Object with dtype category\[int64\] cannot perform the numpy op log"
402402
with pytest.raises(TypeError, match=msg):
403403
np.log(s)

0 commit comments

Comments
 (0)