1
+ from csv import QUOTE_NONNUMERIC
2
+ from functools import partial
1
3
import operator
2
4
from shutil import get_terminal_size
3
5
from typing import Dict , Hashable , List , Type , Union , cast
@@ -275,17 +277,17 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject):
275
277
Categories (3, int64): [1, 2, 3]
276
278
277
279
>>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'])
278
- [a, b, c, a, b, c ]
279
- Categories (3, object): [a, b, c ]
280
+ ['a', 'b', 'c', 'a', 'b', 'c' ]
281
+ Categories (3, object): ['a', 'b', 'c' ]
280
282
281
283
Ordered `Categoricals` can be sorted according to the custom order
282
284
of the categories and can have a min and max value.
283
285
284
286
>>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], ordered=True,
285
287
... categories=['c', 'b', 'a'])
286
288
>>> c
287
- [a, b, c, a, b, c ]
288
- Categories (3, object): [c < b < a ]
289
+ ['a', 'b', 'c', 'a', 'b', 'c' ]
290
+ Categories (3, object): ['c' < 'b' < 'a' ]
289
291
>>> c.min()
290
292
'c'
291
293
"""
@@ -598,8 +600,8 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
598
600
--------
599
601
>>> dtype = pd.CategoricalDtype(['a', 'b'], ordered=True)
600
602
>>> pd.Categorical.from_codes(codes=[0, 1, 0, 1], dtype=dtype)
601
- [a, b, a, b ]
602
- Categories (2, object): [a < b ]
603
+ ['a', 'b', 'a', 'b' ]
604
+ Categories (2, object): ['a' < 'b' ]
603
605
"""
604
606
dtype = CategoricalDtype ._from_values_or_dtype (
605
607
categories = categories , ordered = ordered , dtype = dtype
@@ -659,13 +661,13 @@ def _set_categories(self, categories, fastpath=False):
659
661
--------
660
662
>>> c = pd.Categorical(['a', 'b'])
661
663
>>> c
662
- [a, b ]
663
- Categories (2, object): [a, b ]
664
+ ['a', 'b' ]
665
+ Categories (2, object): ['a', 'b' ]
664
666
665
667
>>> c._set_categories(pd.Index(['a', 'c']))
666
668
>>> c
667
- [a, c ]
668
- Categories (2, object): [a, c ]
669
+ ['a', 'c' ]
670
+ Categories (2, object): ['a', 'c' ]
669
671
"""
670
672
if fastpath :
671
673
new_dtype = CategoricalDtype ._from_fastpath (categories , self .ordered )
@@ -885,14 +887,14 @@ def rename_categories(self, new_categories, inplace=False):
885
887
categories not in the dictionary are passed through
886
888
887
889
>>> c.rename_categories({'a': 'A', 'c': 'C'})
888
- [A, A, b ]
889
- Categories (2, object): [A, b ]
890
+ ['A', 'A', 'b' ]
891
+ Categories (2, object): ['A', 'b' ]
890
892
891
893
You may also provide a callable to create the new categories
892
894
893
895
>>> c.rename_categories(lambda x: x.upper())
894
- [A, A, B ]
895
- Categories (2, object): [A, B ]
896
+ ['A', 'A', 'B' ]
897
+ Categories (2, object): ['A', 'B' ]
896
898
"""
897
899
inplace = validate_bool_kwarg (inplace , "inplace" )
898
900
cat = self if inplace else self .copy ()
@@ -1128,22 +1130,22 @@ def map(self, mapper):
1128
1130
--------
1129
1131
>>> cat = pd.Categorical(['a', 'b', 'c'])
1130
1132
>>> cat
1131
- [a, b, c ]
1132
- Categories (3, object): [a, b, c ]
1133
+ ['a', 'b', 'c' ]
1134
+ Categories (3, object): ['a', 'b', 'c' ]
1133
1135
>>> cat.map(lambda x: x.upper())
1134
- [A, B, C ]
1135
- Categories (3, object): [A, B, C ]
1136
+ ['A', 'B', 'C' ]
1137
+ Categories (3, object): ['A', 'B', 'C' ]
1136
1138
>>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'})
1137
- [first, second, third]
1138
- Categories (3, object): [first, second, third]
1139
+ ['first', 'second', 'third']
1140
+ Categories (3, object): ['first', 'second', 'third']
1139
1141
1140
1142
If the mapping is one-to-one the ordering of the categories is
1141
1143
preserved:
1142
1144
1143
1145
>>> cat = pd.Categorical(['a', 'b', 'c'], ordered=True)
1144
1146
>>> cat
1145
- [a, b, c ]
1146
- Categories (3, object): [a < b < c ]
1147
+ ['a', 'b', 'c' ]
1148
+ Categories (3, object): ['a' < 'b' < 'c' ]
1147
1149
>>> cat.map({'a': 3, 'b': 2, 'c': 1})
1148
1150
[3, 2, 1]
1149
1151
Categories (3, int64): [3 < 2 < 1]
@@ -1778,29 +1780,29 @@ def take(self: _T, indexer, allow_fill: bool = False, fill_value=None) -> _T:
1778
1780
--------
1779
1781
>>> cat = pd.Categorical(['a', 'a', 'b'])
1780
1782
>>> cat
1781
- [a, a, b ]
1782
- Categories (2, object): [a, b ]
1783
+ ['a', 'a', 'b' ]
1784
+ Categories (2, object): ['a', 'b' ]
1783
1785
1784
1786
Specify ``allow_fill=False`` to have negative indices mean indexing
1785
1787
from the right.
1786
1788
1787
1789
>>> cat.take([0, -1, -2], allow_fill=False)
1788
- [a, b, a ]
1789
- Categories (2, object): [a, b ]
1790
+ ['a', 'b', 'a' ]
1791
+ Categories (2, object): ['a', 'b' ]
1790
1792
1791
1793
With ``allow_fill=True``, indices equal to ``-1`` mean "missing"
1792
1794
values that should be filled with the `fill_value`, which is
1793
1795
``np.nan`` by default.
1794
1796
1795
1797
>>> cat.take([0, -1, -1], allow_fill=True)
1796
- [a , NaN, NaN]
1797
- Categories (2, object): [a, b ]
1798
+ ['a', NaN, NaN]
1799
+ Categories (2, object): ['a', 'b' ]
1798
1800
1799
1801
The fill value can be specified.
1800
1802
1801
1803
>>> cat.take([0, -1, -1], allow_fill=True, fill_value='a')
1802
- [a, a, a ]
1803
- Categories (2, object): [a, b ]
1804
+ ['a', 'a', 'a' ]
1805
+ Categories (2, object): ['a', 'b' ]
1804
1806
1805
1807
Specifying a fill value that's not in ``self.categories``
1806
1808
will raise a ``ValueError``.
@@ -1872,13 +1874,16 @@ def _repr_categories(self):
1872
1874
)
1873
1875
from pandas .io .formats import format as fmt
1874
1876
1877
+ format_array = partial (
1878
+ fmt .format_array , formatter = None , quoting = QUOTE_NONNUMERIC
1879
+ )
1875
1880
if len (self .categories ) > max_categories :
1876
1881
num = max_categories // 2
1877
- head = fmt . format_array (self .categories [:num ], None )
1878
- tail = fmt . format_array (self .categories [- num :], None )
1882
+ head = format_array (self .categories [:num ])
1883
+ tail = format_array (self .categories [- num :])
1879
1884
category_strs = head + ["..." ] + tail
1880
1885
else :
1881
- category_strs = fmt . format_array (self .categories , None )
1886
+ category_strs = format_array (self .categories )
1882
1887
1883
1888
# Strip all leading spaces, which format_array adds for columns...
1884
1889
category_strs = [x .strip () for x in category_strs ]
@@ -2051,8 +2056,8 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
2051
2056
--------
2052
2057
>>> c = pd.Categorical(list('aabca'))
2053
2058
>>> c
2054
- [a, a, b, c, a ]
2055
- Categories (3, object): [a, b, c ]
2059
+ ['a', 'a', 'b', 'c', 'a' ]
2060
+ Categories (3, object): ['a', 'b', 'c' ]
2056
2061
>>> c.categories
2057
2062
Index(['a', 'b', 'c'], dtype='object')
2058
2063
>>> c.codes
@@ -2199,20 +2204,20 @@ def unique(self):
2199
2204
order of appearance.
2200
2205
2201
2206
>>> pd.Categorical(list("baabc")).unique()
2202
- [b, a, c ]
2203
- Categories (3, object): [b, a, c ]
2207
+ ['b', 'a', 'c' ]
2208
+ Categories (3, object): ['b', 'a', 'c' ]
2204
2209
2205
2210
>>> pd.Categorical(list("baabc"), categories=list("abc")).unique()
2206
- [b, a, c ]
2207
- Categories (3, object): [b, a, c ]
2211
+ ['b', 'a', 'c' ]
2212
+ Categories (3, object): ['b', 'a', 'c' ]
2208
2213
2209
2214
An ordered Categorical preserves the category ordering.
2210
2215
2211
2216
>>> pd.Categorical(
2212
2217
... list("baabc"), categories=list("abc"), ordered=True
2213
2218
... ).unique()
2214
- [b, a, c ]
2215
- Categories (3, object): [a < b < c ]
2219
+ ['b', 'a', 'c' ]
2220
+ Categories (3, object): ['a' < 'b' < 'c' ]
2216
2221
"""
2217
2222
# unlike np.unique, unique1d does not sort
2218
2223
unique_codes = unique1d (self .codes )
@@ -2465,7 +2470,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
2465
2470
4 c
2466
2471
5 c
2467
2472
dtype: category
2468
- Categories (3, object): [a, b, c ]
2473
+ Categories (3, object): ['a', 'b', 'c' ]
2469
2474
2470
2475
>>> s.cat.categories
2471
2476
Index(['a', 'b', 'c'], dtype='object')
@@ -2478,7 +2483,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
2478
2483
4 a
2479
2484
5 a
2480
2485
dtype: category
2481
- Categories (3, object): [c, b, a ]
2486
+ Categories (3, object): ['c', 'b', 'a' ]
2482
2487
2483
2488
>>> s.cat.reorder_categories(list("cba"))
2484
2489
0 a
@@ -2488,7 +2493,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
2488
2493
4 c
2489
2494
5 c
2490
2495
dtype: category
2491
- Categories (3, object): [c, b, a ]
2496
+ Categories (3, object): ['c', 'b', 'a' ]
2492
2497
2493
2498
>>> s.cat.add_categories(["d", "e"])
2494
2499
0 a
@@ -2498,7 +2503,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
2498
2503
4 c
2499
2504
5 c
2500
2505
dtype: category
2501
- Categories (5, object): [a, b, c, d, e ]
2506
+ Categories (5, object): ['a', 'b', 'c', 'd', 'e' ]
2502
2507
2503
2508
>>> s.cat.remove_categories(["a", "c"])
2504
2509
0 NaN
@@ -2508,7 +2513,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
2508
2513
4 NaN
2509
2514
5 NaN
2510
2515
dtype: category
2511
- Categories (1, object): [b ]
2516
+ Categories (1, object): ['b' ]
2512
2517
2513
2518
>>> s1 = s.cat.add_categories(["d", "e"])
2514
2519
>>> s1.cat.remove_unused_categories()
@@ -2519,7 +2524,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
2519
2524
4 c
2520
2525
5 c
2521
2526
dtype: category
2522
- Categories (3, object): [a, b, c ]
2527
+ Categories (3, object): ['a', 'b', 'c' ]
2523
2528
2524
2529
>>> s.cat.set_categories(list("abcde"))
2525
2530
0 a
@@ -2529,7 +2534,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
2529
2534
4 c
2530
2535
5 c
2531
2536
dtype: category
2532
- Categories (5, object): [a, b, c, d, e ]
2537
+ Categories (5, object): ['a', 'b', 'c', 'd', 'e' ]
2533
2538
2534
2539
>>> s.cat.as_ordered()
2535
2540
0 a
@@ -2539,7 +2544,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
2539
2544
4 c
2540
2545
5 c
2541
2546
dtype: category
2542
- Categories (3, object): [a < b < c ]
2547
+ Categories (3, object): ['a' < 'b' < 'c' ]
2543
2548
2544
2549
>>> s.cat.as_unordered()
2545
2550
0 a
@@ -2549,7 +2554,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
2549
2554
4 c
2550
2555
5 c
2551
2556
dtype: category
2552
- Categories (3, object): [a, b, c ]
2557
+ Categories (3, object): ['a', 'b', 'c' ]
2553
2558
"""
2554
2559
2555
2560
def __init__ (self , data ):
0 commit comments