Skip to content

Commit 4777800

Browse files
michaelaye authored and jreback committed
PERF: Checking for length of categories before doing string conversion. fixes #11305
1 parent 49cd89b commit 4777800

File tree

3 files changed

+23
-5
lines changed

3 files changed

+23
-5
lines changed

asv_bench/benchmarks/categoricals.py

+15
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .pandas_vb_common import *
22
import string
33

4+
45
class concat_categorical(object):
56
goal_time = 0.2
67

@@ -26,6 +27,7 @@ def time_value_counts(self):
2627
def time_value_counts_dropna(self):
2728
self.ts.value_counts(dropna=True)
2829

30+
2931
class categorical_constructor(object):
3032
goal_time = 0.2
3133

@@ -43,3 +45,16 @@ def time_regular_constructor(self):
4345
def time_fastpath(self):
4446
Categorical(self.codes, self.cat_idx, fastpath=True)
4547

48+
49+
class categorical_rendering(object):
50+
goal_time = 3e-3
51+
52+
def setup(self):
53+
n = 1000
54+
items = [str(i) for i in range(n)]
55+
s = pd.Series(items, dtype='category')
56+
df = pd.DataFrame({'C': s, 'data': np.random.randn(n)})
57+
self.data = df[df.C == '20']
58+
59+
def time_rendering(self):
60+
str(self.data.C)

doc/source/whatsnew/v0.17.1.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
v0.17.1 (November ??, 2015)
44
---------------------------
55

6-
This is a minor bug-fix release from 0.17.0 and includes a a large number of
6+
This is a minor bug-fix release from 0.17.0 and includes a large number of
77
bug fixes along with several new features, enhancements, and performance improvements.
88
We recommend that all users upgrade to this version.
99

@@ -64,9 +64,11 @@ Performance Improvements
6464
- Improved performance of ``rolling_median`` (:issue:`11450`)
6565

6666
- Improved performance of ``to_excel`` (:issue:`11352`)
67+
- Fixed a performance bug in the repr of ``Categorical`` categories, which formatted every category as a string before truncating the list for display (:issue:`11305`)
6768

6869
.. _whatsnew_0171.bug_fixes:
6970

71+
7072
Bug Fixes
7173
~~~~~~~~~
7274

pandas/core/categorical.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1390,12 +1390,13 @@ def _repr_categories(self):
13901390
max_categories = (10 if get_option("display.max_categories") == 0
13911391
else get_option("display.max_categories"))
13921392
from pandas.core import format as fmt
1393-
category_strs = fmt.format_array(self.categories, None)
1394-
if len(category_strs) > max_categories:
1393+
if len(self.categories) > max_categories:
13951394
num = max_categories // 2
1396-
head = category_strs[:num]
1397-
tail = category_strs[-(max_categories - num):]
1395+
head = fmt.format_array(self.categories[:num], None)
1396+
tail = fmt.format_array(self.categories[-num:], None)
13981397
category_strs = head + ["..."] + tail
1398+
else:
1399+
category_strs = fmt.format_array(self.categories, None)
13991400

14001401
# Strip all leading spaces, which format_array adds for columns...
14011402
category_strs = [x.strip() for x in category_strs]

0 commit comments

Comments
 (0)