pandas-dev · jreback · Jul 20, 2018 · Jul 20, 2018 · Jul 20, 2018 · Jul 20, 2018
diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py
@@ -1,7 +1,9 @@
+import string
 from itertools import product
 
 import numpy as np
 from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long
+import pandas as pd
 
 from .pandas_vb_common import setup  # noqa
 
@@ -132,3 +134,26 @@ def setup(self):
 
     def time_pivot_table(self):
         self.df.pivot_table(index='key1', columns=['key2', 'key3'])
+
+
+class GetDummies(object):
+    """Time ``pd.get_dummies`` on a categorical Series, dense vs sparse."""
+
+    goal_time = 0.2
+
+    def setup(self):
+        # One million draws from the 12 categories 'a'..'l'. The size is
+        # written without PEP 515 underscores: ``1_000_000`` is a
+        # SyntaxError on interpreters older than Python 3.6.
+        categories = list(string.ascii_letters[:12])
+        s = pd.Series(np.random.choice(categories, size=1000000),
+                      dtype=pd.api.types.CategoricalDtype(categories))
+        self.s = s
+
+    def time_get_dummies_1d(self):
+        # Dense result (sparse=False).
+        pd.get_dummies(self.s, sparse=False)
+
+    def time_get_dummies_1d_sparse(self):
+        # Sparse result (sparse=True).
+        pd.get_dummies(self.s, sparse=True)
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -343,7 +343,7 @@ Performance Improvements
 - Improved performance of :meth:`HDFStore.groups` (and dependent functions like
   :meth:`~HDFStore.keys`.  (i.e. ``x in store`` checks are much faster)
   (:issue:`21372`)
--
+- Improved the performance of :func:`pandas.get_dummies` with ``sparse=True`` (:issue:`21997`)
 
 .. _whatsnew_0240.docs:
 

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -940,10 +940,11 @@ def get_empty_Frame(data, sparse):
         sparse_series = {}
         N = len(data)
         sp_indices = [[] for _ in range(len(dummy_cols))]
-        for ndx, code in enumerate(codes):
-            if code == -1:
-                # Blank entries if not dummy_na and code == -1, #GH4446
-                continue
+        mask = codes != -1
+        codes = codes[mask]
+        n_idx = np.arange(N)[mask]
+
+        for ndx, code in zip(n_idx, codes):
             sp_indices[code].append(ndx)
 
         if drop_first: