From 6ee556403b1ef00bdbe3f1c3d051df125a66efe9 Mon Sep 17 00:00:00 2001
From: sinhrks <sinhrks@gmail.com>
Date: Sun, 10 Apr 2016 11:18:51 +0900
Subject: [PATCH] ENH: Add Index.str.get_dummies

---
 doc/source/text.rst             | 17 ++++++++++++++---
 doc/source/whatsnew/v0.18.1.txt |  7 +++++++
 pandas/core/strings.py          | 19 +++++--------------
 pandas/tests/test_strings.py    | 21 ++++++++++++++++-----
 4 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/doc/source/text.rst b/doc/source/text.rst
index 655df5c5e566c..d9f8d45c8aa75 100644
--- a/doc/source/text.rst
+++ b/doc/source/text.rst
@@ -354,16 +354,27 @@ Methods like ``match``, ``contains``, ``startswith``, and ``endswith`` take
    s4 = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
    s4.str.contains('A', na=False)
 
+.. _text.indicator:
+
 Creating Indicator Variables
 ----------------------------
 
 You can extract dummy variables from string columns.
 For example if they are separated by a ``'|'``:
 
-  .. ipython:: python
+.. ipython:: python
+
+    s = pd.Series(['a', 'a|b', np.nan, 'a|c'])
+    s.str.get_dummies(sep='|')
+
+String ``Index`` also supports ``get_dummies``, which returns a ``MultiIndex``.
+
+.. versionadded:: 0.18.1
+
+.. ipython:: python
 
-      s = pd.Series(['a', 'a|b', np.nan, 'a|c'])
-      s.str.get_dummies(sep='|')
+    idx = pd.Index(['a', 'a|b', np.nan, 'a|c'])
+    idx.str.get_dummies(sep='|')
 
 See also :func:`~pandas.get_dummies`.
 
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
index edbaeb65c45eb..48677f5105dda 100644
--- a/doc/source/whatsnew/v0.18.1.txt
+++ b/doc/source/whatsnew/v0.18.1.txt
@@ -66,6 +66,13 @@ Other Enhancements
    idx.take([2, -1])     # default, allow_fill=True, fill_value=None
    idx.take([2, -1], fill_value=True)
 
+- ``Index`` now supports ``.str.get_dummies()``, which returns a ``MultiIndex``; see :ref:`Creating Indicator Variables <text.indicator>` (:issue:`10008`, :issue:`10103`)
+
+.. ipython:: python
+
+   idx = pd.Index(['a|b', 'a|c', 'b|c'])
+   idx.str.get_dummies('|')
+
 
 .. _whatsnew_0181.sparse:
 
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index e5d539821e3ca..66e4638a9e4b4 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -741,15 +741,6 @@ def str_get_dummies(arr, sep='|'):
     --------
     pandas.get_dummies
     """
-    from pandas.core.frame import DataFrame
-    from pandas.core.index import Index
-
-    # GH9980, Index.str does not support get_dummies() as it returns a frame
-    if isinstance(arr, Index):
-        raise TypeError("get_dummies is not supported for string methods on "
-                        "Index")
-
-    # TODO remove this hack?
     arr = arr.fillna('')
     try:
         arr = sep + arr + sep
@@ -766,7 +757,7 @@ def str_get_dummies(arr, sep='|'):
     for i, t in enumerate(tags):
         pat = sep + t + sep
         dummies[:, i] = lib.map_infer(arr.values, lambda x: pat in x)
-    return DataFrame(dummies, arr.index, tags)
+    return dummies, tags
 
 
 def str_join(arr, sep):
@@ -1356,9 +1347,9 @@ def cons_row(x):
             index = self._orig.index
             if expand:
                 cons = self._orig._constructor_expanddim
-                return cons(result, index=index)
+                return cons(result, columns=name, index=index)
             else:
-                # Must a Series
+                # Must be a Series
                 cons = self._orig._constructor
                 return cons(result, name=name, index=index)
 
@@ -1589,9 +1580,9 @@ def get_dummies(self, sep='|'):
         # we need to cast to Series of strings as only that has all
         # methods available for making the dummies...
         data = self._orig.astype(str) if self._is_categorical else self._data
-        result = str_get_dummies(data, sep)
+        result, name = str_get_dummies(data, sep)
         return self._wrap_result(result, use_codes=(not self._is_categorical),
-                                 expand=True)
+                                 name=name, expand=True)
 
     @copy(str_translate)
     def translate(self, table, deletechars=None):
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index d61ae3681a880..1f9f7d43e8568 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -1237,12 +1237,15 @@ def test_get_dummies(self):
                              columns=list('7ab'))
         tm.assert_frame_equal(result, expected)
 
-        # GH9980
-        # Index.str does not support get_dummies() as it returns a frame
-        with tm.assertRaisesRegexp(TypeError, "not supported"):
-            idx = Index(['a|b', 'a|c', 'b|c'])
-            idx.str.get_dummies('|')
+        # GH9980, GH8028
+        idx = Index(['a|b', 'a|c', 'b|c'])
+        result = idx.str.get_dummies('|')
+
+        expected = MultiIndex.from_tuples([(1, 1, 0), (1, 0, 1),
+                                           (0, 1, 1)], names=('a', 'b', 'c'))
+        tm.assert_index_equal(result, expected)
 
+    def test_get_dummies_with_name_dummy(self):
         # GH 12180
         # Dummies named 'name' should work as expected
         s = Series(['a', 'b,name', 'b'])
@@ -1251,6 +1254,14 @@ def test_get_dummies(self):
                              columns=['a', 'b', 'name'])
         tm.assert_frame_equal(result, expected)
 
+        idx = Index(['a|b', 'name|c', 'b|name'])
+        result = idx.str.get_dummies('|')
+
+        expected = MultiIndex.from_tuples([(1, 1, 0, 0), (0, 0, 1, 1),
+                                           (0, 1, 0, 1)],
+                                          names=('a', 'b', 'c', 'name'))
+        tm.assert_index_equal(result, expected)
+
     def test_join(self):
         values = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])
         result = values.str.split('_').str.join('_')