From 2f2eba0b5faa473aa781b55fed6b2a19c8a5e045 Mon Sep 17 00:00:00 2001
From: "D.S. McNeil" <dsm054@gmail.com>
Date: Mon, 12 Nov 2018 19:54:11 -0500
Subject: [PATCH] BUG: Preserve categorical dtypes when melting (#15853)

Also add ``tile`` methods to Index, Series, and Categorical, complementing the existing ``repeat`` methods.
---
 doc/source/whatsnew/v0.24.0.txt               |  2 +
 pandas/compat/numpy/function.py               |  4 ++
 pandas/core/arrays/categorical.py             | 15 ++++++-
 pandas/core/indexes/base.py                   | 44 ++++++++++++++++++-
 pandas/core/indexes/datetimelike.py           | 12 +++++
 pandas/core/series.py                         | 26 ++++++++++-
 .../arrays/categorical/test_analytics.py      |  8 ++++
 pandas/tests/indexes/test_base.py             | 20 +++++++++
 pandas/tests/reshape/test_melt.py             | 17 +++++++
 pandas/tests/series/test_analytics.py         | 21 +++++++++
 10 files changed, 166 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 20496c9fb3f31..0b6086a62d97f 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -211,6 +211,7 @@ Other Enhancements
 - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`)
 - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`)
 - :meth:`Series.droplevel` and :meth:`DataFrame.droplevel` are now implemented (:issue:`20342`)
+- Added :meth:`Index.tile`, :meth:`Series.tile`, and :meth:`Categorical.tile`, which repeat the whole object as a group (whereas the ``repeat`` methods repeat element by element), to ease categorical melting (:issue:`15853`)
 - Added support for reading from/writing to Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`, :issue:`23094`)
 - :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to
   reflect changes from the `Pandas-GBQ library version 0.6.0
@@ -1357,6 +1358,7 @@ Reshaping
 - Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue`22796`)
 - Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`)
 - Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have :class:`MultiIndex`ed columns (:issue:`23033`).
+- Bug in :meth:`DataFrame.melt` where categorical ``id_vars`` columns lost their categorical dtype in the result (:issue:`15853`).
 
 .. _whatsnew_0240.bug_fixes.sparse:
 
diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py
index 30fdeca35faf3..0a38265227b07 100644
--- a/pandas/compat/numpy/function.py
+++ b/pandas/compat/numpy/function.py
@@ -211,6 +211,10 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name):
 validate_repeat = CompatValidator(REPEAT_DEFAULTS, fname='repeat',
                                   method='both', max_fname_arg_count=1)
 
+TILE_DEFAULTS = dict(axis=None)
+validate_tile = CompatValidator(TILE_DEFAULTS, fname='tile',
+                                method='both', max_fname_arg_count=1)
+
 ROUND_DEFAULTS = dict(out=None)
 validate_round = CompatValidator(ROUND_DEFAULTS, fname='round',
                                  method='both', max_fname_arg_count=1)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 4363f3ccb14e2..5e263dc57fb33 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2325,12 +2325,25 @@ def repeat(self, repeats, *args, **kwargs):
         See also
         --------
         numpy.ndarray.repeat
-
+        Categorical.tile
         """
         nv.validate_repeat(args, kwargs)
         codes = self._codes.repeat(repeats)
         return self._constructor(values=codes, dtype=self.dtype, fastpath=True)
 
+    def tile(self, reps, *args, **kwargs):
+        """
+        Tile the Categorical as a whole `reps` times, keeping its dtype.
+
+        See also
+        --------
+        numpy.tile
+        Categorical.repeat
+        """
+        nv.validate_tile(args, kwargs)
+        codes = np.tile(self._codes, reps)
+        return self._constructor(values=codes, dtype=self.dtype, fastpath=True)
+
     # Implement the ExtensionArray interface
     @property
     def _can_hold_na(self):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 263de57d32f31..c1713a15984a6 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -546,6 +546,7 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs):
     def _shallow_copy(self, values=None, **kwargs):
         if values is None:
             values = self.values
+
         attributes = self._get_attributes_dict()
         attributes.update(kwargs)
         if not len(values) and 'dtype' not in kwargs:
@@ -557,7 +558,6 @@ def _shallow_copy(self, values=None, **kwargs):
             # `self.values` returns `self` for tz-aware, so we need to unwrap
             #  more specifically
             values = values.asi8
-
         return self._simple_new(values, **attributes)
 
     def _shallow_copy_with_infer(self, values, **kwargs):
@@ -822,6 +822,7 @@ def repeat(self, repeats, *args, **kwargs):
         --------
         Series.repeat : Equivalent function for Series
         numpy.repeat : Underlying implementation
+        Index.tile : repeat the entire index as a group, not by element
 
         Examples
         --------
@@ -836,6 +837,47 @@ def repeat(self, repeats, *args, **kwargs):
         nv.validate_repeat(args, kwargs)
         return self._shallow_copy(self._values.repeat(repeats))
 
+    def tile(self, reps, *args, **kwargs):
+        """
+        Tile elements of an Index.
+
+        Returns a new index constructed by repeating the whole current
+        index, as a group, the number of times given by `reps`.
+
+        .. versionadded:: 0.24.0
+
+        Parameters
+        ----------
+        reps : int
+            The number of times the whole index is repeated.
+        *args, **kwargs
+            Additional arguments have no effect but might be accepted for
+            compatibility with numpy.
+
+        Returns
+        -------
+        pandas.Index
+            Newly created Index with tiled elements.
+
+        See Also
+        --------
+        Series.tile : Equivalent function for Series
+        numpy.tile : Underlying implementation
+        Index.repeat : repeat the index element by element, not as a group
+
+        Examples
+        --------
+        >>> idx = pd.Index([1, 2, 3])
+        >>> idx
+        Int64Index([1, 2, 3], dtype='int64')
+        >>> idx.tile(2)
+        Int64Index([1, 2, 3, 1, 2, 3], dtype='int64')
+        >>> idx.tile(3)
+        Int64Index([1, 2, 3, 1, 2, 3, 1, 2, 3], dtype='int64')
+        """
+        nv.validate_tile(args, kwargs)
+        return self._shallow_copy(np.tile(self._values, reps))
+
     _index_shared_docs['where'] = """
         .. versionadded:: 0.19.0
 
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 4547f47314bad..e32fe380320eb 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -638,6 +638,18 @@ def repeat(self, repeats, *args, **kwargs):
         return self._shallow_copy(self.asi8.repeat(repeats),
                                   freq=freq)
 
+    def tile(self, reps, *args, **kwargs):
+        """
+        Analogous to numpy.tile; only period-dtype indexes keep their freq.
+        """
+        nv.validate_tile(args, kwargs)
+        if is_period_dtype(self):
+            freq = self.freq
+        else:
+            freq = None
+        return self._shallow_copy(np.tile(self.asi8, reps),
+                                  freq=freq)
+
     @Appender(_index_shared_docs['where'] % _index_doc_kwargs)
     def where(self, cond, other=None):
         other = _ensure_datetimelike_to_i8(other, to_utc=True)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 20e4720a3bde7..ad8616c9a370f 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -989,11 +989,12 @@ def _set_values(self, key, value):
 
     def repeat(self, repeats, *args, **kwargs):
         """
-        Repeat elements of an Series. Refer to `numpy.ndarray.repeat`
+        Repeat elements of a Series. Refer to `numpy.ndarray.repeat`
         for more information about the `repeats` argument.
 
         See also
         --------
+        pd.Series.tile
         numpy.ndarray.repeat
         """
         nv.validate_repeat(args, kwargs)
@@ -1002,6 +1003,29 @@ def repeat(self, repeats, *args, **kwargs):
         return self._constructor(new_values,
                                  index=new_index).__finalize__(self)
 
+    def tile(self, reps, *args, **kwargs):
+        """
+        Tile elements of a Series. Refer to `numpy.tile`
+        for more information about the `reps` argument, although
+        note that we do not support multidimensional tiling of Series.
+
+        See also
+        --------
+        pd.Series.repeat
+        numpy.tile
+        """
+        nv.validate_tile(args, kwargs)
+        new_index = self.index.tile(reps)
+        if is_categorical_dtype(self.dtype):
+            # Delegate to Categorical.tile, which tiles the codes and
+            # keeps the categories and their ordering intact.
+            new_values = self._values.tile(reps)
+        else:
+            new_values = np.tile(self._values, reps)
+
+        return self._constructor(new_values,
+                                 index=new_index).__finalize__(self)
+
     def get_value(self, label, takeable=False):
         """Quickly retrieve single value at passed index label
 
diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py
index ea6facd66a1a3..248abbbb688bc 100644
--- a/pandas/tests/arrays/categorical/test_analytics.py
+++ b/pandas/tests/arrays/categorical/test_analytics.py
@@ -308,6 +308,14 @@ def test_numpy_repeat(self):
         with pytest.raises(ValueError, match=msg):
             np.repeat(cat, 2, axis=1)
 
+    def test_tile(self):
+        # GH15853
+        cat = Categorical(["a", "b"], categories=["b", "a"], ordered=True)
+        exp = Categorical(["a", "b", "a", "b"], categories=["b", "a"],
+                          ordered=True)
+        res = cat.tile(2)
+        tm.assert_categorical_equal(res, exp)
+
     def test_isna(self):
         exp = np.array([False, False, True])
         c = Categorical(["a", "b", np.nan])
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 4a3efe22926f7..1ce92f0605cf3 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -2485,6 +2485,26 @@ def test_repeat(self):
         result = index.repeat(repeats)
         tm.assert_index_equal(result, expected)
 
+    def test_tile(self):
+        reps = 2
+        index = pd.Index([1, 2, 3])
+        expected = pd.Index([1, 2, 3, 1, 2, 3])
+
+        result = index.tile(reps)
+        tm.assert_index_equal(result, expected)
+
+    def test_tile_datetimeindex(self):
+        index = pd.date_range("2018-01-01", "2018-01-03")
+        result = index.tile(2)
+        expected = pd.to_datetime(["2018-01-01", "2018-01-02",
+                                   "2018-01-03"] * 2)
+
+        tm.assert_index_equal(result, expected)
+
+        # Even if reps = 1, verify we lose frequency
+        one_result = index.tile(1)
+        assert one_result.freq is None
+
     @pytest.mark.parametrize("index", [
         pd.Index([np.nan]), pd.Index([np.nan, 1]),
         pd.Index([1, 2, np.nan]), pd.Index(['a', 'b', np.nan]),
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
index 16ecb07c5f413..2a31bdde612ae 100644
--- a/pandas/tests/reshape/test_melt.py
+++ b/pandas/tests/reshape/test_melt.py
@@ -233,6 +233,23 @@ def test_pandas_dtypes(self, col):
         expected.columns = ['klass', 'col', 'attribute', 'value']
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize('id_vars', [['a'], ['b'], ['a', 'b']])
+    def test_categorical_id_vars(self, id_vars):
+        # GH 15853: categorical id_vars must keep their dtype after melt
+        df = DataFrame({"a": pd.Series(["a", "b", "c", "a", "d"],
+                                       dtype="category"),
+                        "b": pd.Series(pd.Categorical([0, 1, 1, 2, 1],
+                                                      categories=[0, 2, 1, 3],
+                                                      ordered=True)),
+                        "c": range(5), "d": np.arange(5.0, 0.0, -1)},
+                       columns=["a", "b", "c", "d"])
+
+        result = df.melt(id_vars=id_vars)
+        num = len(df.columns) - len(id_vars)
+        for column in id_vars:
+            expected = df[column].tile(num).reset_index(drop=True)
+            tm.assert_series_equal(result[column], expected)
+
 
 class TestLreshape(object):
 
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index a5a7cc2217864..afc1ec35443c6 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1395,6 +1395,27 @@ def test_numpy_repeat(self):
         with pytest.raises(ValueError, match=msg):
             np.repeat(s, 2, axis=0)
 
+    def test_tile(self):
+        s = Series(np.random.randn(3), index=['a', 'b', 'c'])
+
+        reps = s.tile(5)
+        exp = Series(np.tile(s.values, 5), index=np.tile(s.index.values, 5))
+        assert_series_equal(reps, exp)
+
+    def test_tile_categorical(self):
+        s = Series(pd.Categorical(["x", "y", "x", "z"],
+                                  categories=["x", "z", "y"],
+                                  ordered=True))
+        res_1 = s.tile(1)
+        assert_series_equal(s, res_1)
+
+        res_2 = s.tile(2)
+        exp_2 = Series(pd.Categorical(["x", "y", "x", "z"] * 2,
+                                      categories=s.cat.categories,
+                                      ordered=True),
+                       index=[0, 1, 2, 3] * 2)
+        assert_series_equal(res_2, exp_2)
+
     def test_searchsorted(self):
         s = Series([1, 2, 3])