Skip to content

Commit a6052eb

Browse files
committed
BUG: Retain tz-aware dtypes with melt
Add additional tests
1 parent df2e361 commit a6052eb

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,7 @@ Timezones
896896
- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
897897
- Bug when iterating over :class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`)
898898
- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`)
899+
- Bug in :func:`melt` that converted tz-aware dtypes to tz-naive (:issue:`15785`)
899900

900901
Offsets
901902
^^^^^^^

pandas/core/reshape/melt.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313

1414
import re
1515
from pandas.core.dtypes.missing import notna
16+
from pandas.core.dtypes.common import is_extension_type
1617
from pandas.core.tools.numeric import to_numeric
18+
from pandas.core.reshape.concat import concat
1719

1820

1921
@Appender(_shared_docs['melt'] %
@@ -70,7 +72,13 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
7072

7173
mdata = {}
7274
for col in id_vars:
73-
mdata[col] = np.tile(frame.pop(col).values, K)
75+
id_data = frame.pop(col)
76+
if is_extension_type(id_data):
77+
# Preserve pandas dtype by not converting to a numpy array
78+
id_data = concat([id_data] * K, ignore_index=True)
79+
else:
80+
id_data = np.tile(id_data.values, K)
81+
mdata[col] = id_data
7482

7583
mcolumns = id_vars + var_name + [value_name]
7684

pandas/tests/reshape/test_melt.py

+32
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,38 @@ def test_multiindex(self):
212212
res = self.df1.melt()
213213
assert res.columns.tolist() == ['CAP', 'low', 'value']
214214

215+
@pytest.mark.parametrize("col", [
216+
pd.Series(pd.date_range('2010', periods=5, tz='US/Pacific')),
217+
pd.Series(["a", "b", "c", "a", "d"], dtype="category")])
218+
def test_pandas_dtypes_id_var(self, col):
219+
# GH 15785
220+
# Pandas dtype in the id
221+
df = DataFrame({'klass': range(5),
222+
'col': col,
223+
'attr1': [1, 0, 0, 0, 0],
224+
'attr2': [0, 1, 0, 0, 0]})
225+
result = melt(df, id_vars=['klass', 'col'], var_name='attribute',
226+
value_name='value')
227+
expected = DataFrame({'klass': list(range(5)) * 2,
228+
'col': pd.concat([col] * 2, ignore_index=True),
229+
'attribute': ['attr1'] * 5 + ['attr2'] * 5,
230+
'value': [1, 0, 0, 0, 0] + [0, 1, 0, 0, 0]})
231+
tm.assert_frame_equal(result, expected)
232+
233+
# Pandas dtype in the column
234+
df = DataFrame({'klass': range(5),
235+
'col': col,
236+
'attr1': [1, 0, 0, 0, 0],
237+
'attr2': col})
238+
result = melt(df, id_vars=['klass', 'col'], var_name='attribute',
239+
value_name='value')
240+
expected = DataFrame({'klass': list(range(5)) * 2,
241+
'col': pd.concat([col] * 2, ignore_index=True),
242+
'attribute': ['attr1'] * 5 + ['attr2'] * 5,
243+
'value': pd.concat([pd.Series([1, 0, 0, 0, 0]),
244+
col], ignore_index=True)})
245+
tm.assert_frame_equal(result, expected)
246+
215247

216248
class TestLreshape(object):
217249

0 commit comments

Comments
 (0)