|
20 | 20 | _multiprocess_can_split_ = True
|
21 | 21 |
|
22 | 22 |
|
23 |
| -def test_melt(): |
24 |
| - df = tm.makeTimeDataFrame()[:10] |
25 |
| - df['id1'] = (df['A'] > 0).astype(np.int64) |
26 |
| - df['id2'] = (df['B'] > 0).astype(np.int64) |
27 |
| - |
28 |
| - var_name = 'var' |
29 |
| - value_name = 'val' |
30 |
| - |
31 |
| - # Default column names |
32 |
| - result = melt(df) |
33 |
| - result1 = melt(df, id_vars=['id1']) |
34 |
| - result2 = melt(df, id_vars=['id1', 'id2']) |
35 |
| - result3 = melt(df, id_vars=['id1', 'id2'], |
36 |
| - value_vars='A') |
37 |
| - result4 = melt(df, id_vars=['id1', 'id2'], |
38 |
| - value_vars=['A', 'B']) |
39 |
| - |
40 |
| - expected4 = DataFrame({'id1': df['id1'].tolist() * 2, |
41 |
| - 'id2': df['id2'].tolist() * 2, |
42 |
| - 'variable': ['A']*10 + ['B']*10, |
43 |
| - 'value': df['A'].tolist() + df['B'].tolist()}, |
44 |
| - columns=['id1', 'id2', 'variable', 'value']) |
45 |
| - tm.assert_frame_equal(result4, expected4) |
46 |
| - |
47 |
| - # Supply custom name for the 'variable' column |
48 |
| - result5 = melt(df, var_name=var_name) |
49 |
| - result6 = melt(df, id_vars=['id1'], var_name=var_name) |
50 |
| - result7 = melt(df, id_vars=['id1', 'id2'], var_name=var_name) |
51 |
| - result8 = melt(df, id_vars=['id1', 'id2'], |
52 |
| - value_vars='A', var_name=var_name) |
53 |
| - result9 = melt(df, id_vars=['id1', 'id2'], |
54 |
| - value_vars=['A', 'B'], var_name=var_name) |
55 |
| - |
56 |
| - expected9 = DataFrame({'id1': df['id1'].tolist() * 2, |
57 |
| - 'id2': df['id2'].tolist() * 2, |
58 |
| - var_name: ['A']*10 + ['B']*10, |
59 |
| - 'value': df['A'].tolist() + df['B'].tolist()}, |
60 |
| - columns=['id1', 'id2', var_name, 'value']) |
61 |
| - tm.assert_frame_equal(result9, expected9) |
62 |
| - |
63 |
| - # Supply custom name for the 'value' column |
64 |
| - result10 = melt(df, value_name=value_name) |
65 |
| - result11 = melt(df, id_vars=['id1'], value_name=value_name) |
66 |
| - result12 = melt(df, id_vars=['id1', 'id2'], value_name=value_name) |
67 |
| - result13 = melt(df, id_vars=['id1', 'id2'], |
68 |
| - value_vars='A', value_name=value_name) |
69 |
| - result14 = melt(df, id_vars=['id1', 'id2'], |
70 |
| - value_vars=['A', 'B'], value_name=value_name) |
71 |
| - |
72 |
| - expected14 = DataFrame({'id1': df['id1'].tolist() * 2, |
73 |
| - 'id2': df['id2'].tolist() * 2, |
74 |
| - 'variable': ['A']*10 + ['B']*10, |
75 |
| - value_name: df['A'].tolist() + df['B'].tolist()}, |
76 |
| - columns=['id1', 'id2', 'variable', value_name]) |
77 |
| - tm.assert_frame_equal(result14, expected14) |
78 |
| - |
79 |
| - # Supply custom names for the 'variable' and 'value' columns |
80 |
| - result15 = melt(df, var_name=var_name, value_name=value_name) |
81 |
| - result16 = melt(df, id_vars=['id1'], var_name=var_name, value_name=value_name) |
82 |
| - result17 = melt(df, id_vars=['id1', 'id2'], |
83 |
| - var_name=var_name, value_name=value_name) |
84 |
| - result18 = melt(df, id_vars=['id1', 'id2'], |
85 |
| - value_vars='A', var_name=var_name, value_name=value_name) |
86 |
| - result19 = melt(df, id_vars=['id1', 'id2'], |
87 |
| - value_vars=['A', 'B'], var_name=var_name, value_name=value_name) |
88 |
| - |
89 |
| - expected19 = DataFrame({'id1': df['id1'].tolist() * 2, |
90 |
| - 'id2': df['id2'].tolist() * 2, |
91 |
| - var_name: ['A']*10 + ['B']*10, |
92 |
| - value_name: df['A'].tolist() + df['B'].tolist()}, |
93 |
| - columns=['id1', 'id2', var_name, value_name]) |
94 |
| - tm.assert_frame_equal(result19, expected19) |
95 |
| - |
96 |
| -def test_convert_dummies(): |
97 |
| - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', |
98 |
| - 'foo', 'bar', 'foo', 'foo'], |
99 |
| - 'B': ['one', 'one', 'two', 'three', |
100 |
| - 'two', 'two', 'one', 'three'], |
101 |
| - 'C': np.random.randn(8), |
102 |
| - 'D': np.random.randn(8)}) |
103 |
| - |
104 |
| - result = convert_dummies(df, ['A', 'B']) |
105 |
| - result2 = convert_dummies(df, ['A', 'B'], prefix_sep='.') |
106 |
| - |
107 |
| - expected = DataFrame({'A_foo': [1, 0, 1, 0, 1, 0, 1, 1], |
108 |
| - 'A_bar': [0, 1, 0, 1, 0, 1, 0, 0], |
109 |
| - 'B_one': [1, 1, 0, 0, 0, 0, 1, 0], |
110 |
| - 'B_two': [0, 0, 1, 0, 1, 1, 0, 0], |
111 |
| - 'B_three': [0, 0, 0, 1, 0, 0, 0, 1], |
112 |
| - 'C': df['C'].values, |
113 |
| - 'D': df['D'].values}, |
114 |
| - columns=result.columns, dtype=float) |
115 |
| - expected2 = expected.rename(columns=lambda x: x.replace('_', '.')) |
116 |
| - |
117 |
| - tm.assert_frame_equal(result, expected) |
118 |
| - tm.assert_frame_equal(result2, expected2) |
119 |
| - |
120 |
| - |
121 |
| -class Test_lreshape(unittest.TestCase): |
| 23 | +class TestMelt(unittest.TestCase): |
| 24 | + |
| 25 | + def setUp(self): |
| 26 | + self.df = tm.makeTimeDataFrame()[:10] |
| 27 | + self.df['id1'] = (self.df['A'] > 0).astype(np.int64) |
| 28 | + self.df['id2'] = (self.df['B'] > 0).astype(np.int64) |
| 29 | + |
| 30 | + self.var_name = 'var' |
| 31 | + self.value_name = 'val' |
| 32 | + |
| 33 | + def test_default_col_names(self): |
| 34 | + result = melt(self.df) |
| 35 | + self.assertEqual(result.columns.tolist(), ['variable', 'value']) |
| 36 | + |
| 37 | + result1 = melt(self.df, id_vars=['id1']) |
| 38 | + self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value']) |
| 39 | + |
| 40 | + result2 = melt(self.df, id_vars=['id1', 'id2']) |
| 41 | + self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value']) |
| 42 | + |
| 43 | + def test_value_vars(self): |
| 44 | + result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A') |
| 45 | + self.assertEqual(len(result3), 10) |
| 46 | + |
| 47 | + result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B']) |
| 48 | + expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2, |
| 49 | + 'id2': self.df['id2'].tolist() * 2, |
| 50 | + 'variable': ['A']*10 + ['B']*10, |
| 51 | + 'value': self.df['A'].tolist() + self.df['B'].tolist()}, |
| 52 | + columns=['id1', 'id2', 'variable', 'value']) |
| 53 | + tm.assert_frame_equal(result4, expected4) |
| 54 | + |
| 55 | + def test_custom_var_name(self): |
| 56 | + result5 = melt(self.df, var_name=self.var_name) |
| 57 | + self.assertEqual(result5.columns.tolist(), ['var', 'value']) |
| 58 | + |
| 59 | + result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name) |
| 60 | + self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value']) |
| 61 | + |
| 62 | + result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name) |
| 63 | + self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', 'value']) |
| 64 | + |
| 65 | + result8 = melt(self.df, id_vars=['id1', 'id2'], |
| 66 | + value_vars='A', var_name=self.var_name) |
| 67 | + self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', 'value']) |
| 68 | + |
| 69 | + result9 = melt(self.df, id_vars=['id1', 'id2'], |
| 70 | + value_vars=['A', 'B'], var_name=self.var_name) |
| 71 | + expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2, |
| 72 | + 'id2': self.df['id2'].tolist() * 2, |
| 73 | + self.var_name: ['A']*10 + ['B']*10, |
| 74 | + 'value': self.df['A'].tolist() + self.df['B'].tolist()}, |
| 75 | + columns=['id1', 'id2', self.var_name, 'value']) |
| 76 | + tm.assert_frame_equal(result9, expected9) |
| 77 | + |
| 78 | + def test_custom_value_name(self): |
| 79 | + result10 = melt(self.df, value_name=self.value_name) |
| 80 | + self.assertEqual(result10.columns.tolist(), ['variable', 'val']) |
| 81 | + |
| 82 | + result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name) |
| 83 | + self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val']) |
| 84 | + |
| 85 | + result12 = melt(self.df, id_vars=['id1', 'id2'], value_name=self.value_name) |
| 86 | + self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', 'val']) |
| 87 | + |
| 88 | + result13 = melt(self.df, id_vars=['id1', 'id2'], |
| 89 | + value_vars='A', value_name=self.value_name) |
| 90 | + self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val']) |
| 91 | + |
| 92 | + result14 = melt(self.df, id_vars=['id1', 'id2'], |
| 93 | + value_vars=['A', 'B'], value_name=self.value_name) |
| 94 | + expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, |
| 95 | + 'id2': self.df['id2'].tolist() * 2, |
| 96 | + 'variable': ['A']*10 + ['B']*10, |
| 97 | + self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, |
| 98 | + columns=['id1', 'id2', 'variable', self.value_name]) |
| 99 | + tm.assert_frame_equal(result14, expected14) |
| 100 | + |
| 101 | + def test_custom_var_and_value_name(self): |
| 102 | + # Custom names supplied for both the 'variable' and the 'value' column. |
| 103 | + result15 = melt(self.df, var_name=self.var_name, value_name=self.value_name) |
| 104 | + self.assertEqual(result15.columns.tolist(), ['var', 'val']) |
| 105 | + |
| 106 | + result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, value_name=self.value_name) |
| 107 | + self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) |
| 108 | + |
| 109 | + result17 = melt(self.df, id_vars=['id1', 'id2'], |
| 110 | + var_name=self.var_name, value_name=self.value_name) |
| 111 | + self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val']) |
| 112 | + |
| 113 | + result18 = melt(self.df, id_vars=['id1', 'id2'], |
| 114 | + value_vars='A', var_name=self.var_name, value_name=self.value_name) |
| 115 | + self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val']) |
| 116 | + |
| 117 | + result19 = melt(self.df, id_vars=['id1', 'id2'], |
| 118 | + value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) |
| 119 | + expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, |
| 120 | + 'id2': self.df['id2'].tolist() * 2, |
| 121 | + self.var_name: ['A']*10 + ['B']*10, |
| 122 | + self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, |
| 123 | + columns=['id1', 'id2', self.var_name, self.value_name]) |
| 124 | + tm.assert_frame_equal(result19, expected19) |
| 125 | + |
| 126 | + def test_columns_name_used_as_var_name(self):  # distinct name: a duplicate would shadow the test above |
| 127 | + self.df.columns.name = 'foo' |
| 128 | + result20 = melt(self.df) |
| 129 | + self.assertEqual(result20.columns.tolist(), ['foo', 'value']) |
| 130 | + |
| 131 | +class TestConvertDummies(unittest.TestCase): |
| 132 | + def test_convert_dummies(self): |
| 133 | + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', |
| 134 | + 'foo', 'bar', 'foo', 'foo'], |
| 135 | + 'B': ['one', 'one', 'two', 'three', |
| 136 | + 'two', 'two', 'one', 'three'], |
| 137 | + 'C': np.random.randn(8), |
| 138 | + 'D': np.random.randn(8)}) |
| 139 | + |
| 140 | + result = convert_dummies(df, ['A', 'B']) |
| 141 | + result2 = convert_dummies(df, ['A', 'B'], prefix_sep='.') |
| 142 | + |
| 143 | + expected = DataFrame({'A_foo': [1, 0, 1, 0, 1, 0, 1, 1], |
| 144 | + 'A_bar': [0, 1, 0, 1, 0, 1, 0, 0], |
| 145 | + 'B_one': [1, 1, 0, 0, 0, 0, 1, 0], |
| 146 | + 'B_two': [0, 0, 1, 0, 1, 1, 0, 0], |
| 147 | + 'B_three': [0, 0, 0, 1, 0, 0, 0, 1], |
| 148 | + 'C': df['C'].values, |
| 149 | + 'D': df['D'].values}, |
| 150 | + columns=result.columns, dtype=float) |
| 151 | + expected2 = expected.rename(columns=lambda x: x.replace('_', '.')) |
| 152 | + |
| 153 | + tm.assert_frame_equal(result, expected) |
| 154 | + tm.assert_frame_equal(result2, expected2) |
| 155 | + |
| 156 | + |
| 157 | +class TestLreshape(unittest.TestCase): |
122 | 158 |
|
123 | 159 | def test_pairs(self):
|
124 | 160 | data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008',
|
|
0 commit comments