|
10 | 10 | import numpy as np
|
11 | 11 |
|
12 | 12 | from pandas.compat import u
|
13 |
| -from pandas import DataFrame, Index, Series, MultiIndex, date_range |
| 13 | +from pandas import DataFrame, Index, Series, MultiIndex, date_range, Timedelta, Period |
14 | 14 | import pandas as pd
|
15 | 15 |
|
16 | 16 | from pandas.util.testing import (assert_series_equal,
|
@@ -136,6 +136,141 @@ def test_stack_unstack(self):
|
136 | 136 | assert_frame_equal(unstacked_cols.T, self.frame)
|
137 | 137 | assert_frame_equal(unstacked_cols_df['bar'].T, self.frame)
|
138 | 138 |
|
def test_unstack_fill(self):
    """Check the ``fill_value`` keyword of Series/DataFrame ``unstack``.

    GH #9746.  Every case below unstacks data carried by the same
    incomplete two-level MultiIndex, so each unstacked result contains
    holes, and the test asserts what ends up in those holes for integer,
    float, mixed-dtype, datetime, timedelta, Period and categorical data.
    """
    # ('y', 'a') and ('z', 'b') are deliberately absent: those are the
    # two cells that unstack has to fill in every case below.
    sparse_index = MultiIndex.from_tuples(
        [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])
    # Column layout expected after unstacking the two-column frame.
    unstacked_columns = MultiIndex.from_tuples(
        [('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')])

    # From a series
    data = Series([1, 2, 4, 5], dtype=np.int16)
    data.index = sparse_index

    result = data.unstack(fill_value=-1)
    expected = DataFrame({'a': [1, -1, 5], 'b': [2, 4, -1]},
                         index=['x', 'y', 'z'], dtype=np.int16)
    assert_frame_equal(result, expected)

    # From a series with incorrect data type for fill_value
    # (np.float64 rather than the removed ``np.float`` alias)
    result = data.unstack(fill_value=0.5)
    expected = DataFrame({'a': [1, 0.5, 5], 'b': [2, 4, 0.5]},
                         index=['x', 'y', 'z'], dtype=np.float64)
    assert_frame_equal(result, expected)

    # From a dataframe
    rows = [[1, 2], [3, 4], [5, 6], [7, 8]]
    df = DataFrame(rows, columns=list('AB'), dtype=np.int32)
    df.index = sparse_index

    result = df.unstack(fill_value=-1)

    rows = [[1, 3, 2, 4], [-1, 5, -1, 6], [7, -1, 8, -1]]
    expected = DataFrame(rows, index=list('xyz'), dtype=np.int32)
    expected.columns = unstacked_columns
    assert_frame_equal(result, expected)

    # From a mixed type dataframe
    df['A'] = df['A'].astype(np.int16)
    df['B'] = df['B'].astype(np.float64)

    result = df.unstack(fill_value=-1)
    # Reuse the int32 expectation above, recast per top-level column.
    expected['A'] = expected['A'].astype(np.int16)
    expected['B'] = expected['B'].astype(np.float64)
    assert_frame_equal(result, expected)

    # From a dataframe with incorrect data type for fill_value
    result = df.unstack(fill_value=0.5)

    rows = [[1, 3, 2, 4], [0.5, 5, 0.5, 6], [7, 0.5, 8, 0.5]]
    expected = DataFrame(rows, index=list('xyz'), dtype=np.float64)
    expected.columns = unstacked_columns
    assert_frame_equal(result, expected)

    # Test unstacking with date times
    dv = pd.date_range('2012-01-01', periods=4).values
    data = Series(dv)
    data.index = sparse_index

    result = data.unstack()
    expected = DataFrame({'a': [dv[0], pd.NaT, dv[3]],
                          'b': [dv[1], dv[2], pd.NaT]},
                         index=['x', 'y', 'z'])
    assert_frame_equal(result, expected)

    result = data.unstack(fill_value=dv[0])
    expected = DataFrame({'a': [dv[0], dv[0], dv[3]],
                          'b': [dv[1], dv[2], dv[0]]},
                         index=['x', 'y', 'z'])
    assert_frame_equal(result, expected)

    # Test unstacking with time deltas
    td = [Timedelta(days=i) for i in range(4)]
    data = Series(td)
    data.index = sparse_index

    result = data.unstack()
    expected = DataFrame({'a': [td[0], pd.NaT, td[3]],
                          'b': [td[1], td[2], pd.NaT]},
                         index=['x', 'y', 'z'])
    assert_frame_equal(result, expected)

    result = data.unstack(fill_value=td[1])
    expected = DataFrame({'a': [td[0], td[1], td[3]],
                          'b': [td[1], td[2], td[1]]},
                         index=['x', 'y', 'z'])
    assert_frame_equal(result, expected)

    # Test unstacking with period
    periods = [Period('2012-01'), Period('2012-02'), Period('2012-03'),
               Period('2012-04')]
    data = Series(periods)
    data.index = sparse_index

    result = data.unstack()
    expected = DataFrame({'a': [periods[0], None, periods[3]],
                          'b': [periods[1], periods[2], None]},
                         index=['x', 'y', 'z'])
    assert_frame_equal(result, expected)

    result = data.unstack(fill_value=periods[1])
    expected = DataFrame({'a': [periods[0], periods[1], periods[3]],
                          'b': [periods[1], periods[2], periods[1]]},
                         index=['x', 'y', 'z'])
    assert_frame_equal(result, expected)

    # Test unstacking with categorical
    data = Series(['a', 'b', 'c', 'a'], dtype='category')
    data.index = sparse_index

    # By default missing values will be NaN
    # ('x' is not one of the categories, so it stands in for NaN here)
    result = data.unstack()
    expected = DataFrame({'a': pd.Categorical(list('axa'),
                                              categories=list('abc')),
                          'b': pd.Categorical(list('bcx'),
                                              categories=list('abc'))},
                         index=list('xyz'))
    assert_frame_equal(result, expected)

    # Fill with non-category results in NaN entries similar to above
    result = data.unstack(fill_value='d')
    assert_frame_equal(result, expected)

    # Fill with category value replaces missing values as expected
    result = data.unstack(fill_value='c')
    expected = DataFrame({'a': pd.Categorical(list('aca'),
                                              categories=list('abc')),
                          'b': pd.Categorical(list('bcc'),
                                              categories=list('abc'))},
                         index=list('xyz'))
    assert_frame_equal(result, expected)
| 273 | + |
139 | 274 | def test_stack_ints(self):
|
140 | 275 | df = DataFrame(
|
141 | 276 | np.random.randn(30, 27),
|
|
0 commit comments