|
4 | 4 | import numpy as np
|
5 | 5 | from pandas.compat import zip
|
6 | 6 |
|
7 |
| -from pandas import DataFrame, Series, unique |
| 7 | +from pandas import (DataFrame, Series, unique, Index, Categorical, CategoricalIndex, |
| 8 | + DatetimeIndex, TimedeltaIndex) |
8 | 9 | import pandas.util.testing as tm
|
9 | 10 | from pandas.util.testing import assertRaisesRegexp
|
10 | 11 | import pandas.core.common as com
|
@@ -97,6 +98,45 @@ def test_label_precision(self):
|
97 | 98 | '(0.54, 0.72]']
|
98 | 99 | self.assert_numpy_array_equal(result.categories, ex_levels)
|
99 | 100 |
|
def test_label_coercion(self):
    """Check the dtypes seen when ``cut`` is given explicit ``labels``.

    For each kind of label container below, ``x`` is binned with ``cut``
    and ``y`` is averaged per bin.  Both the dtype of the ``cut`` result
    and the dtype of the grouped index are compared with the expected
    dtype for that container.
    """
    # GH10140

    frame = DataFrame({'x': 100 * np.random.random(100)})
    frame['y'] = frame.x ** 2

    edges = np.arange(0, 110, 10)
    labels = np.arange(5, 105, 10)

    def binned_and_grouped(bin_labels, expected_dtype):
        # Bin ``x`` with the supplied labels, assert the dtype of the
        # binned result and of the grouped index, and hand back that
        # index so the caller can make further assertions on it.
        binned = cut(frame.x, bins=edges, labels=bin_labels)
        self.assertEqual(binned.dtype, expected_dtype)
        grouped_index = frame.groupby(binned).y.mean().index
        self.assertEqual(grouped_index.dtype, expected_dtype)
        return grouped_index

    # passing in an index
    index_cases = [
        (Index(labels), np.dtype('int64')),
        (DatetimeIndex(['20130101'] * len(labels)) +
         TimedeltaIndex(labels, unit='D'),
         np.dtype('M8[ns]')),
        (TimedeltaIndex(labels, unit='D'), np.dtype('m8[ns]')),
        (Categorical(labels), 'category'),
        (Index(Index(labels).map(str)), 'category'),
    ]
    for bin_labels, expected_dtype in index_cases:
        binned_and_grouped(bin_labels, expected_dtype)

    # passing in a list-like
    array_cases = [
        (Index(labels), np.dtype('int64')),
        (Index(Index(labels).map(str)), 'category'),
    ]
    for bin_labels, expected_dtype in array_cases:
        # same labels as above, but stripped down to a plain ndarray
        binned_and_grouped(np.asarray(bin_labels), expected_dtype)

    # reversed categories
    reversed_labels = Categorical(labels,
                                  categories=labels[::-1],
                                  ordered=True)
    reversed_dtype = Index(reversed_labels).dtype
    grouped_index = binned_and_grouped(reversed_labels, reversed_dtype)

    # the grouped index is expected to list the categories in their
    # declared (reversed) order, i.e. codes 0..n-1
    expected_codes = np.arange(len(reversed_labels))
    expected_index = CategoricalIndex(
        Categorical.from_codes(expected_codes,
                               categories=reversed_labels.categories,
                               ordered=True),
        name='x')
    tm.assert_index_equal(grouped_index, expected_index)
| 139 | + |
100 | 140 | def test_na_handling(self):
|
101 | 141 | arr = np.arange(0, 0.75, 0.01)
|
102 | 142 | arr[::3] = np.nan
|
|
0 commit comments