# Patch summary (leading text before the first "diff --git" header; not part of any hunk).
#
# Purpose, per the release note added below: fix ``MultiIndex.get_level_values()``
# with missing values (issue 5074).
#
# Files touched:
#   * doc/source/release.rst      - adds the release-note bullet for the fix.
#   * pandas/core/index.py        - adds a class attribute ``_na_value = np.nan`` and
#                                   changes ``get_level_values`` to build its result with
#                                   ``Index(com.take_1d(..., fill_value=unique_vals._na_value))``
#                                   instead of ``unique_vals.take(labels)``.
#   * pandas/tests/test_index.py  - adds ``test_get_level_values_na`` (NaN levels, an
#                                   all-NaN level, a mixed object level, a level with
#                                   ``pd.NaT``, and an empty index).
#   * pandas/tseries/index.py     - adds a class attribute ``_na_value = tslib.NaT``.
#
# NOTE(review): the classes that receive ``_na_value`` are not visible in these hunks --
# confirm against the target files.
# NOTE(review): this patch was stored with its line breaks collapsed. Lines were split
# to match the counts in each ``@@`` hunk header; leading indentation was reconstructed
# from Python/rst syntax -- verify whitespace against the target files before applying.
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 216b7f2ca6e5a..40ad07aea1ecf 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -569,6 +569,7 @@ Bug Fixes
   - Fixed a bug where default options were being overwritten in the option
     parser cleaning (:issue:`5121`).
   - Treat a list/ndarray identically for ``iloc`` indexing with list-like (:issue:`5006`)
+  - Fix ``MultiIndex.get_level_values()`` with missing values (:issue:`5074`)
 
 pandas 0.12.0
 -------------
diff --git a/pandas/core/index.py b/pandas/core/index.py
index 8e98cc6fb25bb..465a0439c6eb3 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -393,6 +393,9 @@ def values(self):
     def get_values(self):
         return self.values
 
+    _na_value = np.nan
+    """The expected NA value to use with this index."""
+
     @property
     def is_monotonic(self):
         return self._engine.is_monotonic
@@ -2256,7 +2259,8 @@ def get_level_values(self, level):
         num = self._get_level_number(level)
         unique_vals = self.levels[num]  # .values
         labels = self.labels[num]
-        values = unique_vals.take(labels)
+        values = Index(com.take_1d(unique_vals.values, labels,
+                                   fill_value=unique_vals._na_value))
         values.name = self.names[num]
         return values
 
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
index 5404b30af8d1c..7e801c0a202db 100644
--- a/pandas/tests/test_index.py
+++ b/pandas/tests/test_index.py
@@ -1445,6 +1445,39 @@ def test_get_level_values(self):
         expected = self.index.get_level_values(0)
         self.assert_(np.array_equal(result, expected))
 
+    def test_get_level_values_na(self):
+        arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
+        index = pd.MultiIndex.from_arrays(arrays)
+        values = index.get_level_values(1)
+        expected = [1, np.nan, 2]
+        assert_array_equal(values.values.astype(float), expected)
+
+        arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
+        index = pd.MultiIndex.from_arrays(arrays)
+        values = index.get_level_values(1)
+        expected = [np.nan, np.nan, 2]
+        assert_array_equal(values.values.astype(float), expected)
+
+        arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
+        index = pd.MultiIndex.from_arrays(arrays)
+        values = index.get_level_values(0)
+        expected = [np.nan, np.nan, np.nan]
+        assert_array_equal(values.values.astype(float), expected)
+        values = index.get_level_values(1)
+        expected = ['a', np.nan, 1]
+        assert_array_equal(values.values, expected)
+
+        arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])]
+        index = pd.MultiIndex.from_arrays(arrays)
+        values = index.get_level_values(1)
+        expected = pd.DatetimeIndex([0, 1, pd.NaT])
+        assert_array_equal(values.values, expected.values)
+
+        arrays = [[], []]
+        index = pd.MultiIndex.from_arrays(arrays)
+        values = index.get_level_values(0)
+        self.assertEqual(values.shape, (0,))
+
     def test_reorder_levels(self):
         # this blows up
         assertRaisesRegexp(IndexError, '^Too many levels',
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index 281ac0cc8a35a..ce1bea96b9d4c 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -495,6 +495,9 @@ def _mpl_repr(self):
         # how to represent ourselves to matplotlib
         return tslib.ints_to_pydatetime(self.asi8, self.tz)
 
+    _na_value = tslib.NaT
+    """The expected NA value to use with this index."""
+
     def __unicode__(self):
         from pandas.core.format import _format_datetime64
         values = self.values