From 1b3f381e2cf5048e1ab7a0fa5d42588107229419 Mon Sep 17 00:00:00 2001 From: Goyo Date: Mon, 7 Oct 2013 21:44:38 +0200 Subject: [PATCH 1/2] BUG: MultiIndex.get_level_values() replaces NA by another value (#5074) --- doc/source/release.rst | 1 + pandas/core/index.py | 7 ++++++- pandas/tests/test_index.py | 33 +++++++++++++++++++++++++++++++++ pandas/tseries/index.py | 4 ++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 216b7f2ca6e5a..40ad07aea1ecf 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -569,6 +569,7 @@ Bug Fixes - Fixed a bug where default options were being overwritten in the option parser cleaning (:issue:`5121`). - Treat a list/ndarray identically for ``iloc`` indexing with list-like (:issue:`5006`) + - Fix ``MultiIndex.get_level_values()`` with missing values (:issue:`5074`) pandas 0.12.0 ------------- diff --git a/pandas/core/index.py b/pandas/core/index.py index 8e98cc6fb25bb..5cadd6de2ee5f 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -393,6 +393,10 @@ def values(self): def get_values(self): return self.values + def _na_value(self): + # The expected NA value to use with this index. + return np.nan + @property def is_monotonic(self): return self._engine.is_monotonic @@ -2256,7 +2260,8 @@ def get_level_values(self, level): num = self._get_level_number(level) unique_vals = self.levels[num] # .values labels = self.labels[num] - values = unique_vals.take(labels) + values = Index(com.take_1d(unique_vals.values, labels, + fill_value=unique_vals._na_value())) values.name = self.names[num] return values diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 5404b30af8d1c..7e801c0a202db 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1445,6 +1445,39 @@ def test_get_level_values(self): expected = self.index.get_level_values(0) self.assert_(np.array_equal(result, expected)) + def test_get_level_values_na(self): + arrays = [['a', 'b', 'b'], [1, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(1) + expected = [1, np.nan, 2] + assert_array_equal(values.values.astype(float), expected) + + arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(1) + expected = [np.nan, np.nan, 2] + assert_array_equal(values.values.astype(float), expected) + + arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(0) + expected = [np.nan, np.nan, np.nan] + assert_array_equal(values.values.astype(float), expected) + values = index.get_level_values(1) + expected = ['a', np.nan, 1] + assert_array_equal(values.values, expected) + + arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(1) + expected = pd.DatetimeIndex([0, 1, pd.NaT]) + assert_array_equal(values.values, expected.values) + + arrays = [[], []] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(0) + self.assertEqual(values.shape, (0,)) + def test_reorder_levels(self): # this blows up assertRaisesRegexp(IndexError, '^Too many levels', diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 281ac0cc8a35a..0b02235010c9c 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -495,6 +495,10 @@ def _mpl_repr(self): # how to represent ourselves to matplotlib return tslib.ints_to_pydatetime(self.asi8, self.tz) + def _na_value(self): + # The expected NA value to use with this index. + return tslib.NaT + def __unicode__(self): from pandas.core.format import _format_datetime64 values = self.values From 573fee62f334045e5d410ce1e4ee035409e7325b Mon Sep 17 00:00:00 2001 From: Goyo Date: Sun, 6 Oct 2013 14:51:27 +0200 Subject: [PATCH 2/2] Make _na_value a class attribute. --- pandas/core/index.py | 7 +++---- pandas/tseries/index.py | 5 ++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 5cadd6de2ee5f..465a0439c6eb3 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -393,9 +393,8 @@ def values(self): def get_values(self): return self.values - def _na_value(self): - # The expected NA value to use with this index. - return np.nan + _na_value = np.nan + """The expected NA value to use with this index.""" @property def is_monotonic(self): @@ -2261,7 +2260,7 @@ def get_level_values(self, level): unique_vals = self.levels[num] # .values labels = self.labels[num] values = Index(com.take_1d(unique_vals.values, labels, - fill_value=unique_vals._na_value())) + fill_value=unique_vals._na_value)) values.name = self.names[num] return values diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 0b02235010c9c..ce1bea96b9d4c 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -495,9 +495,8 @@ def _mpl_repr(self): # how to represent ourselves to matplotlib return tslib.ints_to_pydatetime(self.asi8, self.tz) - def _na_value(self): - # The expected NA value to use with this index. - return tslib.NaT + _na_value = tslib.NaT + """The expected NA value to use with this index.""" def __unicode__(self): from pandas.core.format import _format_datetime64