From 44c82df5befaed201e344f71689f13493adde8e4 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Tue, 9 Jul 2013 18:19:42 +0100 Subject: [PATCH] ENH drop_level argument for xs --- doc/source/release.rst | 1 + pandas/core/frame.py | 11 ++++++++--- pandas/core/index.py | 17 ++++++++++------- pandas/tests/test_frame.py | 12 ++++++++++++ 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index b301dcb80445a..bddf720a6b72e 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -48,6 +48,7 @@ pandas 0.13 overlapping color and style arguments (:issue:`4402`) - Significant table writing performance improvements in ``HDFStore`` - JSON date serialisation now performed in low-level C code. + - Add ``drop_level`` argument to ``DataFrame.xs`` (:issue:`4180`) - ``Index.copy()`` and ``MultiIndex.copy()`` now accept keyword arguments to change attributes (i.e., ``names``, ``levels``, ``labels``) (:issue:`4039`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 31f7179f8e328..60492b13c30b8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2063,7 +2063,7 @@ def _sanitize_column(self, key, value): def _series(self): return self._data.get_series_dict() - def xs(self, key, axis=0, level=None, copy=True): + def xs(self, key, axis=0, level=None, copy=True, drop_level=True): """ Returns a cross-section (row(s) or column(s)) from the DataFrame. Defaults to cross-section on the rows (axis=0). @@ -2079,6 +2079,8 @@ def xs(self, key, axis=0, level=None, copy=True): which levels are used. Levels can be referred by label or position. copy : boolean, default True Whether to make a copy of the data + drop_level : boolean, default True + If False, returns object with same levels as self. 
Examples -------- @@ -2130,11 +2132,13 @@ def xs(self, key, axis=0, level=None, copy=True): Returns ------- xs : Series or DataFrame + """ axis = self._get_axis_number(axis) labels = self._get_axis(axis) if level is not None: - loc, new_ax = labels.get_loc_level(key, level=level) + loc, new_ax = labels.get_loc_level(key, level=level, + drop_level=drop_level) if not copy and not isinstance(loc, slice): raise ValueError('Cannot retrieve view (copy=False)') @@ -2168,7 +2172,8 @@ def xs(self, key, axis=0, level=None, copy=True): index = self.index if isinstance(index, MultiIndex): - loc, new_index = self.index.get_loc_level(key) + loc, new_index = self.index.get_loc_level(key, + drop_level=drop_level) else: loc = self.index.get_loc(key) diff --git a/pandas/core/index.py b/pandas/core/index.py index 22bd7f318a237..05eb53a444294 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2560,7 +2560,7 @@ def get_loc(self, key): else: return self._get_level_indexer(key, level=0) - def get_loc_level(self, key, level=0): + def get_loc_level(self, key, level=0, drop_level=True): """ Get integer location slice for requested label or tuple @@ -2572,7 +2572,9 @@ def get_loc_level(self, key, level=0): ------- loc : int or slice object """ - def _drop_levels(indexer, levels): + def _maybe_drop_levels(indexer, levels, drop_level): + if not drop_level: + return self[indexer] # kludgearound new_index = self[indexer] levels = [self._get_level_number(i) for i in levels] @@ -2593,7 +2595,8 @@ def _drop_levels(indexer, levels): loc = mask result = loc if result is None else result & loc - return result, _drop_levels(result, level) + + return result, _maybe_drop_levels(result, level, drop_level) level = self._get_level_number(level) @@ -2606,7 +2609,7 @@ def _drop_levels(indexer, levels): try: if key in self.levels[0]: indexer = self._get_level_indexer(key, level=level) - new_index = _drop_levels(indexer, [0]) + new_index = _maybe_drop_levels(indexer, [0], drop_level) return 
indexer, new_index except TypeError: pass @@ -2625,7 +2628,7 @@ def _drop_levels(indexer, levels): raise KeyError(key) ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] - return indexer, _drop_levels(indexer, ilevels) + return indexer, _maybe_drop_levels(indexer, ilevels, drop_level) else: indexer = None for i, k in enumerate(key): @@ -2652,10 +2655,10 @@ def _drop_levels(indexer, levels): indexer = slice(None, None) ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] - return indexer, _drop_levels(indexer, ilevels) + return indexer, _maybe_drop_levels(indexer, ilevels, drop_level) else: indexer = self._get_level_indexer(key, level=level) - new_index = _drop_levels(indexer, [level]) + new_index = _maybe_drop_levels(indexer, [level], drop_level) return indexer, new_index def _get_level_indexer(self, key, level=0): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 517c984fa0e64..8e769ed91137f 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7261,6 +7261,18 @@ def test_xs_duplicates(self): exp = df.irow(2) assert_series_equal(cross, exp) + def test_xs_keep_level(self): + df = DataFrame({'day': {0: 'sat', 1: 'sun'}, + 'flavour': {0: 'strawberry', 1: 'strawberry'}, + 'sales': {0: 10, 1: 12}, + 'year': {0: 2008, 1: 2008}}).set_index(['year','flavour','day']) + result = df.xs('sat', level='day', drop_level=False) + expected = df[:1] + assert_frame_equal(result, expected) + + result = df.xs([2008, 'sat'], level=['year', 'day'], drop_level=False) + assert_frame_equal(result, expected) + def test_pivot(self): data = { 'index': ['A', 'B', 'C', 'C', 'B', 'A'],