From aa19dbff844c11193dbf4ff761342f929d69de23 Mon Sep 17 00:00:00 2001 From: Daniel Grady Date: Sun, 31 Jan 2016 20:01:28 -0800 Subject: [PATCH] BUG: Handle variables named 'name' in get_dummies, #12180 --- doc/source/whatsnew/v0.18.0.txt | 1 + pandas/core/strings.py | 4 +++- pandas/tests/test_strings.py | 8 ++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 13b7b33fff527..fad4c7e3d5d0a 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -521,6 +521,7 @@ Bug Fixes - Bug in ``read_csv`` when reading from a ``StringIO`` in threads (:issue:`11790`) - Bug in not treating ``NaT`` as a missing value in datetimelikes when factorizing & with ``Categoricals`` (:issue:`12077`) - Bug in getitem when the values of a ``Series`` were tz-aware (:issue:`12089`) +- Bug in ``Series.str.get_dummies`` when one of the variables was 'name' (:issue:`12180`) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 1ffa836a75a1b..be78c950eff9d 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1105,9 +1105,11 @@ def _wrap_result(self, result, use_codes=True, name=None): if not hasattr(result, 'ndim'): return result - name = name or getattr(result, 'name', None) or self._orig.name if result.ndim == 1: + # Wait until we are sure result is a Series or Index before + # checking attributes (GH 12180) + name = name or getattr(result, 'name', None) or self._orig.name if isinstance(self._orig, Index): # if result is a boolean np.array, return the np.array # instead of wrapping it into a boolean Index (GH 8875) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f8255c4b4a410..bc540cc8bf92b 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -812,6 +812,14 @@ def test_get_dummies(self): idx = Index(['a|b', 'a|c', 'b|c']) idx.str.get_dummies('|') + # GH 12180 + # Dummies named 'name' should work as expected + s = Series(['a', 'b,name', 'b']) + result = s.str.get_dummies(',') + expected = DataFrame([[1, 0, 0], [0, 1, 1], [0, 1, 0]], + columns=['a', 'b', 'name']) + tm.assert_frame_equal(result, expected) + def test_join(self): values = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h']) result = values.str.split('_').str.join('_')