From b5dde1e6f4f75aeceacddb28469afe2d3ec633f7 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 11:36:50 -0500 Subject: [PATCH 01/22] ENH: adds warning when setting list-like into attribute Adds warning in generic setattr logical branch for when attribute does not exist and user is supplying a list-like object. Warning states that Series cannot be assigned into nonexistent columns, and includes a link to stable documentation. Closes #7175. --- pandas/core/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 442ec93d94023..5f3da15eade22 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3357,6 +3357,10 @@ def __setattr__(self, name, value): else: object.__setattr__(self, name, value) except (AttributeError, TypeError): + if (self.ndim > 1) and (is_list_like(value)): + warnings.warn("Pandas doesn't allow Series to be assigned " + "into nonexistent columns - see " + "https://pandas.pydata.org/pandas-docs/stable""/indexing.html#attribute-access") object.__setattr__(self, name, value) # ---------------------------------------------------------------------- From 4d3f87d13e144a615ab46f1e1883a4b3a96524bc Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 11:41:38 -0500 Subject: [PATCH 02/22] TST: Adds tests for warning behavior on set on nonexistent attr --- pandas/tests/dtypes/test_generic.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 653d7d3082c08..5430efbbf8c97 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd from pandas.core.dtypes import generic as gt +import pytest class TestABCClasses(object): @@ -17,6 +18,7 @@ class TestABCClasses(object): df = pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index) sparse_series = pd.Series([1, 2, 3]).to_sparse() 
sparse_array = pd.SparseArray(np.random.randn(10)) + series = pd.Series([1, 2, 3]) def test_abc_types(self): assert isinstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) @@ -38,3 +40,8 @@ def test_abc_types(self): assert isinstance(self.sparse_array, gt.ABCSparseArray) assert isinstance(self.categorical, gt.ABCCategorical) assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) + with catch_warnings(record=True) as w: + self.series.not_an_index = [1, 2] + assert len(w) == 0 # fail if false warning on Series + with pytest.warns(UserWarning): + self.df.not_a_column = [1, 2] From fb298570245c9409d17319f8e20aa4e40f9fb13b Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 11:44:23 -0500 Subject: [PATCH 03/22] DOC: replaces silent error for setattr on nonexistent name with UserWarning --- doc/source/indexing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 1659d57b33b84..b83d938d9203f 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -228,7 +228,7 @@ as an attribute: panel.one You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; -if you try to use attribute access to create a new column, it fails silently, creating a new attribute rather than a +if you try to use attribute access to create a new column, it issues a `UserWarning` and creates a new attribute rather than a new column. .. 
ipython:: python From af5ab38db6cf81ec0329f530e10511eb5d470ba6 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 11:51:22 -0500 Subject: [PATCH 04/22] CLN: juggles doc url to fit within 80 char per line --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5f3da15eade22..2415623201066 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3360,7 +3360,8 @@ def __setattr__(self, name, value): if (self.ndim > 1) and (is_list_like(value)): warnings.warn("Pandas doesn't allow Series to be assigned " "into nonexistent columns - see " - "https://pandas.pydata.org/pandas-docs/stable""/indexing.html#attribute-access") + "https://pandas.pydata.org/pandas-docs/" + "stable/indexing.html#attribute-access") object.__setattr__(self, name, value) # ---------------------------------------------------------------------- From f485e775b55acffc42157f7329c5246ede88e55d Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 12:26:35 -0500 Subject: [PATCH 05/22] BUG: replaces single backticks with double backticks for rst --- doc/source/indexing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index b83d938d9203f..d04cad1a1e5e8 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -228,7 +228,7 @@ as an attribute: panel.one You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; -if you try to use attribute access to create a new column, it issues a `UserWarning` and creates a new attribute rather than a +if you try to use attribute access to create a new column, it issues a ```UserWarning`` and creates a new attribute rather than a new column. .. 
ipython:: python From bfb9f93364e297cba904dbf9ee9b6d12653ed1a9 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 12:51:20 -0500 Subject: [PATCH 06/22] BUG: adds stacklevel to warning call --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2415623201066..1a8c590cbe174 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3361,7 +3361,8 @@ def __setattr__(self, name, value): warnings.warn("Pandas doesn't allow Series to be assigned " "into nonexistent columns - see " "https://pandas.pydata.org/pandas-docs/" - "stable/indexing.html#attribute-access") + "stable/indexing.html#attribute-access", + stacklevel=2) object.__setattr__(self, name, value) # ---------------------------------------------------------------------- From 61eca9d86393275340898b8c83bc5826afd37655 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 13:42:45 -0500 Subject: [PATCH 07/22] DOC: adds 0.21.0 for appearance of setting warnings --- doc/source/indexing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index d04cad1a1e5e8..095c452c62701 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -228,8 +228,8 @@ as an attribute: panel.one You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; -if you try to use attribute access to create a new column, it issues a ```UserWarning`` and creates a new attribute rather than a -new column. +if you try to use attribute access to create a new column, it creates a new attribute rather than a +new column. This behavior will incur a ``UserWarning`` in 0.21.0 and later. .. 
ipython:: python From 15ebdbc0847f22939523cace53c6f1acc701e219 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 13:44:30 -0500 Subject: [PATCH 08/22] ENH: Add warning when colname collides with methods Current behavior does not allow attribute-like access when the column name of a DataFrame matches the name of a method. This commit adds an explicit warning about this behavior. Closes #5904. --- pandas/core/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1a8c590cbe174..497651526c4cf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1907,6 +1907,10 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): + if callable(getattr(self, key, None)): + warnings.warn("Pandas doesn't allow attribute-like access to " + "columns whose names collide with methods", + stacklevel=3) self._data.set(key, value) self._clear_item_cache() From 30d98dec2966d73fb428d5ed7d40ff09168630d9 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 13:46:15 -0500 Subject: [PATCH 09/22] TST: Adds testing around warnings for setting attributes --- pandas/tests/dtypes/test_generic.py | 35 ++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 5430efbbf8c97..2803e1614c19c 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd from pandas.core.dtypes import generic as gt -import pytest +from pandas.util import testing as tm class TestABCClasses(object): @@ -40,8 +40,33 @@ def test_abc_types(self): assert isinstance(self.sparse_array, gt.ABCSparseArray) assert isinstance(self.categorical, gt.ABCCategorical) assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) + + +class TestABCWarnings(object): + # GH5904 - Suggestion: 
Warning for DataFrame colname-methodname clash + # GH7175 - GOTCHA: You can't use dot notation to add a column... + d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), + 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} + df = pd.DataFrame(d) + + def test_setattr_warnings(self): + with catch_warnings(record=True) as w: + # successfully add new column + self.df['three'] = self.df.two + 1 + assert len(w) == 0 + assert self.df.three.sum() > self.df.two.sum() + with catch_warnings(record=True) as w: + # successfully modify column in place + self.df.one += 1 + assert len(w) == 0 + assert self.df.one.iloc[0] == 2 with catch_warnings(record=True) as w: - self.series.not_an_index = [1, 2] - assert len(w) == 0 # fail if false warning on Series - with pytest.warns(UserWarning): - self.df.not_a_column = [1, 2] + # successfully add an attribute to a series + self.df.two.not_an_index = [1, 2] + assert len(w) == 0 + with tm.assert_produces_warning(UserWarning): + # warn when setting column to nonexistent name + self.df.four = self.df.two + 2 + with tm.assert_produces_warning(UserWarning): + # warn when column has same name as method + self.df['sum'] = self.df.two From 8d38f68e4a26d534e1edbb1aa41f8c252e89994d Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 14:15:16 -0500 Subject: [PATCH 10/22] CLN: reformatting and cleanup of setting tests --- pandas/tests/dtypes/test_generic.py | 49 +++++++++++++++-------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 2803e1614c19c..73d9c0b1a1f49 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -18,7 +18,6 @@ class TestABCClasses(object): df = pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index) sparse_series = pd.Series([1, 2, 3]).to_sparse() sparse_array = pd.SparseArray(np.random.randn(10)) - series = pd.Series([1, 2, 3]) def 
test_abc_types(self): assert isinstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) @@ -42,31 +41,35 @@ def test_abc_types(self): assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) -class TestABCWarnings(object): +def test_setattr_warnings(): # GH5904 - Suggestion: Warning for DataFrame colname-methodname clash # GH7175 - GOTCHA: You can't use dot notation to add a column... d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} df = pd.DataFrame(d) - def test_setattr_warnings(self): - with catch_warnings(record=True) as w: - # successfully add new column - self.df['three'] = self.df.two + 1 - assert len(w) == 0 - assert self.df.three.sum() > self.df.two.sum() - with catch_warnings(record=True) as w: - # successfully modify column in place - self.df.one += 1 - assert len(w) == 0 - assert self.df.one.iloc[0] == 2 - with catch_warnings(record=True) as w: - # successfully add an attribute to a series - self.df.two.not_an_index = [1, 2] - assert len(w) == 0 - with tm.assert_produces_warning(UserWarning): - # warn when setting column to nonexistent name - self.df.four = self.df.two + 2 - with tm.assert_produces_warning(UserWarning): - # warn when column has same name as method - self.df['sum'] = self.df.two + with catch_warnings(record=True) as w: + # successfully add new column + df['three'] = df.two + 1 + assert len(w) == 0 + assert df.three.sum() > df.two.sum() + + with catch_warnings(record=True) as w: + # successfully modify column in place + df.one += 1 + assert len(w) == 0 + assert df.one.iloc[0] == 2 + + with catch_warnings(record=True) as w: + # successfully add an attribute to a series + df.two.not_an_index = [1, 2] + assert len(w) == 0 + + with tm.assert_produces_warning(UserWarning): + # warn when setting column to nonexistent name + df.four = df.two + 2 + assert df.four.sum() > df.two.sum() + + with tm.assert_produces_warning(UserWarning): + # warn when column has same name as 
method + df['sum'] = df.two From c9c43db5579a94a44e741ad88a177c56108ca804 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 14:26:52 -0500 Subject: [PATCH 11/22] ENH: Clarifies wording of warning message for column name collision --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 497651526c4cf..d626d1e42a93e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1908,9 +1908,9 @@ def _slice(self, slobj, axis=0, kind=None): def _set_item(self, key, value): if callable(getattr(self, key, None)): - warnings.warn("Pandas doesn't allow attribute-like access to " - "columns whose names collide with methods", - stacklevel=3) + warnings.warn("Column name '{key}' collides with a built-in " + "method, which will cause unexpected attribute " + "behavior".format(key=key), stacklevel=3) self._data.set(key, value) self._clear_item_cache() From 0a25a569add73e8c5a76710e9961ce83c66aa4e1 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 15 Jul 2017 16:26:17 -0500 Subject: [PATCH 12/22] BUG: coerces _set_item key to a string --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d626d1e42a93e..0e8e180aeab81 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1907,7 +1907,7 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): - if callable(getattr(self, key, None)): + if callable(getattr(self, str(key), None)): warnings.warn("Column name '{key}' collides with a built-in " "method, which will cause unexpected attribute " "behavior".format(key=key), stacklevel=3) From 150d58678005cbe808ce4526bb2b47eb10c4fe00 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Tue, 18 Jul 2017 19:20:43 -0500 Subject: [PATCH 13/22] FIX: only warn getattr when key is str --- pandas/core/generic.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0e8e180aeab81..a24fb4e5a1e34 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1907,7 +1907,7 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): - if callable(getattr(self, str(key), None)): + if isinstance(key, string_types) and callable(getattr(self, key, None)): warnings.warn("Column name '{key}' collides with a built-in " "method, which will cause unexpected attribute " "behavior".format(key=key), stacklevel=3) From 02cb4263965a8ab790513f2772af2d80f67514f5 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Tue, 18 Jul 2017 19:21:35 -0500 Subject: [PATCH 14/22] TST: adds comments about tests which should not warn --- pandas/tests/dtypes/test_generic.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 73d9c0b1a1f49..ec850cc34e23b 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -49,27 +49,30 @@ def test_setattr_warnings(): df = pd.DataFrame(d) with catch_warnings(record=True) as w: - # successfully add new column + # successfully add new column + # this should not raise a warning df['three'] = df.two + 1 assert len(w) == 0 assert df.three.sum() > df.two.sum() with catch_warnings(record=True) as w: - # successfully modify column in place + # successfully modify column in place + # this should not raise a warning df.one += 1 assert len(w) == 0 assert df.one.iloc[0] == 2 with catch_warnings(record=True) as w: - # successfully add an attribute to a series + # successfully add an attribute to a series + # this should not raise a warning df.two.not_an_index = [1, 2] assert len(w) == 0 with tm.assert_produces_warning(UserWarning): - # warn when setting column to nonexistent name + # warn when setting column to nonexistent name df.four = df.two + 2 
assert df.four.sum() > df.two.sum() - + with tm.assert_produces_warning(UserWarning): - # warn when column has same name as method + # warn when column has same name as method df['sum'] = df.two From 562c1644b8bc7627313de62ae2115ee1b4de7855 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Tue, 18 Jul 2017 20:10:19 -0500 Subject: [PATCH 15/22] DOC: adds column/attribute warnings to whatsnew --- doc/source/whatsnew/v0.21.0.txt | 58 ++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2f61b71d06019..0b3868c7792b3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -29,7 +29,6 @@ New features - Added ``skipna`` parameter to :func:`~pandas.api.types.infer_dtype` to support type inference in the presence of missing values (:issue:`17059`). - .. _whatsnew_0210.enhancements.infer_objects: ``infer_objects`` type conversion @@ -62,6 +61,63 @@ using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedel df['C'] = pd.to_numeric(df['C'], errors='coerce') df.dtypes +.. _whatsnew_0210.enhancements.column_creation: + +Improved warnings when attempting to create columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +New users are often flummoxed by the relationship between column operations and attribute +access on ``DataFrame`` instances (:issue:`5904` & :issue:`7175`). Two specific instances +of this confusion include attempting to create a new column by setting into an attribute: + +.. code-block:: ipython + + In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In[2]: df.two = [4, 5, 6] + +which does not raise any obvious exceptions, but also does not create a new column: + +.. code-block:: ipython + + In[3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +and creating a column whose name collides with a method or attribute already in the instance +namespace: + +.. 
code-block:: ipython + + In[4]: df['sum'] = [5., 7., 9.] + +which does not permit that column to be accessed as an attribute: + +.. code-block:: ipython + + In[5]: df.sum + Out[5]: + + +Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. Upon executing input 2, you can now expect to see: + +.. code-block:: ipython + + In[2]: df.two = [4, 5, 6] + UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access + +and the example in input 4 will now produce: + +.. code-block:: ipython + + In[4]: df['sum'] = [5., 7., 9.] + UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior + .. _whatsnew_0210.enhancements.other: Other Enhancements From f3fdd19d6018a2144361323251a7ba419312b93b Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Wed, 19 Jul 2017 07:05:27 -0500 Subject: [PATCH 16/22] DOC: moves examples from whatsnew to indexing --- doc/source/indexing.rst | 35 +++++++++++++++++++++++++++++---- doc/source/whatsnew/v0.21.0.txt | 24 ++++++---------------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 095c452c62701..0431d07a39241 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -227,10 +227,6 @@ as an attribute: dfa.A panel.one -You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; -if you try to use attribute access to create a new column, it creates a new attribute rather than a -new column. This behavior will incur a ``UserWarning`` in 0.21.0 and later. - .. 
ipython:: python sa.a = 5 @@ -267,6 +263,37 @@ You can also assign a ``dict`` to a row of a ``DataFrame``: x.iloc[1] = dict(x=9, y=99) x +You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; +if you try to use attribute access to create a new column, it creates a new attribute rather than a +new column. In 0.21.0 and later, this will raise a ``UserWarning``: + +.. code-block:: ipython + + In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In[2]: df.two = [4, 5, 6] + UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access + In[3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +Similarly, it is possible to create a column with a name which collides with one of Pandas's +built-in methods or attributes, which can cause confusion later when attempting to access +that column as an attribute. This behavior now warns: + +.. code-block:: ipython + + In[4]: df['sum'] = [5., 7., 9.] + UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior + In[5]: df.sum + Out[5]: + + Slicing ranges -------------- diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0b3868c7792b3..ba357b5834dc6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -61,7 +61,7 @@ using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedel df['C'] = pd.to_numeric(df['C'], errors='coerce') df.dtypes -.. _whatsnew_0210.enhancements.column_creation: +.. 
_whatsnew_0210.enhancements.column-creation: Improved warnings when attempting to create columns ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -75,7 +75,7 @@ of this confusion include attempting to create a new column by setting into an a In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) In[2]: df.two = [4, 5, 6] -which does not raise any obvious exceptions, but also does not create a new column: +This does not raise any obvious exceptions, but also does not create a new column: .. code-block:: ipython @@ -86,14 +86,14 @@ which does not raise any obvious exceptions, but also does not create a new colu 1 2.0 2 3.0 -and creating a column whose name collides with a method or attribute already in the instance -namespace: +The second source of confusion is creating a column whose name collides with a method or +attribute already in the instance namespace: .. code-block:: ipython In[4]: df['sum'] = [5., 7., 9.] -which does not permit that column to be accessed as an attribute: +This does not permit that column to be accessed as an attribute: .. code-block:: ipython @@ -104,19 +104,7 @@ which does not permit that column to be accessed as an attribute: 1 2.0 7.0 2 3.0 9.0> -Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. Upon executing input 2, you can now expect to see: - -.. code-block:: ipython - - In[2]: df.two = [4, 5, 6] - UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access - -and the example in input 4 will now produce: - -.. code-block:: ipython - - In[4]: df['sum'] = [5., 7., 9.] - UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior +Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. See `Attribute Access `__. .. 
_whatsnew_0210.enhancements.other: From c6fe06214caa7f48e8c028071a34ec6111d466e0 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Wed, 19 Jul 2017 07:06:54 -0500 Subject: [PATCH 17/22] FIX: replaces ndim check with ABC subclass check --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a24fb4e5a1e34..31020bd354e00 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -26,8 +26,8 @@ is_re_compilable, pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask -from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.generic import ABCSeries, ABCPanel +from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame from pandas.core.common import (_values_from_object, _maybe_box_datetimelike, @@ -3361,7 +3361,7 @@ def __setattr__(self, name, value): else: object.__setattr__(self, name, value) except (AttributeError, TypeError): - if (self.ndim > 1) and (is_list_like(value)): + if isinstance(self, ABCDataFrame) and (is_list_like(value)): warnings.warn("Pandas doesn't allow Series to be assigned " "into nonexistent columns - see " "https://pandas.pydata.org/pandas-docs/" From 8ad4d057b8ada3039fb955e78e1f83225bc84047 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sun, 23 Jul 2017 15:42:26 -0500 Subject: [PATCH 18/22] CLN: breaks line to avoid 81 char length --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 31020bd354e00..604f47b019b1c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1907,7 +1907,8 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): - if isinstance(key, string_types) and callable(getattr(self, key, None)): + if (isinstance(key, string_types) + and 
callable(getattr(self, key, None))): warnings.warn("Column name '{key}' collides with a built-in " "method, which will cause unexpected attribute " "behavior".format(key=key), stacklevel=3) From e5cc166a116123f3c5ba691c9ed1aa27dcfc23aa Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sun, 23 Jul 2017 21:50:44 -0500 Subject: [PATCH 19/22] TST: renames 'bool' columns in pytables test to avoid collision warning --- pandas/tests/io/test_pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index fc17b5f85b68c..f33ba7627101e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2011,7 +2011,7 @@ def check(obj, comparator): df['string'] = 'foo' df['float322'] = 1. df['float322'] = df['float322'].astype('float32') - df['bool'] = df['float322'] > 0 + df['boolean'] = df['float322'] > 0 df['time1'] = Timestamp('20130101') df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) @@ -2141,7 +2141,7 @@ def test_table_values_dtypes_roundtrip(self): df1['string'] = 'foo' df1['float322'] = 1. df1['float322'] = df1['float322'].astype('float32') - df1['bool'] = df1['float32'] > 0 + df1['boolean'] = df1['float32'] > 0 df1['time1'] = Timestamp('20130101') df1['time2'] = Timestamp('20130102') From e02244dc5963d50818880c28e565ff53cdce5f3d Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Wed, 26 Jul 2017 23:41:46 -0600 Subject: [PATCH 20/22] BUG: fixes 'string_types' encoding failure with unicode cols in py2 As part of warning check, object type of potential attributes was checked for subtypes of pd.compat.str_types before being checked for overlap with methods defined on ndframes. This causes decode errors in Python2 when users attempt to add columns with unicode column names. Fix is to compare against `str`. 
--- pandas/core/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 604f47b019b1c..14603954ad1ef 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1907,8 +1907,7 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): - if (isinstance(key, string_types) - and callable(getattr(self, key, None))): + if isinstance(key, str) and callable(getattr(self, key, None)): warnings.warn("Column name '{key}' collides with a built-in " "method, which will cause unexpected attribute " "behavior".format(key=key), stacklevel=3) From d38ecca4a69e331f4b2cedcb93d3f69e5b047524 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Sat, 29 Jul 2017 10:47:25 -0600 Subject: [PATCH 21/22] BUG: fixes reversion of isnull/isna produced in merge --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 14603954ad1ef..2d52eed81d22b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -26,7 +26,7 @@ is_re_compilable, pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame from pandas.core.common import (_values_from_object, From b86546e92707483dbbdf0ee58f83311f2e896323 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Fri, 4 Aug 2017 07:13:50 -0500 Subject: [PATCH 22/22] DOC: fixes references in indexing and whatsnew --- doc/source/indexing.rst | 2 +- doc/source/whatsnew/v0.21.0.txt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 0431d07a39241..53a259ad6eb15 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -271,7 +271,7 @@ new column. 
In 0.21.0 and later, this will raise a ``UserWarning``: In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) In[2]: df.two = [4, 5, 6] - UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access + UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute_access In[3]: df Out[3]: one diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ba357b5834dc6..d9439e0d785f6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -61,10 +61,10 @@ using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedel df['C'] = pd.to_numeric(df['C'], errors='coerce') df.dtypes -.. _whatsnew_0210.enhancements.column-creation: +.. _whatsnew_0210.enhancements.attribute_access: Improved warnings when attempting to create columns -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ New users are often flummoxed by the relationship between column operations and attribute access on ``DataFrame`` instances (:issue:`5904` & :issue:`7175`). Two specific instances @@ -104,7 +104,7 @@ This does not permit that column to be accessed as an attribute: 1 2.0 7.0 2 3.0 9.0> -Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. See `Attribute Access `__. +Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access <indexing.attribute_access>`. .. _whatsnew_0210.enhancements.other: