From b985ea8c514c5c7ac76740105b2968ca86aa2104 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 23 Feb 2018 09:29:51 -0600 Subject: [PATCH 01/17] ENH: ExtensionArray.setitem --- pandas/core/internals.py | 36 +++--- pandas/tests/extension/base/__init__.py | 1 + pandas/tests/extension/base/setitem.py | 116 ++++++++++++++++++ .../extension/category/test_categorical.py | 4 + .../tests/extension/decimal/test_decimal.py | 4 + pandas/tests/extension/json/test_json.py | 3 + pandas/util/_validators.py | 49 +++++++- 7 files changed, 192 insertions(+), 21 deletions(-) create mode 100644 pandas/tests/extension/base/setitem.py diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 00ef8f9cef598..08c1871e86fec 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -65,7 +65,7 @@ import pandas.core.algorithms as algos from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.core.indexing import maybe_convert_indices, length_of_indexer +from pandas.core.indexing import maybe_convert_indices from pandas.core.arrays import Categorical from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex @@ -79,7 +79,8 @@ from pandas._libs.tslibs import conversion from pandas.util._decorators import cache_readonly -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import ( + validate_bool_kwarg, validate_setitem_lengths) from pandas import compat from pandas.compat import range, map, zip, u @@ -874,22 +875,7 @@ def setitem(self, indexer, value, mgr=None): values = transf(values) # length checking - # boolean with truth values == len of the value is ok too - if isinstance(indexer, (np.ndarray, list)): - if is_list_like(value) and len(indexer) != len(value): - if not (isinstance(indexer, np.ndarray) and - indexer.dtype == np.bool_ and - len(indexer[indexer]) == len(value)): - raise ValueError("cannot set using a list-like indexer " - "with a different length than the value") - - # slice - elif isinstance(indexer, slice): - - if is_list_like(value) and len(values): - if len(value) != length_of_indexer(indexer, values): - raise ValueError("cannot set using a slice indexer with a " - "different length than the value") + validate_setitem_lengths(indexer, value, values) def _is_scalar_indexer(indexer): # return True if we are all scalar indexers @@ -1898,6 +1884,15 @@ def is_view(self): """Extension arrays are never treated as views.""" return False + def setitem(self, indexer, value, mgr=None): + if isinstance(indexer, tuple): + # we are always 1-D + indexer = indexer[0] + + validate_setitem_lengths(indexer, value, self.values) + self.values[indexer] = value + return self + def get_values(self, dtype=None): # ExtensionArrays must be iterable, so this works. values = np.asarray(self.values) @@ -3489,7 +3484,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, # with a .values attribute. aligned_args = dict((k, kwargs[k]) for k in align_keys - if hasattr(kwargs[k], 'values')) + if hasattr(kwargs[k], 'values') and + not isinstance(kwargs[k], ABCExtensionArray)) for b in self.blocks: if filter is not None: @@ -5190,7 +5186,7 @@ def _safe_reshape(arr, new_shape): If possible, reshape `arr` to have shape `new_shape`, with a couple of exceptions (see gh-13012): - 1) If `arr` is a Categorical or Index, `arr` will be + 1) If `arr` is a ExtensionArray or Index, `arr` will be returned as is. 2) If `arr` is a Series, the `_values` attribute will be reshaped and returned. diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 27c106efd0524..4ea6db885d9c6 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -48,3 +48,4 @@ class TestMyDtype(BaseDtypeTests): from .methods import BaseMethodsTests # noqa from .missing import BaseMissingTests # noqa from .reshaping import BaseReshapingTests # noqa +from .setitem import BaseSetitemTests # noqa diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py new file mode 100644 index 0000000000000..6b32e6f955b5e --- /dev/null +++ b/pandas/tests/extension/base/setitem.py @@ -0,0 +1,116 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from .base import BaseExtensionTests + + +class BaseSetitemTests(BaseExtensionTests): + def test_setitem_scalar_series(self, data): + arr = pd.Series(data) + arr[0] = data[1] + assert arr[0] == data[1] + + def test_setitem_sequence(self, data): + arr = pd.Series(data) + original = data.copy() + + arr[[0, 1]] = [data[1], data[0]] + assert arr[0] == original[1] + assert arr[1] == original[0] + + @pytest.mark.parametrize('as_array', [True, False]) + def test_setitem_sequence_mismatched_length_raises(self, data, as_array): + ser = pd.Series(data) + value = [data[0]] + if as_array: + value = type(data)(value) + + xpr = 'cannot set using a {} indexer with a different length' + with tm.assert_raises_regex(ValueError, xpr.format('list-like')): + ser[[0, 1]] = value + + with tm.assert_raises_regex(ValueError, xpr.format('slice')): + ser[slice(3)] = value + + def test_setitem_empty_indxer(self, data): + ser = pd.Series(data) + original = ser.copy() + ser[[]] = [] + tm.assert_series_equal(ser, original) + + def test_setitem_sequence_broadcasts(self, data): + arr = pd.Series(data) + + arr[[0, 1]] = data[2] + assert arr[0] == data[2] + assert arr[1] == data[2] + + @pytest.mark.parametrize('setter', ['loc', 'iloc']) + def test_set_scalar(self, data, setter): + arr = pd.Series(data) + setter = getattr(arr, setter) + operator.setitem(setter, 0, data[1]) + assert arr[0] == data[1] + + def test_set_loc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.loc[0, 'B'] = data[1] + assert df.loc[0, 'B'] == data[1] + + def test_set_loc_scalar_single(self, data): + df = pd.DataFrame({"B": data}) + df.loc[10, 'B'] = data[1] + assert df.loc[10, 'B'] == data[1] + + def test_set_loc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.loc[10, 'B'] = data[1] + assert df.loc[10, 'B'] == data[1] + + def test_set_iloc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.iloc[0, 1] = data[1] + assert df.loc[0, 'B'] == data[1] + + def test_set_iloc_scalar_single(self, data): + df = pd.DataFrame({"B": data}) + df.iloc[10, 0] = data[1] + assert df.loc[10, 'B'] == data[1] + + def test_set_iloc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.iloc[10, 1] = data[1] + assert df.loc[10, 'B'] == data[1] + + @pytest.mark.parametrize('as_callable', [True, False]) + def test_set_mask_aligned(self, data, as_callable): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + if as_callable: + mask2 = lambda x: mask + else: + mask2 = mask + + ser[mask2] = data[5:7] + assert ser[0] == data[5] + assert ser[1] == data[6] + + def test_set_mask_broadcast(self, data): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + ser[mask] = data[10] + assert ser[0] == data[10] + assert ser[1] == data[10] + + def test_setitem_expand_columns(self, data): + df = pd.DataFrame({"A": data}) + df['B'] = 1 + assert len(df.columns) == 2 diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 8f413b4a19730..60e477de525df 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -68,6 +68,10 @@ def test_getitem_scalar(self): pass +class TestSetitem(base.BaseSetitemTests): + pass + + class TestMissing(base.BaseMissingTests): pass diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 7b4d079ecad87..59b2e5bcd6084 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -116,6 +116,10 @@ class TestCasting(base.BaseCastingTests): pass +class TestDecimal(base.BaseSetitemTests): + pass + + def test_series_constructor_coerce_data_to_extension_dtype_raises(): xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the " "extension array directly.") diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index e0721bb1d8d1a..4f9f4288406bc 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -71,3 +71,6 @@ def test_value_counts(self, all_data, dropna): class TestCasting(base.BaseCastingTests): pass + +# We intentionally don't run base.BaseSetitemTests because pandas' +# internals has trouble setting sequences of values into scalar positions. diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index a96563051e7de..00876e1f029b7 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -4,7 +4,9 @@ """ import warnings -from pandas.core.dtypes.common import is_bool +import numpy as np + +from pandas.core.dtypes.common import is_bool, is_list_like def _check_arg_length(fname, args, max_fname_arg_count, compat_args): @@ -356,3 +358,48 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): raise ValueError("Cannot specify both 'value' and 'method'.") return value, method + + +def validate_setitem_lengths(indexer, value, values): + """Validate that value and indexer are the same length. + + An special-case is allowed for when the indexer is a boolean array + and the number of true values equals the length of ``value``. In + this case, no exception is raised. + + Parameters + ---------- + indexer : sequence + The key for the setitem + value : array-like + The value for the setitem + values : array-like + The values being set into + + Returns + ------- + None + + Raises + ------ + ValueError + When the indexer is an ndarray or list and the lengths don't + match. + """ + from pandas.core.indexing import length_of_indexer + + # boolean with truth values == len of the value is ok too + if isinstance(indexer, (np.ndarray, list)): + if is_list_like(value) and len(indexer) != len(value): + if not (isinstance(indexer, np.ndarray) and + indexer.dtype == np.bool_ and + len(indexer[indexer]) == len(value)): + raise ValueError("cannot set using a list-like indexer " + "with a different length than the value") + # slice + elif isinstance(indexer, slice): + + if is_list_like(value) and len(values): + if len(value) != length_of_indexer(indexer, values): + raise ValueError("cannot set using a slice indexer with a " + "different length than the value") From 9489f6c141dfce32fb984061b3b94df13ce7b8fd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Feb 2018 12:49:46 -0600 Subject: [PATCH 02/17] Avoid tm.assert --- pandas/tests/extension/base/setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 6b32e6f955b5e..d5d01d02221a2 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -40,7 +40,7 @@ def test_setitem_empty_indxer(self, data): ser = pd.Series(data) original = ser.copy() ser[[]] = [] - tm.assert_series_equal(ser, original) + self.assert_series_equal(ser, original) def test_setitem_sequence_broadcasts(self, data): arr = pd.Series(data) From 79da90e8696ddc5921ba3b7e8b42eb369eca11c6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Feb 2018 06:08:29 -0600 Subject: [PATCH 03/17] Docstring --- pandas/core/internals.py | 43 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 08c1871e86fec..fda9f100f7841 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -816,11 +816,24 @@ def _replace_single(self, *args, **kwargs): return self if kwargs['inplace'] else self.copy() def setitem(self, indexer, value, mgr=None): - """ set the value inplace; return a new block (of a possibly different - dtype) + """Set the value inplace, returning a a maybe different typed block. - indexer is a direct slice/positional indexer; value must be a - compatible shape + Parameters + ---------- + indexer : tuple, list-like, array-like, slice + The subset of self.values to set + value : object + The value being set + mgr : BlockPlacement + + Returns + ------- + Block + + Notes + ----- + 'indexer' is a direct slice/positional indexer. 'value' must + be a compatible shape. """ # coerce None values, if appropriate if value is None: @@ -1885,6 +1898,28 @@ def is_view(self): return False def setitem(self, indexer, value, mgr=None): + """Set the value inplace, returning a same-typed block. + + This differs from Block.setitem by not allowing setitem to change + the dtype of the Block. + + Parameters + ---------- + indexer : tuple, list-like, array-like, slice + The subset of self.values to set + value : object + The value being set + mgr : BlockPlacement + + Returns + ------- + Block + + Notes + ----- + 'indexer' is a direct slice/positional indexer. 'value' must + be a compatible shape. + """ if isinstance(indexer, tuple): # we are always 1-D indexer = indexer[0] From 7f65c5a8fb65d917e9bbf36554f2766ad0f0a4ee Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Feb 2018 10:00:05 -0600 Subject: [PATCH 04/17] Alias for common data attributes --- pandas/tests/extension/decimal/array.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 8b2eaadeca99e..8d6277eb566ef 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -31,6 +31,11 @@ def __init__(self, values): values = np.asarray(values, dtype=object) self.values = values + # Some aliases for common attribute names to ensure pandas supports + # these + self._values = self._items = self._data = self.data = self.values + + def __getitem__(self, item): if isinstance(item, numbers.Integral): From 274da1344fc3cd452dbd488d1771b098e05f8fc6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Feb 2018 20:55:56 -0600 Subject: [PATCH 05/17] Additional tests --- pandas/tests/extension/base/setitem.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index d5d01d02221a2..98590944f25b7 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -112,5 +112,22 @@ def test_set_mask_broadcast(self, data): def test_setitem_expand_columns(self, data): df = pd.DataFrame({"A": data}) - df['B'] = 1 - assert len(df.columns) == 2 + result = df.copy() + result['B'] = 1 + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + self.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[:, 'B'] = 1 + self.assert_frame_equal(result, expected) + + def test_setitem_expand_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + result = df.copy() + result['B'] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + self.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[:, 'B'] = data + self.assert_frame_equal(result, expected) From 35ae908a8d118edc4999191b8934c50e6d80df56 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Feb 2018 06:19:49 -0600 Subject: [PATCH 06/17] Linting --- pandas/tests/extension/decimal/array.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 8d6277eb566ef..f3dccd913d8f3 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -35,8 +35,6 @@ def __init__(self, values): # these self._values = self._items = self._data = self.data = self.values - - def __getitem__(self, item): if isinstance(item, numbers.Integral): return self.values[item] From 76f6e867c38dfd094a5a568c54d7cff561b87ee2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Mar 2018 07:30:09 -0500 Subject: [PATCH 07/17] Move file --- pandas/core/indexing.py | 45 ++++++++++++++++++++++++++++++++++++++ pandas/core/internals.py | 10 ++++----- pandas/util/_validators.py | 45 -------------------------------------- 3 files changed, 50 insertions(+), 50 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9736bba78ab72..13b6ef2b1ed04 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2310,6 +2310,51 @@ def check_bool_indexer(ax, key): return result +def check_setitem_lengths(indexer, value, values): + """Validate that value and indexer are the same length. + + An special-case is allowed for when the indexer is a boolean array + and the number of true values equals the length of ``value``. In + this case, no exception is raised. + + Parameters + ---------- + indexer : sequence + The key for the setitem + value : array-like + The value for the setitem + values : array-like + The values being set into + + Returns + ------- + None + + Raises + ------ + ValueError + When the indexer is an ndarray or list and the lengths don't + match. + """ + from pandas.core.indexing import length_of_indexer + + # boolean with truth values == len of the value is ok too + if isinstance(indexer, (np.ndarray, list)): + if is_list_like(value) and len(indexer) != len(value): + if not (isinstance(indexer, np.ndarray) and + indexer.dtype == np.bool_ and + len(indexer[indexer]) == len(value)): + raise ValueError("cannot set using a list-like indexer " + "with a different length than the value") + # slice + elif isinstance(indexer, slice): + + if is_list_like(value) and len(values): + if len(value) != length_of_indexer(indexer, values): + raise ValueError("cannot set using a slice indexer with a " + "different length than the value") + + def convert_missing_indexer(indexer): """ reverse convert a missing indexer, which is a dict return the scalar indexer and a boolean indicating if we converted diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 735e393849003..b98f2d590b842 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -66,7 +66,7 @@ import pandas.core.algorithms as algos from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.core.indexing import maybe_convert_indices +from pandas.core.indexing import maybe_convert_indices, check_setitem_lengths from pandas.core.arrays import Categorical from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex @@ -80,8 +80,7 @@ from pandas._libs.tslibs import conversion from pandas.util._decorators import cache_readonly -from pandas.util._validators import ( - validate_bool_kwarg, validate_setitem_lengths) +from pandas.util._validators import validate_bool_kwarg from pandas import compat from pandas.compat import range, map, zip, u @@ -890,7 +889,7 @@ def setitem(self, indexer, value, mgr=None): values = transf(values) # length checking - validate_setitem_lengths(indexer, value, values) + check_setitem_lengths(indexer, value, values) def _is_scalar_indexer(indexer): # return True if we are all scalar indexers @@ -1922,11 +1921,12 @@ def setitem(self, indexer, value, mgr=None): 'indexer' is a direct slice/positional indexer. 'value' must be a compatible shape. """ + print(type(indexer), indexer) if isinstance(indexer, tuple): # we are always 1-D indexer = indexer[0] - validate_setitem_lengths(indexer, value, self.values) + check_setitem_lengths(indexer, value, self.values) self.values[indexer] = value return self diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 00876e1f029b7..5811cc5333a61 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -358,48 +358,3 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): raise ValueError("Cannot specify both 'value' and 'method'.") return value, method - - -def validate_setitem_lengths(indexer, value, values): - """Validate that value and indexer are the same length. - - An special-case is allowed for when the indexer is a boolean array - and the number of true values equals the length of ``value``. In - this case, no exception is raised. - - Parameters - ---------- - indexer : sequence - The key for the setitem - value : array-like - The value for the setitem - values : array-like - The values being set into - - Returns - ------- - None - - Raises - ------ - ValueError - When the indexer is an ndarray or list and the lengths don't - match. - """ - from pandas.core.indexing import length_of_indexer - - # boolean with truth values == len of the value is ok too - if isinstance(indexer, (np.ndarray, list)): - if is_list_like(value) and len(indexer) != len(value): - if not (isinstance(indexer, np.ndarray) and - indexer.dtype == np.bool_ and - len(indexer[indexer]) == len(value)): - raise ValueError("cannot set using a list-like indexer " - "with a different length than the value") - # slice - elif isinstance(indexer, slice): - - if is_list_like(value) and len(values): - if len(value) != length_of_indexer(indexer, values): - raise ValueError("cannot set using a slice indexer with a " - "different length than the value") From 2d5b08cb1957e9b781eea419b524cae9c04f500b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Mar 2018 07:30:49 -0500 Subject: [PATCH 08/17] BUG: Fixed length validation --- pandas/core/frame.py | 3 +++ pandas/tests/extension/base/setitem.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index efb002474f876..c0033e3558484 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3204,7 +3204,10 @@ def reindexer(value): value = reindexer(value).T elif isinstance(value, ExtensionArray): + from pandas.core.series import _sanitize_index value = value.copy() + # Copy donesn't have any effect at the moment + value = _sanitize_index(value, self.index, copy=False) elif isinstance(value, Index) or is_sequence(value): from pandas.core.series import _sanitize_index diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 98590944f25b7..b49ddd7ba601b 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -131,3 +131,9 @@ def test_setitem_expand_with_extension(self, data): result = df.copy() result.loc[:, 'B'] = data self.assert_frame_equal(result, expected) + + def test_setitem_frame_invalid_length(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + xpr = "Length of values does not match length of index" + with tm.assert_raises_regex(ValueError, xpr): + df['B'] = data[:5] From f66c093de3954672fca06577aa4d4e265cb13fdc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Mar 2018 09:26:41 -0500 Subject: [PATCH 09/17] Removed print --- pandas/core/internals.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b98f2d590b842..f16af27397b64 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1921,7 +1921,6 @@ def setitem(self, indexer, value, mgr=None): 'indexer' is a direct slice/positional indexer. 'value' must be a compatible shape. """ - print(type(indexer), indexer) if isinstance(indexer, tuple): # we are always 1-D indexer = indexer[0] From 43dfd7d815f540296e94431434ddfb3d88d1a3e3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Mar 2018 11:09:22 -0500 Subject: [PATCH 10/17] Fixed setitem for tuples All our extension array setitem tests were hitting this. We only caught it because only tuple reraised. --- pandas/_libs/index.pyx | 13 ++++++++----- pandas/tests/extension/base/setitem.py | 6 ++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 6b23e487aad3a..aad0de7e47910 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -120,20 +120,23 @@ cdef class IndexEngine: return Timedelta(util.get_value_at(arr, loc)) return util.get_value_at(arr, loc) - cpdef set_value(self, ndarray arr, object key, object value): + cpdef set_value(self, object arr, object key, object value): """ - arr : 1-dimensional ndarray + arr : ndarray or ExtensionArray + Should be 1 dimensional. """ cdef: object loc void* data_ptr loc = self.get_loc(key) - value = convert_scalar(arr, value) + if cnp.PyArray_Check(arr): + value = convert_scalar(arr, value) - if PySlice_Check(loc) or cnp.PyArray_Check(loc): + if (PySlice_Check(loc) or cnp.PyArray_Check(loc) or + not cnp.PyArray_Check(arr)): arr[loc] = value - else: + elif cnp.PyArray_Check(arr): util.set_value_at(arr, loc, value) cpdef get_loc(self, object val): diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index b49ddd7ba601b..01f9ec1d6f644 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -137,3 +137,9 @@ def test_setitem_frame_invalid_length(self, data): xpr = "Length of values does not match length of index" with tm.assert_raises_regex(ValueError, xpr): df['B'] = data[:5] + + def test_setitem_tuple_index(self, data): + s = pd.Series(data[:2], index=[(0, 0), (0, 1)]) + expected = pd.Series(data.take([1, 1]), index=s.index) + s[0] = data[1] + self.assert_series_equal(s, expected) From 5c1d934f7aded4eaa8486508bcffbc11401d88fa Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Mar 2018 13:26:10 -0500 Subject: [PATCH 11/17] Revert setitem changes --- pandas/_libs/index.pyx | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index aad0de7e47910..6b23e487aad3a 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -120,23 +120,20 @@ cdef class IndexEngine: return Timedelta(util.get_value_at(arr, loc)) return util.get_value_at(arr, loc) - cpdef set_value(self, object arr, object key, object value): + cpdef set_value(self, ndarray arr, object key, object value): """ - arr : ndarray or ExtensionArray - Should be 1 dimensional. + arr : 1-dimensional ndarray """ cdef: object loc void* data_ptr loc = self.get_loc(key) - if cnp.PyArray_Check(arr): - value = convert_scalar(arr, value) + value = convert_scalar(arr, value) - if (PySlice_Check(loc) or cnp.PyArray_Check(loc) or - not cnp.PyArray_Check(arr)): + if PySlice_Check(loc) or cnp.PyArray_Check(loc): arr[loc] = value - elif cnp.PyArray_Check(arr): + else: util.set_value_at(arr, loc, value) cpdef get_loc(self, object val): From 66bbe9a8c9ea8c273508c5beade6699e6f666305 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Mar 2018 13:33:36 -0500 Subject: [PATCH 12/17] Xfail that test --- pandas/tests/extension/base/setitem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 01f9ec1d6f644..1ea4f1c49ed51 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -138,8 +138,9 @@ def test_setitem_frame_invalid_length(self, data): with tm.assert_raises_regex(ValueError, xpr): df['B'] = data[:5] + @pytest.mark.xfail(reason="GH-20441: setitem on extension types.") def test_setitem_tuple_index(self, data): s = pd.Series(data[:2], index=[(0, 0), (0, 1)]) expected = pd.Series(data.take([1, 1]), index=s.index) - s[0] = data[1] + s[(0, 1)] = data[1] self.assert_series_equal(s, expected) From f47ddf27ef2d776e9527843494478c6637e18ba7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Mar 2018 16:58:58 -0500 Subject: [PATCH 13/17] Linting --- pandas/util/_validators.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 5811cc5333a61..a96563051e7de 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -4,9 +4,7 @@ """ import warnings -import numpy as np - -from pandas.core.dtypes.common import is_bool, is_list_like +from pandas.core.dtypes.common import is_bool def _check_arg_length(fname, args, max_fname_arg_count, compat_args): From 1e5a14cac2c6e1192c9e7a00d9d6ab24fb86ed49 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 24 Mar 2018 07:15:25 -0500 Subject: [PATCH 14/17] Test updates --- pandas/tests/extension/base/setitem.py | 41 +++++++++++++++++++------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 1ea4f1c49ed51..e91345b504d86 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -50,44 +50,45 @@ def test_setitem_sequence_broadcasts(self, data): assert arr[1] == data[2] @pytest.mark.parametrize('setter', ['loc', 'iloc']) - def test_set_scalar(self, data, setter): + def test_setitem_scalar(self, data, setter): arr = pd.Series(data) setter = getattr(arr, setter) operator.setitem(setter, 0, data[1]) assert arr[0] == data[1] - def test_set_loc_scalar_mixed(self, data): + def test_setitem_loc_scalar_mixed(self, data): df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) df.loc[0, 'B'] = data[1] assert df.loc[0, 'B'] == data[1] - def test_set_loc_scalar_single(self, data): + def test_setitem_loc_scalar_single(self, data): df = pd.DataFrame({"B": data}) df.loc[10, 'B'] = data[1] assert df.loc[10, 'B'] == data[1] - def test_set_loc_scalar_multiple_homogoneous(self, data): + def test_setitem_loc_scalar_multiple_homogoneous(self, data): df = pd.DataFrame({"A": data, "B": data}) df.loc[10, 'B'] = data[1] assert df.loc[10, 'B'] == data[1] - def test_set_iloc_scalar_mixed(self, data): + def test_setitem_iloc_scalar_mixed(self, data): df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) df.iloc[0, 1] = data[1] assert df.loc[0, 'B'] == data[1] - def test_set_iloc_scalar_single(self, data): + def test_setitem_iloc_scalar_single(self, data): df = pd.DataFrame({"B": data}) df.iloc[10, 0] = data[1] assert df.loc[10, 'B'] == data[1] - def test_set_iloc_scalar_multiple_homogoneous(self, data): + def test_setitem_iloc_scalar_multiple_homogoneous(self, data): df = pd.DataFrame({"A": data, "B": data}) df.iloc[10, 1] = data[1] assert df.loc[10, 'B'] == data[1] @pytest.mark.parametrize('as_callable', [True, False]) - def test_set_mask_aligned(self, data, as_callable): + @pytest.mark.parametrize('setter', ['loc', None]) + def test_setitem_mask_aligned(self, data, as_callable, setter): ser = pd.Series(data) mask = np.zeros(len(data), dtype=bool) mask[:2] = True @@ -97,16 +98,31 @@ def test_set_mask_aligned(self, data, as_callable): else: mask2 = mask + if setter: + # loc + target = getattr(ser, setter) + else: + # Series.__setitem__ + target = ser + + operator.setitem(target, mask2, data[5:7]) + ser[mask2] = data[5:7] assert ser[0] == data[5] assert ser[1] == data[6] - def test_set_mask_broadcast(self, data): + @pytest.mark.parametrize('setter', ['loc', None]) + def test_setitem_mask_broadcast(self, data, setter): ser = pd.Series(data) mask = np.zeros(len(data), dtype=bool) mask[:2] = True - ser[mask] = data[10] + if setter: # loc + target = getattr(ser, setter) + else: # __setitem__ + target = ser + + operator.setitem(target, mask, data[10]) assert ser[0] == data[10] assert ser[1] == data[10] @@ -121,6 +137,11 @@ def test_setitem_expand_columns(self, data): result.loc[:, 'B'] = 1 self.assert_frame_equal(result, expected) + # overwrite with new type + result['B'] = data + expected = pd.DataFrame({"A": data, "B": data}) + self.assert_frame_equal(result, expected) + def test_setitem_expand_with_extension(self, data): df = pd.DataFrame({"A": [1] * len(data)}) result = df.copy() From abe734de3b9df0b261aef857725e1698206961a1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 21:18:16 -0500 Subject: [PATCH 15/17] Import, comment --- pandas/core/frame.py | 3 ++- pandas/core/indexing.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c0033e3558484..85698708f67db 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3205,8 +3205,9 @@ def reindexer(value): elif isinstance(value, ExtensionArray): from pandas.core.series import _sanitize_index + # Explicitly copy here, instead of in _sanitize_index, + # as sanitize_index won't copy an EA, even with copy=True value = value.copy() - # Copy donesn't have any effect at the moment value = _sanitize_index(value, self.index, copy=False) elif isinstance(value, Index) or is_sequence(value): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 13b6ef2b1ed04..5240a4703c242 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2336,8 +2336,6 @@ def check_setitem_lengths(indexer, value, values): When the indexer is an ndarray or list and the lengths don't match. """ - from pandas.core.indexing import length_of_indexer - # boolean with truth values == len of the value is ok too if isinstance(indexer, (np.ndarray, list)): if is_list_like(value) and len(indexer) != len(value): From 9a5b8c9c35fa9ef0d9d5a61a6113fbad441659c2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Mar 2018 07:05:27 -0500 Subject: [PATCH 16/17] Removed the _values alias that was causing issues with factorize. --- pandas/tests/extension/decimal/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 29bd1e4505964..f93d11f579f11 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -33,7 +33,7 @@ def __init__(self, values): self.values = values # Some aliases for common attribute names to ensure pandas supports # these - self._values = self._items = self._data = self.data = self.values + self._items = self._data = self.data = self.values @classmethod def _constructor_from_sequence(cls, scalars): @@ -65,7 +65,7 @@ def __len__(self): return len(self.values) def __repr__(self): - return repr(self.values) + return 'DecimalArray({!r})'.format(self.values) @property def nbytes(self): From 3cbe078f3675c293d934530dccc81cfb2904c9b3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 15 Apr 2018 14:01:59 -0500 Subject: [PATCH 17/17] DOC: Fixup docstrings --- pandas/core/internals.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 50603153759a6..37d11296400be 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -825,7 +825,7 @@ def setitem(self, indexer, value, mgr=None): The subset of self.values to set value : object The value being set - mgr : BlockPlacement + mgr : BlockPlacement, optional Returns ------- @@ -833,7 +833,7 @@ def setitem(self, indexer, value, mgr=None): Notes ----- - 'indexer' is a direct slice/positional indexer. 'value' must + `indexer` is a direct slice/positional indexer. `value` must be a compatible shape. """ # coerce None values, if appropriate @@ -1910,7 +1910,7 @@ def setitem(self, indexer, value, mgr=None): The subset of self.values to set value : object The value being set - mgr : BlockPlacement + mgr : BlockPlacement, optional Returns ------- @@ -1918,7 +1918,7 @@ def setitem(self, indexer, value, mgr=None): Notes ----- - 'indexer' is a direct slice/positional indexer. 'value' must + `indexer` is a direct slice/positional indexer. `value` must be a compatible shape. """ if isinstance(indexer, tuple):