Skip to content

Split fastpath IntegerArray constructor and general purpose constructor #22070

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 changes: 1 addition & 1 deletion pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
from .period import PeriodArrayMixin # noqa
from .timedeltas import TimedeltaArrayMixin # noqa
from .integer import ( # noqa
IntegerArray, to_integer_array)
IntegerArray, integer_array)
40 changes: 24 additions & 16 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def construct_from_string(cls, string):
"'{}'".format(cls, string))


def to_integer_array(values, dtype=None):
def integer_array(values, dtype=None, copy=False):
"""
Infer and return an integer array of the values.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you update the docstring?


Expand All @@ -94,7 +94,8 @@ def to_integer_array(values, dtype=None):
------
TypeError if incompatible types
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there still a ref to to_integer_array? (I see it in the diff, but I also see that you changed it above)

return IntegerArray(values, dtype=dtype, copy=False)
values, mask = coerce_to_array(values, dtype=dtype, copy=copy)
return IntegerArray(values, mask)


def safe_cast(values, dtype, copy):
Expand Down Expand Up @@ -206,7 +207,7 @@ class IntegerArray(ExtensionArray, ExtensionOpsMixin):
def dtype(self):
return _dtypes[str(self._data.dtype)]

def __init__(self, values, mask=None, dtype=None, copy=False):
def __init__(self, values, mask, copy=False):
"""
Parameters
----------
Expand All @@ -219,25 +220,33 @@ def __init__(self, values, mask=None, dtype=None, copy=False):
-------
IntegerArray
"""
self._data, self._mask = coerce_to_array(
values, dtype=dtype, mask=mask, copy=copy)
if not (isinstance(values, np.ndarray)
and np.issubdtype(values.dtype, np.integer)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use is_integer_dtype

raise TypeError("values should be integer numpy array")
if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use is_bool_dtype

raise TypeError("mask should be boolean numpy array")

if copy:
values = values.copy()
mask = mask.copy()

self._data = values
self._mask = mask

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
return cls(scalars, dtype=dtype, copy=copy)
return integer_array(scalars, dtype=dtype, copy=copy)

@classmethod
def _from_factorized(cls, values, original):
return cls(values, dtype=original.dtype)
return integer_array(values, dtype=original.dtype)

def __getitem__(self, item):
if is_integer(item):
if self._mask[item]:
return self.dtype.na_value
return self._data[item]
return type(self)(self._data[item],
mask=self._mask[item],
dtype=self.dtype)
return type(self)(self._data[item], self._mask[item])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback additional question: what do you think of writing IntegerArray(...) instead of type(self)(...)?

Python allows that perfectly well (and it is equivalent here, as we don't subclass this one further), and I personally find that easier to read.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like using type() as I expect this to be subclassed for BooleanArray. It could be that this is not needed for that, but I want to keep that possibility open.


def _coerce_to_ndarray(self):
"""
Expand Down Expand Up @@ -294,7 +303,7 @@ def take(self, indexer, allow_fill=False, fill_value=None):
result[fill_mask] = fill_value
mask = mask ^ fill_mask

return type(self)(result, mask=mask, dtype=self.dtype, copy=False)
return type(self)(result, mask, copy=False)

def copy(self, deep=False):
data, mask = self._data, self._mask
Expand All @@ -304,7 +313,7 @@ def copy(self, deep=False):
else:
data = data.copy()
mask = mask.copy()
return type(self)(data, mask, dtype=self.dtype, copy=False)
return type(self)(data, mask, copy=False)

def __setitem__(self, key, value):
_is_scalar = is_scalar(value)
Expand Down Expand Up @@ -356,7 +365,7 @@ def _na_value(self):
def _concat_same_type(cls, to_concat):
data = np.concatenate([x._data for x in to_concat])
mask = np.concatenate([x._mask for x in to_concat])
return cls(data, mask=mask, dtype=to_concat[0].dtype)
return cls(data, mask)

def astype(self, dtype, copy=True):
"""Cast to a NumPy array or IntegerArray with 'dtype'.
Expand Down Expand Up @@ -386,8 +395,7 @@ def astype(self, dtype, copy=True):
if isinstance(dtype, _IntegerDtype):
result = self._data.astype(dtype.numpy_dtype,
casting='same_kind', copy=False)
return type(self)(result, mask=self._mask,
dtype=dtype, copy=False)
return type(self)(result, mask=self._mask, copy=False)

# coerce
data = self._coerce_to_ndarray()
Expand Down Expand Up @@ -523,7 +531,7 @@ def _maybe_mask_result(self, result, mask, other, op_name):
result[mask] = np.nan
return result

return type(self)(result, mask=mask, dtype=self.dtype, copy=False)
return type(self)(result, mask, copy=False)

@classmethod
def _create_arithmetic_method(cls, op):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
if not (dtype is None or is_object_dtype(dtype)):

# coerce to the provided dtype
data = dtype.construct_array_type()(
data = dtype.construct_array_type()._from_sequence(
data, dtype=dtype, copy=False)

# coerce to the object dtype
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4095,7 +4095,7 @@ def _try_cast(arr, take_fast_path):
ordered=dtype.ordered)
elif is_extension_array_dtype(dtype):
# create an extension array from its dtype
array_type = dtype.construct_array_type()
array_type = dtype.construct_array_type()._from_sequence
subarr = array_type(subarr, dtype=dtype, copy=copy)

elif dtype is not None and raise_cast_failure:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_fillna_series_method(self, data_missing, method):
fill_value = data_missing[1]

if method == 'ffill':
data_missing = type(data_missing)(data_missing[::-1])
data_missing = data_missing[::-1]

result = pd.Series(data_missing).fillna(method=method)
expected = pd.Series(
Expand Down
25 changes: 12 additions & 13 deletions pandas/tests/extension/integer/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas.core.dtypes.generic import ABCIndexClass

from pandas.core.arrays import (
to_integer_array, IntegerArray)
integer_array, IntegerArray)
from pandas.core.arrays.integer import (
Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype)
Expand All @@ -31,12 +31,12 @@ def dtype(request):

@pytest.fixture
def data(dtype):
return IntegerArray(make_data(), dtype=dtype)
return integer_array(make_data(), dtype=dtype)


@pytest.fixture
def data_missing(dtype):
return IntegerArray([np.nan, 1], dtype=dtype)
return integer_array([np.nan, 1], dtype=dtype)


@pytest.fixture
Expand All @@ -49,12 +49,12 @@ def gen(count):

@pytest.fixture
def data_for_sorting(dtype):
return IntegerArray([1, 2, 0], dtype=dtype)
return integer_array([1, 2, 0], dtype=dtype)


@pytest.fixture
def data_missing_for_sorting(dtype):
return IntegerArray([1, np.nan, 0], dtype=dtype)
return integer_array([1, np.nan, 0], dtype=dtype)


@pytest.fixture
Expand All @@ -74,7 +74,7 @@ def data_for_grouping(dtype):
a = 0
c = 2
na = np.nan
return IntegerArray([b, b, na, na, a, a, b, c], dtype=dtype)
return integer_array([b, b, na, na, a, a, b, c], dtype=dtype)


def test_dtypes(dtype):
Expand Down Expand Up @@ -494,8 +494,7 @@ def test_construct_index(self, all_data, dropna):
else:
other = all_data

result = pd.Index(IntegerArray(other,
dtype=all_data.dtype))
result = pd.Index(integer_array(other, dtype=all_data.dtype))
expected = pd.Index(other, dtype=object)

self.assert_index_equal(result, expected)
Expand Down Expand Up @@ -584,14 +583,14 @@ def test_construct_cast_invalid(self, dtype):
msg = "cannot safely"
arr = [1.2, 2.3, 3.7]
with tm.assert_raises_regex(TypeError, msg):
IntegerArray(arr, dtype=dtype)
integer_array(arr, dtype=dtype)

with tm.assert_raises_regex(TypeError, msg):
pd.Series(arr).astype(dtype)

arr = [1.2, 2.3, 3.7, np.nan]
with tm.assert_raises_regex(TypeError, msg):
IntegerArray(arr, dtype=dtype)
integer_array(arr, dtype=dtype)

with tm.assert_raises_regex(TypeError, msg):
pd.Series(arr).astype(dtype)
Expand Down Expand Up @@ -658,7 +657,7 @@ def test_conversions(data_missing):
def test_to_integer_array_error(values):
# error in converting existing arrays to IntegerArrays
with pytest.raises(TypeError):
to_integer_array(values)
integer_array(values)


@pytest.mark.parametrize(
Expand All @@ -669,8 +668,8 @@ def test_to_integer_array_error(values):
(np.array([1, np.nan]), 'int8', Int8Dtype)])
def test_to_integer_array(values, to_dtype, result_dtype):
# convert existing arrays to IntegerArrays
result = to_integer_array(values, dtype=to_dtype)
expected = IntegerArray(values, dtype=result_dtype())
result = integer_array(values, dtype=to_dtype)
expected = integer_array(values, dtype=result_dtype())
tm.assert_extension_array_equal(result, expected)


Expand Down