Skip to content

Commit 2e30a9c

Browse files
committed
ENH: add integer-na support via an ExtensionArray
closes pandas-dev#20700
1 parent ba26474 commit 2e30a9c

27 files changed

+1105
-41
lines changed

pandas/conftest.py

+9
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,15 @@ def all_arithmetic_operators(request):
9696
return request.param
9797

9898

99+
@pytest.fixture(params=['__eq__', '__ne__', '__le__',
100+
'__lt__', '__ge__', '__gt__'])
101+
def all_compare_operators(request):
102+
"""
103+
Fixture for dunder names for common compare operations
104+
"""
105+
return request.param
106+
107+
99108
@pytest.fixture(params=[None, 'gzip', 'bz2', 'zip',
100109
pytest.param('xz', marks=td.skip_if_no_lzma)])
101110
def compression(request):

pandas/core/algorithms.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def _reconstruct_data(values, dtype, original):
154154
"""
155155
from pandas import Index
156156
if is_extension_array_dtype(dtype):
157-
pass
157+
values = dtype.array_type._from_sequence(values)
158158
elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
159159
values = Index(original)._shallow_copy(values, name=None)
160160
elif is_bool_dtype(dtype):
@@ -705,7 +705,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
705705

706706
else:
707707

708-
if is_categorical_dtype(values) or is_sparse(values):
708+
if is_extension_array_dtype(values) or is_sparse(values):
709709

710710
# handle extension arrays (e.g. Categorical) and sparse,
711711
result = Series(values)._values.value_counts(dropna=dropna)

pandas/core/arrays/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
11
from .base import ExtensionArray # noqa
22
from .categorical import Categorical # noqa
3+
from .integer import ( # noqa
4+
Int8Array, Int16Array, Int32Array, Int64Array,
5+
UInt8Array, UInt16Array, UInt32Array, UInt64Array,
6+
to_integer_array)

pandas/core/arrays/base.py

+43-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class ExtensionArray(object):
3636
* isna
3737
* take
3838
* copy
39+
* append
3940
* _concat_same_type
4041
4142
An additional method is available to satisfy pandas' internal,
@@ -49,6 +50,7 @@ class ExtensionArray(object):
4950
methods:
5051
5152
* fillna
53+
* dropna
5254
* unique
5355
* factorize / _values_for_factorize
5456
* argsort / _values_for_argsort
@@ -82,14 +84,16 @@ class ExtensionArray(object):
8284
# Constructors
8385
# ------------------------------------------------------------------------
8486
@classmethod
85-
def _from_sequence(cls, scalars):
87+
def _from_sequence(cls, scalars, copy=False):
8688
"""Construct a new ExtensionArray from a sequence of scalars.
8789
8890
Parameters
8991
----------
9092
scalars : Sequence
9193
Each element will be an instance of the scalar type for this
9294
array, ``cls.dtype.type``.
95+
copy : boolean, default False
96+
if True, copy the underlying data
9397
Returns
9498
-------
9599
ExtensionArray
@@ -379,6 +383,16 @@ def fillna(self, value=None, method=None, limit=None):
379383
new_values = self.copy()
380384
return new_values
381385

386+
def dropna(self):
387+
""" Return ExtensionArray without NA values
388+
389+
Returns
390+
-------
391+
valid : ExtensionArray
392+
"""
393+
394+
return self[~self.isna()]
395+
382396
def unique(self):
383397
"""Compute the ExtensionArray of unique values.
384398
@@ -567,6 +581,34 @@ def copy(self, deep=False):
567581
"""
568582
raise AbstractMethodError(self)
569583

584+
def append(self, other):
585+
"""
586+
Append one or more arrays of the same type to this array
587+
588+
Parameters
589+
----------
590+
other : ExtensionArray or list/tuple of ExtensionArrays
591+
592+
Returns
593+
-------
594+
appended : ExtensionArray
595+
"""
596+
597+
to_concat = [self]
598+
cls = self.__class__
599+
600+
if isinstance(other, (list, tuple)):
601+
to_concat = to_concat + list(other)
602+
else:
603+
to_concat.append(other)
604+
605+
for obj in to_concat:
606+
if not isinstance(obj, cls):
607+
raise TypeError('all inputs must be of type {}'.format(
608+
cls.__name__))
609+
610+
return cls._concat_same_type(to_concat)
611+
570612
# ------------------------------------------------------------------------
571613
# Block-related methods
572614
# ------------------------------------------------------------------------

pandas/core/arrays/categorical.py

+4
Original file line numberDiff line numberDiff line change
@@ -2343,6 +2343,10 @@ def isin(self, values):
23432343
return algorithms.isin(self.codes, code_values)
23442344

23452345

2346+
# inform the Dtype about us
2347+
CategoricalDtype.array_type = Categorical
2348+
2349+
23462350
# The Series.cat accessor
23472351

23482352

0 commit comments

Comments
 (0)