pandas-dev · mroeschke · May 8, 2022 · May 9, 2022 · May 9, 2022
diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -47,6 +47,14 @@
 
 from pandas.core.api import (
     # dtype
+    Int8ArrowDtype,
+    Int16ArrowDtype,
+    Int32ArrowDtype,
+    Int64ArrowDtype,
+    UInt8ArrowDtype,
+    UInt16ArrowDtype,
+    UInt32ArrowDtype,
+    UInt64ArrowDtype,
     Int8Dtype,
     Int16Dtype,
     Int32Dtype,

diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -27,6 +27,16 @@
     value_counts,
 )
 from pandas.core.arrays import Categorical
+from pandas.core.arrays.arrow.integer import (
+    Int8ArrowDtype,
+    Int16ArrowDtype,
+    Int32ArrowDtype,
+    Int64ArrowDtype,
+    UInt8ArrowDtype,
+    UInt16ArrowDtype,
+    UInt32ArrowDtype,
+    UInt64ArrowDtype,
+)
 from pandas.core.arrays.boolean import BooleanDtype
 from pandas.core.arrays.floating import (
     Float32Dtype,

diff --git a/pandas/core/arrays/arrow/integer.py b/pandas/core/arrays/arrow/integer.py
@@ -0,0 +1,150 @@
+from __future__ import annotations
+
+import pyarrow as pa
+
+from pandas.core.dtypes.base import register_extension_dtype
+
+from pandas.core.arrays.arrow.numeric import (
+    NumericArrowArray,
+    NumericArrowDtype,
+)
+
+
+class IntegerArrowDtype(NumericArrowDtype):
+    """
+    Base ExtensionDtype for a single size & kind of pyarrow integer type.
+
+    The public dtypes are subclasses of this non-public class; for example
+    Int8ArrowDtype represents signed 8-bit integers.
+
+    Each subclass sets the ``name`` and ``type`` class attributes.
+    """
+
+    _default_pa_dtype = pa.int64()  # read by the array's _from_sequence if dtype=None
+    _dtype_checker = pa.types.is_integer  # validates the values' pyarrow type
+
+    @classmethod
+    def construct_array_type(cls) -> type[IntegerArrowArray]:
+        """
+        Return the array type associated with this dtype.
+
+        Returns
+        -------
+        type
+        """
+        return IntegerArrowArray
+
+    @classmethod
+    def _str_to_dtype_mapping(cls):
+        return INT_STR_TO_DTYPE  # defined at the bottom of this module
+
+
+class IntegerArrowArray(NumericArrowArray):
+    """
+    Array of pyarrow integer values.
+
+    To construct an IntegerArrowArray from generic array-like input, use
+    :func:`pandas.array` with one of the integer Arrow dtypes.
+
+    Parameters
+    ----------
+    values : pa.ChunkedArray
+        A 1-d integer-dtype array.
+
+    Attributes
+    ----------
+    None
+
+    Methods
+    -------
+    None
+
+    Returns
+    -------
+    IntegerArrowArray
+    """
+
+    _dtype_cls = IntegerArrowDtype  # provides the type checker and dtype mapping
+
+
+_dtype_docstring = """
+An ExtensionDtype for {dtype} integer pyarrow data.
+
+Attributes
+----------
+None
+
+Methods
+-------
+None
+"""
+
+# Concrete dtypes: each sets type/name and fills _dtype_docstring in as __doc__.
+
+
+@register_extension_dtype  # same pattern for every dtype class below
+class Int8ArrowDtype(IntegerArrowDtype):
+    type = pa.int8()  # NOTE(review): a DataType instance, not a scalar class
+    name = "int8"  # NOTE(review): same string as NumPy's "int8" — verify lookups
+    __doc__ = _dtype_docstring.format(dtype="int8")
+
+
+@register_extension_dtype
+class Int16ArrowDtype(IntegerArrowDtype):
+    type = pa.int16()
+    name = "int16"
+    __doc__ = _dtype_docstring.format(dtype="int16")
+
+
+@register_extension_dtype
+class Int32ArrowDtype(IntegerArrowDtype):
+    type = pa.int32()
+    name = "int32"
+    __doc__ = _dtype_docstring.format(dtype="int32")
+
+
+@register_extension_dtype
+class Int64ArrowDtype(IntegerArrowDtype):
+    type = pa.int64()
+    name = "int64"
+    __doc__ = _dtype_docstring.format(dtype="int64")
+
+
+@register_extension_dtype
+class UInt8ArrowDtype(IntegerArrowDtype):
+    type = pa.uint8()
+    name = "uint8"
+    __doc__ = _dtype_docstring.format(dtype="uint8")
+
+
+@register_extension_dtype
+class UInt16ArrowDtype(IntegerArrowDtype):
+    type = pa.uint16()
+    name = "uint16"
+    __doc__ = _dtype_docstring.format(dtype="uint16")
+
+
+@register_extension_dtype
+class UInt32ArrowDtype(IntegerArrowDtype):
+    type = pa.uint32()
+    name = "uint32"
+    __doc__ = _dtype_docstring.format(dtype="uint32")
+
+
+@register_extension_dtype
+class UInt64ArrowDtype(IntegerArrowDtype):
+    type = pa.uint64()
+    name = "uint64"
+    __doc__ = _dtype_docstring.format(dtype="uint64")
+
+
+INT_STR_TO_DTYPE: dict[str, IntegerArrowDtype] = {  # keyed by str(pyarrow type)
+    "int8": Int8ArrowDtype(),
+    "int16": Int16ArrowDtype(),
+    "int32": Int32ArrowDtype(),
+    "int64": Int64ArrowDtype(),
+    "uint8": UInt8ArrowDtype(),
+    "uint16": UInt16ArrowDtype(),
+    "uint32": UInt32ArrowDtype(),
+    "uint64": UInt64ArrowDtype(),
+}
diff --git a/pandas/core/arrays/arrow/numeric.py b/pandas/core/arrays/arrow/numeric.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+from typing import (
+    Any,
+    Callable,
+    TypeVar,
+)
+
+import pyarrow as pa
+
+from pandas.errors import AbstractMethodError
+from pandas.util._decorators import cache_readonly
+
+from pandas.core.arrays.arrow.array import ArrowExtensionArray
+from pandas.core.arrays.arrow.dtype import ArrowDtype
+
+T = TypeVar("T", bound="NumericArrowArray")
+
+
+class NumericArrowDtype(ArrowDtype):
+    _default_pa_dtype: pa.DataType  # set by subclasses; fallback in _from_sequence
+    _dtype_checker: Callable[[Any], bool]  # a pa.types.is_<type> predicate
+
+    @property
+    def _is_numeric(self) -> bool:
+        return True
+
+    @cache_readonly
+    def is_signed_integer(self) -> bool:
+        return self.kind == "i"  # kind not defined here — verify ArrowDtype sets it
+
+    @cache_readonly
+    def is_unsigned_integer(self) -> bool:
+        return self.kind == "u"
+
+    @classmethod
+    def _str_to_dtype_mapping(cls):
+        raise AbstractMethodError(cls)  # subclasses return their str -> dtype dict
+
+
+class NumericArrowArray(ArrowExtensionArray):
+    """
+    Base class for Integer and Floating and Boolean Arrow arrays.
+    """
+
+    _dtype_cls: type[NumericArrowDtype]
+
+    def __init__(self, values: pa.ChunkedArray) -> None:
+        checker = self._dtype_cls._dtype_checker  # class access keeps it unbound
+        if not (isinstance(values, pa.ChunkedArray) and checker(values.type)):
+            descr = (  # NOTE(review): kind is read from the class, not an instance
+                "floating"
+                if self._dtype_cls.kind == "f"  # type: ignore[comparison-overlap]
+                else "integer"
+            )
+            raise TypeError(f"values should be {descr} arrow array.")
+        super().__init__(values)
+
+    @cache_readonly
+    def dtype(self) -> NumericArrowDtype:
+        mapping = self._dtype_cls._str_to_dtype_mapping()
+        return mapping[str(self._data.type)]  # keyed by the pyarrow type's str form
+
+    @classmethod
+    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
+        # The default is already a pyarrow type; pandas dtypes carry theirs in .type
+        pa_type = cls._dtype_cls._default_pa_dtype if dtype is None else dtype.type
+        return cls(pa.chunked_array([scalars], type=pa_type))
+
+    @classmethod
+    def _from_sequence_of_strings(cls, strings, *, dtype=None, copy: bool = False):
+        from pandas.core.tools.numeric import to_numeric
+
+        scalars = to_numeric(strings, errors="raise")  # parse first, then build
+        return cls._from_sequence(scalars, dtype=dtype, copy=copy)