From 834b1bed762c68817baa11fc021180e70724c75d Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 19 Jun 2022 13:41:34 -0400
Subject: [PATCH 1/6] REGR: maybe_convert_objects ignoring uints

---
 doc/source/whatsnew/v1.5.0.rst            |  1 +
 pandas/_libs/lib.pyx                      | 17 ++++++---
 pandas/_libs/tslibs/util.pxd              | 34 +++++++++++++++++
 pandas/tests/dtypes/test_inference.py     | 45 +++++++++++++----------
 pandas/tests/frame/test_constructors.py   | 19 ++++++++++
 pandas/tests/indexes/multi/test_setops.py | 12 +++++-
 pandas/tests/series/test_constructors.py  | 19 ++++++++++
 7 files changed, 121 insertions(+), 26 deletions(-)
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index a5cb716317689..197f8f31a9ff3 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -795,6 +795,7 @@ Conversion
 - Bug in metaclass of generic abstract dtypes causing :meth:`DataFrame.apply` and :meth:`Series.apply` to raise for the built-in function ``type`` (:issue:`46684`)
 - Bug in :meth:`DataFrame.to_records` returning inconsistent numpy types if the index was a :class:`MultiIndex` (:issue:`47263`)
 - Bug in :meth:`DataFrame.to_dict` for ``orient="list"`` or ``orient="index"`` was not returning native types (:issue:`46751`)
+- Bug where inferring the dtype from an iterable that is *not* a NumPy ``ndarray`` and consists entirely of NumPy unsigned integer scalars did not result in an unsigned integer dtype (:issue:`47294`)
 
 Strings
 ^^^^^^^
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 2136c410ef4a0..f99bf8dac0a6b 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1283,9 +1283,9 @@ cdef class Seen:
         In addition to setting a flag that an integer was seen, we
         also set two flags depending on the type of integer seen:
 
-        1) sint_ : a negative (signed) number in the
+        1) sint_ : a signed numpy integer type or a negative (signed) number in the
                    range of [-2**63, 0) was encountered
-        2) uint_ : a positive number in the range of
+        2) uint_ : an unsigned numpy integer type or a positive number in the range of
                    [2**63, 2**64) was encountered
 
         Parameters
@@ -1294,8 +1294,16 @@ cdef class Seen:
             Value with which to set the flags.
         """
         self.int_ = True
-        self.sint_ = self.sint_ or (oINT64_MIN <= val < 0)
-        self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX)
+        self.sint_ = (
+            self.sint_
+            or (oINT64_MIN <= val < 0)
+            or util.is_sinteger_object(val)
+        )
+        self.uint_ = (
+            self.uint_
+            or (oINT64_MAX < val <= oUINT64_MAX)
+            or util.is_uinteger_object(val)
+        )
 
     @property
     def numeric_(self):
@@ -2542,7 +2550,6 @@ def maybe_convert_objects(ndarray[object] objects,
             floats[i] = <float64_t>val
             complexes[i] = <double complex>val
             if not seen.null_:
-                val = int(val)
                 seen.saw_int(val)
 
                 if ((seen.uint_ and seen.sint_) or
diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
index 492b7d519551f..46d504a22b8bc 100644
--- a/pandas/_libs/tslibs/util.pxd
+++ b/pandas/_libs/tslibs/util.pxd
@@ -33,6 +33,8 @@ from numpy cimport (
 
 
 cdef extern from "numpy/arrayobject.h":
+    PyTypeObject PySignedIntegerArrType_Type
+    PyTypeObject PyUnsignedIntegerArrType_Type
     PyTypeObject PyFloatingArrType_Type
 
 cdef extern from "numpy/ndarrayobject.h":
@@ -55,6 +57,38 @@ cdef inline int64_t get_nat():
 # --------------------------------------------------------------------
 # Type Checking
 
+cdef inline bint is_sinteger_object(object obj) nogil:
+    """
+    Cython equivalent of
+
+    `isinstance(val, np.signedinteger)`
+
+    Parameters
+    ----------
+    val : object
+
+    Returns
+    -------
+    is_sinteger : bool
+    """
+    return PyObject_TypeCheck(obj, &PySignedIntegerArrType_Type)
+
+cdef inline bint is_uinteger_object(object obj) nogil:
+    """
+    Cython equivalent of
+
+    `isinstance(val, np.unsignedinteger)`
+
+    Parameters
+    ----------
+    val : object
+
+    Returns
+    -------
+    is_uinteger : bool
+    """
+    return PyObject_TypeCheck(obj, &PyUnsignedIntegerArrType_Type)
+
 cdef inline bint is_integer_object(object obj) nogil:
     """
     Cython equivalent of
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index b12476deccbfc..8fe6abd3b0ed5 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -700,25 +700,32 @@ def test_convert_int_overflow(self, value):
         result = lib.maybe_convert_objects(arr)
         tm.assert_numpy_array_equal(arr, result)
 
-    def test_maybe_convert_objects_uint64(self):
-        # see gh-4471
-        arr = np.array([2**63], dtype=object)
-        exp = np.array([2**63], dtype=np.uint64)
-        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
-
-        # NumPy bug: can't compare uint64 to int64, as that
-        # results in both casting to float64, so we should
-        # make sure that this function is robust against it
-        arr = np.array([np.uint64(2**63)], dtype=object)
-        exp = np.array([2**63], dtype=np.uint64)
-        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
-
-        arr = np.array([2, -1], dtype=object)
-        exp = np.array([2, -1], dtype=np.int64)
-        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
-
-        arr = np.array([2**63, -1], dtype=object)
-        exp = np.array([2**63, -1], dtype=object)
+    @pytest.mark.parametrize(
+        "value, expected_dtype",
+        [
+            # see gh-4471
+            ([2**63], np.uint64),
+            # NumPy bug: can't compare uint64 to int64, as that
+            # results in both casting to float64, so we should
+            # make sure that this function is robust against it
+            ([np.uint64(2**63)], np.uint64),
+            ([2, -1], np.int64),
+            ([2**63, -1], object),
+            # GH#47294
+            ([np.uint8(1)], np.uint8),
+            ([np.uint16(1)], np.uint16),
+            ([np.uint32(1)], np.uint32),
+            ([np.uint64(1)], np.uint64),
+            ([np.uint8(2), np.uint16(1)], np.uint16),
+            ([np.uint32(2), np.uint16(1)], np.uint32),
+            ([np.uint32(2), -1], object),
+            ([np.uint32(2), 1], np.uint64),
+            ([np.uint32(2), np.int32(1)], object),
+        ],
+    )
+    def test_maybe_convert_objects_uint(self, value, expected_dtype):
+        arr = np.array(value, dtype=object)
+        exp = np.array(value, dtype=expected_dtype)
         tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
 
     def test_maybe_convert_objects_datetime(self):
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index f06641002e039..d00cf198b3296 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -434,6 +434,25 @@ def test_constructor_int_overflow(self, values):
         assert result[0].dtype == object
         assert result[0][0] == value
 
+    @pytest.mark.parametrize(
+        "values",
+        [
+            np.array([1], dtype=np.uint16),
+            np.array([1], dtype=np.uint32),
+            np.array([1], dtype=np.uint64),
+            [np.uint16(1)],
+            [np.uint32(1)],
+            [np.uint64(1)],
+        ],
+    )
+    def test_constructor_numpy_uints(self, values):
+        # GH#47294
+        value = values[0]
+        result = DataFrame(values)
+
+        assert result[0].dtype == value.dtype
+        assert result[0][0] == value
+
     def test_constructor_ordereddict(self):
         import random
 
diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
index 57c4af1a0fe1c..39b5e0ffc526c 100644
--- a/pandas/tests/indexes/multi/test_setops.py
+++ b/pandas/tests/indexes/multi/test_setops.py
@@ -540,10 +540,18 @@ def test_union_duplicates(index, request):
     mi1 = MultiIndex.from_arrays([values, [1] * len(values)])
     mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)])
     result = mi1.union(mi2)
-    tm.assert_index_equal(result, mi2.sort_values())
+    expected = mi2.sort_values()
+    if mi2.levels[0].dtype == np.uint64 and (mi2.get_level_values(0) < 2**63).all():
+        # GH#47294 - union uses lib.fast_zip, which converts the data to Python
+        # integers and loses type information. The result is then unsigned only
+        # when the values are large enough to require an unsigned dtype.
+        expected = expected.set_levels(
+            [expected.levels[0].astype(int), expected.levels[1]]
+        )
+    tm.assert_index_equal(result, expected)
 
     result = mi2.union(mi1)
-    tm.assert_index_equal(result, mi2.sort_values())
+    tm.assert_index_equal(result, expected)
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 3dce22a06c1b2..cec06d054d766 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -745,6 +745,25 @@ def test_constructor_signed_int_overflow_deprecation(self):
         expected = Series([1, 200, 50], dtype="uint8")
         tm.assert_series_equal(ser, expected)
 
+    @pytest.mark.parametrize(
+        "values",
+        [
+            np.array([1], dtype=np.uint16),
+            np.array([1], dtype=np.uint32),
+            np.array([1], dtype=np.uint64),
+            [np.uint16(1)],
+            [np.uint32(1)],
+            [np.uint64(1)],
+        ],
+    )
+    def test_constructor_numpy_uints(self, values):
+        # GH#47294
+        value = values[0]
+        result = Series(values)
+
+        assert result[0].dtype == value.dtype
+        assert result[0] == value
+
     def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype):
         # see gh-15832
         msg = "Trying to coerce negative values to unsigned integers"

From 6e3857a1d7cad8f444b7f4f0873530b251ef3c92 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 22 Jun 2022 19:19:06 -0400
Subject: [PATCH 2/6] Add test

---
 pandas/tests/frame/indexing/test_setitem.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index cf6d351aa78a0..2eeab26c9be04 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -57,7 +57,9 @@ class mystring(str):
         expected = DataFrame({"a": [1], "b": [2], mystring("c"): [3]}, index=index)
         tm.assert_equal(df, expected)
 
-    @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
+    @pytest.mark.parametrize(
+        "dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"]
+    )
     def test_setitem_dtype(self, dtype, float_frame):
         arr = np.random.randn(len(float_frame))
 
@@ -210,6 +212,7 @@ def test_setitem_dict_preserves_dtypes(self):
                 "a": Series([0, 1, 2], dtype="int64"),
                 "b": Series([1, 2, 3], dtype=float),
                 "c": Series([1, 2, 3], dtype=float),
+                "d": Series([1, 2, 3], dtype="uint32"),
             }
         )
         df = DataFrame(
@@ -217,10 +220,16 @@ def test_setitem_dict_preserves_dtypes(self):
                 "a": Series([], dtype="int64"),
                 "b": Series([], dtype=float),
                 "c": Series([], dtype=float),
+                "d": Series([], dtype="uint32"),
             }
         )
         for idx, b in enumerate([1, 2, 3]):
-            df.loc[df.shape[0]] = {"a": int(idx), "b": float(b), "c": float(b)}
+            df.loc[df.shape[0]] = {
+                "a": int(idx),
+                "b": float(b),
+                "c": float(b),
+                "d": np.uint32(b),
+            }
         tm.assert_frame_equal(df, expected)
 
     @pytest.mark.parametrize(

From 4e4cac9bb110e3f1cb171f415cb242b75416e496 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 27 Jun 2022 17:19:36 -0400
Subject: [PATCH 3/6] Inline is_sinteger_object check into lib.pyx

---
 pandas/_libs/lib.pyx         | 10 +++++++++-
 pandas/_libs/tslibs/util.pxd | 17 -----------------
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index f99bf8dac0a6b..022e4a982b35e 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -17,6 +17,7 @@ from cpython.number cimport PyNumber_Check
 from cpython.object cimport (
     Py_EQ,
     PyObject_RichCompareBool,
+    PyTypeObject,
 )
 from cpython.ref cimport Py_INCREF
 from cpython.sequence cimport PySequence_Check
@@ -54,6 +55,11 @@ from numpy cimport (
 
 cnp.import_array()
 
+cdef extern from "Python.h":
+    # Note: importing extern-style allows us to declare these as nogil
+    # functions, whereas `from cpython cimport` does not.
+    bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
+
 cdef extern from "numpy/arrayobject.h":
     # cython's numpy.dtype specification is incorrect, which leads to
     # errors in issubclass(self.dtype.type, np.bool_), so we directly
@@ -71,6 +77,8 @@ cdef extern from "numpy/arrayobject.h":
             object fields
             tuple names
 
+    PyTypeObject PySignedIntegerArrType_Type
+
 cdef extern from "numpy/ndarrayobject.h":
     bint PyArray_CheckScalar(obj) nogil
 
@@ -1297,7 +1305,7 @@ cdef class Seen:
         self.sint_ = (
             self.sint_
             or (oINT64_MIN <= val < 0)
-            or util.is_sinteger_object(val)
+            or PyObject_TypeCheck(val, &PySignedIntegerArrType_Type)
         )
         self.uint_ = (
             self.uint_
diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
index 46d504a22b8bc..6ad4b2e791498 100644
--- a/pandas/_libs/tslibs/util.pxd
+++ b/pandas/_libs/tslibs/util.pxd
@@ -33,7 +33,6 @@ from numpy cimport (
 
 
 cdef extern from "numpy/arrayobject.h":
-    PyTypeObject PySignedIntegerArrType_Type
     PyTypeObject PyUnsignedIntegerArrType_Type
     PyTypeObject PyFloatingArrType_Type
 
@@ -57,22 +56,6 @@ cdef inline int64_t get_nat():
 # --------------------------------------------------------------------
 # Type Checking
 
-cdef inline bint is_sinteger_object(object obj) nogil:
-    """
-    Cython equivalent of
-
-    `isinstance(val, np.signedinteger)`
-
-    Parameters
-    ----------
-    val : object
-
-    Returns
-    -------
-    is_sinteger : bool
-    """
-    return PyObject_TypeCheck(obj, &PySignedIntegerArrType_Type)
-
 cdef inline bint is_uinteger_object(object obj) nogil:
     """
     Cython equivalent of

From 9458e403ea9da730a353eec9771473d8737082a6 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 27 Jun 2022 17:21:43 -0400
Subject: [PATCH 4/6] Inline is_uinteger_object check into lib.pyx

---
 pandas/_libs/lib.pyx         |  3 ++-
 pandas/_libs/tslibs/util.pxd | 17 -----------------
 2 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 022e4a982b35e..27cc167c0aeeb 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -78,6 +78,7 @@ cdef extern from "numpy/arrayobject.h":
             tuple names
 
     PyTypeObject PySignedIntegerArrType_Type
+    PyTypeObject PyUnsignedIntegerArrType_Type
 
 cdef extern from "numpy/ndarrayobject.h":
     bint PyArray_CheckScalar(obj) nogil
@@ -1310,7 +1311,7 @@ cdef class Seen:
         self.uint_ = (
             self.uint_
             or (oINT64_MAX < val <= oUINT64_MAX)
-            or util.is_uinteger_object(val)
+            or PyObject_TypeCheck(val, &PyUnsignedIntegerArrType_Type)
         )
 
     @property
diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
index 6ad4b2e791498..492b7d519551f 100644
--- a/pandas/_libs/tslibs/util.pxd
+++ b/pandas/_libs/tslibs/util.pxd
@@ -33,7 +33,6 @@ from numpy cimport (
 
 
 cdef extern from "numpy/arrayobject.h":
-    PyTypeObject PyUnsignedIntegerArrType_Type
     PyTypeObject PyFloatingArrType_Type
 
 cdef extern from "numpy/ndarrayobject.h":
@@ -56,22 +55,6 @@ cdef inline int64_t get_nat():
 # --------------------------------------------------------------------
 # Type Checking
 
-cdef inline bint is_uinteger_object(object obj) nogil:
-    """
-    Cython equivalent of
-
-    `isinstance(val, np.unsignedinteger)`
-
-    Parameters
-    ----------
-    val : object
-
-    Returns
-    -------
-    is_uinteger : bool
-    """
-    return PyObject_TypeCheck(obj, &PyUnsignedIntegerArrType_Type)
-
 cdef inline bint is_integer_object(object obj) nogil:
     """
     Cython equivalent of

From 5151dca8575b678652c3d5a5738c9a3bd4ecafb2 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sat, 9 Jul 2022 11:23:49 -0400
Subject: [PATCH 5/6] Added tests for Index construction

---
 pandas/tests/indexes/numeric/test_numeric.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py
index 7d2bcdf20c795..23262cb2eb768 100644
--- a/pandas/tests/indexes/numeric/test_numeric.py
+++ b/pandas/tests/indexes/numeric/test_numeric.py
@@ -509,6 +509,20 @@ def test_constructor_coercion_signed_to_unsigned(
         with pytest.raises(OverflowError, match=msg):
             Index([-1], dtype=any_unsigned_int_numpy_dtype)
 
+    def test_constructor_np_signed(self, any_signed_int_numpy_dtype):
+        # GH#47475
+        scalar = np.dtype(any_signed_int_numpy_dtype).type(1)
+        result = Index([scalar])
+        expected = Int64Index([1])
+        tm.assert_index_equal(result, expected)
+
+    def test_constructor_np_unsigned(self, any_unsigned_int_numpy_dtype):
+        # GH#47475
+        scalar = np.dtype(any_unsigned_int_numpy_dtype).type(1)
+        result = Index([scalar])
+        expected = UInt64Index([1])
+        tm.assert_index_equal(result, expected)
+
     def test_coerce_list(self):
         # coerce things
         arr = Index([1, 2, 3, 4])

From 7c1c72efe3345fb1192913c269ca7d6f8858a9b4 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sat, 9 Jul 2022 11:53:42 -0400
Subject: [PATCH 6/6] Added comments

---
 pandas/_libs/lib.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 27cc167c0aeeb..e353d224708b7 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1306,11 +1306,13 @@ cdef class Seen:
         self.sint_ = (
             self.sint_
             or (oINT64_MIN <= val < 0)
+            # Cython equivalent of `isinstance(val, np.signedinteger)`
             or PyObject_TypeCheck(val, &PySignedIntegerArrType_Type)
         )
         self.uint_ = (
             self.uint_
             or (oINT64_MAX < val <= oUINT64_MAX)
+            # Cython equivalent of `isinstance(val, np.unsignedinteger)`
             or PyObject_TypeCheck(val, &PyUnsignedIntegerArrType_Type)
         )