diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a14efd3313eaf..5b27c953450e3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -971,6 +971,7 @@ cdef class Seen: bint nat_ # seen nat bint bool_ # seen_bool bint null_ # seen_null + bint nan_ # seen_np.nan bint uint_ # seen_uint (unsigned integer) bint sint_ # seen_sint (signed integer) bint float_ # seen_float @@ -995,6 +996,7 @@ cdef class Seen: self.nat_ = 0 self.bool_ = 0 self.null_ = 0 + self.nan_ = 0 self.uint_ = 0 self.sint_ = 0 self.float_ = 0 @@ -1956,10 +1958,37 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, @cython.wraparound(False) def maybe_convert_objects(ndarray[object] objects, bint try_float=0, bint safe=0, bint convert_datetime=0, - bint convert_timedelta=0): + bint convert_timedelta=0, + bint convert_to_nullable_integer=0): """ Type inference function-- convert object array to proper dtype + + Parameters + ---------- + values : ndarray + Array of object elements to convert. + try_float : bool, default False + If an array-like object contains only float or NaN values is + encountered, whether to convert and return an array of float dtype. + safe : bool, default False + Whether to upcast numeric type (e.g. int cast to float). If set to + True, no upcasting will be performed. + convert_datetime : bool, default False + If an array-like object contains only datetime values or NaT is + encountered, whether to convert and return an array of M8[ns] dtype. + convert_timedelta : bool, default False + If an array-like object contains only timedelta values or NaT is + encountered, whether to convert and return an array of m8[ns] dtype. + convert_to_nullable_integer : bool, default False + If an array-like object contains only interger values (and NaN) is + encountered, whether to convert and return an IntegerArray. + + Returns + ------- + array : array of converted object values to more specific dtypes if + pplicable """ + cdef: Py_ssize_t i, n ndarray[float64_t] floats @@ -1980,6 +2009,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, ints = np.empty(n, dtype='i8') uints = np.empty(n, dtype='u8') bools = np.empty(n, dtype=np.uint8) + mask = np.full(n, False) if convert_datetime: datetimes = np.empty(n, dtype='M8[ns]') @@ -1997,6 +2027,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if val is None: seen.null_ = 1 floats[i] = complexes[i] = fnan + mask[i] = True elif val is NaT: seen.nat_ = 1 if convert_datetime: @@ -2006,6 +2037,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if not (convert_datetime or convert_timedelta): seen.object_ = 1 break + elif val is np.nan: + seen.nan_ = 1 + mask[i] = True + floats[i] = complexes[i] = val elif util.is_bool_object(val): seen.bool_ = 1 bools[i] = val @@ -2087,11 +2122,19 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if not seen.object_: if not safe: - if seen.null_: + if seen.null_ or seen.nan_: if seen.is_float_or_complex: if seen.complex_: return complexes - elif seen.float_ or seen.int_: + elif seen.float_: + return floats + elif seen.int_: + if convert_to_nullable_integer: + from pandas.core.arrays import IntegerArray + return IntegerArray(ints, mask) + else: + return floats + elif seen.nan_: return floats else: if not seen.bool_: @@ -2130,7 +2173,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if seen.complex_: if not seen.int_: return complexes - elif seen.float_: + elif seen.float_ or seen.nan_: if not seen.int_: return floats else: @@ -2154,7 +2197,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if seen.complex_: if not seen.int_: return complexes - elif seen.float_: + elif seen.float_ or seen.nan_: if not seen.int_: return floats elif seen.int_: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7e3c2200dbabc..c5b6995e5d3bd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -857,9 +857,9 @@ def style(self): ... index=['panda', 'polar', 'koala']) >>> df species population - panda bear 1864 - polar bear 22000 - koala marsupial 80000 + panda bear 1864 + polar bear 22000 + koala marsupial 80000 >>> for label, content in df.items(): ... print('label:', label) ... print('content:', content, sep='\n') diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 05a2803b3fc2f..bb62db431ac73 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -548,6 +548,7 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): tuple arrays, columns """ + if columns is None: gen = (list(x.keys()) for x in data) types = (dict, OrderedDict) if PY36 else OrderedDict diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 62fb118f719e3..0408c78ac1536 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -51,6 +51,7 @@ Timestamp, isna, ) +from pandas.core.arrays import IntegerArray import pandas.util.testing as tm @@ -552,6 +553,20 @@ def test_maybe_convert_objects_datetime(self): out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) tm.assert_numpy_array_equal(out, exp) + @pytest.mark.parametrize( + "exp", + [ + IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])), + IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])), + ], + ) + def test_maybe_convert_objects_nullable_integer(self, exp): + # GH27335 + arr = np.array([2, np.NaN], dtype=object) + result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1) + + tm.assert_extension_array_equal(result, exp) + def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object)