From a2e3e5572b81ff8c813e60c4a13438fa6170aa61 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 5 Nov 2020 14:18:41 +0000 Subject: [PATCH 01/33] TST: add GH25969 OP --- pandas/tests/series/methods/test_to_dict.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/series/methods/test_to_dict.py b/pandas/tests/series/methods/test_to_dict.py index 47badb0a1bb52..9c94da8133e63 100644 --- a/pandas/tests/series/methods/test_to_dict.py +++ b/pandas/tests/series/methods/test_to_dict.py @@ -1,5 +1,6 @@ import collections +import numpy as np import pytest from pandas import Series @@ -20,3 +21,14 @@ def test_to_dict(self, mapping, datetime_series): from_method = Series(datetime_series.to_dict(collections.Counter)) from_constructor = Series(collections.Counter(datetime_series.items())) tm.assert_series_equal(from_method, from_constructor) + + @pytest.mark.parametrize( + "input", + ({"a": np.int64(64), "b": 10}, {"a": np.int64(64), "b": 10, "c": "ABC"}), + ) + def test_to_dict_return_types(self, input): + # GH25969 + + d = Series(input).to_dict() + assert isinstance(d["a"], int) + assert isinstance(d["b"], int) From 41f0a4ba93f19624bba32e8fee5ce0ed16c146ca Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 5 Nov 2020 14:18:19 +0000 Subject: [PATCH 02/33] ENH: add maybe_box_native --- pandas/core/dtypes/cast.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9758eae60c262..23e62084fc870 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -159,6 +159,16 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal return value +def maybe_box_native(value): + if is_datetime_or_timedelta_dtype(value): + value = maybe_box_datetimelike(value) + elif is_float_dtype(value): + value = float(value) + elif is_integer_dtype(value): + value = int(value) + return value + + def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): """ try to cast to the specified dtype (e.g. convert back to bool/int From 5e4edbec8e7bc692802e7ce42452fb76a32959cf Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 5 Nov 2020 14:20:11 +0000 Subject: [PATCH 03/33] ENH: use maybe_box_native in Series.to_dict --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e4a805a18bcdb..b754e61adb71a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -42,6 +42,7 @@ from pandas.core.dtypes.cast import ( convert_dtypes, + maybe_box_native, maybe_cast_to_extension_array, validate_numeric_casting, ) @@ -1600,7 +1601,7 @@ def to_dict(self, into=dict): """ # GH16122 into_c = com.standardize_mapping(into) - return into_c(self.items()) + return into_c((k, maybe_box_native(v)) for k, v in self.items()) def to_frame(self, name=None) -> "DataFrame": """ From b6967b7e071f9156dc467df056a9bd87eca999fe Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 5 Nov 2020 17:23:21 +0000 Subject: [PATCH 04/33] BUG: add scalar check to maybe_box_native --- pandas/core/dtypes/cast.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 23e62084fc870..3bafe50ee451d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -160,7 +160,9 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal def maybe_box_native(value): - if is_datetime_or_timedelta_dtype(value): + if not is_scalar(value): + pass + elif is_datetime_or_timedelta_dtype(value): value = maybe_box_datetimelike(value) elif is_float_dtype(value): value = float(value) From 55919e0e0e172c34d1057b8be563dbd946c9b697 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 5 Nov 2020 18:28:13 +0000 Subject: [PATCH 05/33] BUG: suppress int conversion ValueError in maybe_box_native --- pandas/core/dtypes/cast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3bafe50ee451d..acf3be169b09a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -167,7 +167,8 @@ def maybe_box_native(value): elif is_float_dtype(value): value = float(value) elif is_integer_dtype(value): - value = int(value) + with suppress(ValueError): + value = int(value) return value From 759e0912033ceb1272f5a956e807028302bb3ed6 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 6 Nov 2020 19:23:22 +0000 Subject: [PATCH 06/33] TST: rewrite existing to_dict tests --- pandas/tests/frame/methods/test_to_dict.py | 55 +++++++++++++--------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index f8feef7a95eab..f44a524184091 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -256,31 +256,42 @@ def test_to_dict_wide(self): expected = {f"A_{i:d}": i for i in range(256)} assert result == expected - def test_to_dict_orient_dtype(self): - # GH22620 & GH21256 - - df = DataFrame( - { - "bool": [True, True, False], - "datetime": [ + @pytest.mark.parametrize( + "data,dtype", + ( + ([True, True, False], bool), + [ + [ datetime(2018, 1, 1), datetime(2019, 2, 2), datetime(2020, 3, 3), ], - "float": [1.0, 2.0, 3.0], - "int": [1, 2, 3], - "str": ["X", "Y", "Z"], - } - ) + Timestamp, + ], + [[1.0, 2.0, 3.0], float], + [[1, 2, 3], int], + [["X", "Y", "Z"], str], + ), + ) + def test_to_dict_orient_dtype(self, data, dtype): + # GH22620 & GH21256 - expected = { - "int": int, - "float": float, - "str": str, - "datetime": Timestamp, - "bool": bool, - } + df = DataFrame({"a": data}) + d = df.to_dict(orient="records") + assert all(type(record["a"]) is dtype for record in d) + + @pytest.mark.parametrize( + "data,dtype", + ( + [np.int64(9), int], + [np.float64(1.1), float], + [np.bool_(True), bool], + [np.datetime64("2005-02-25"), Timestamp], + ), + ) + def test_to_dict_scalar_constructor_orient_dtype(self, data, dtype): + # GH22620 & GH21256 - for df_dict in df.to_dict("records"): - result = {col: type(df_dict[col]) for col in list(df.columns)} - assert result == expected + df = DataFrame({"a": data}, index=[0]) + d = df.to_dict(orient="records") + assert type(d[0]["a"]) is dtype From 6f966f20db23a22638a1470f8b796907ba3953fa Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 6 Nov 2020 19:25:54 +0000 Subject: [PATCH 07/33] CLN: use maybe_box_datetimelike -> maybe_box_native in DataFrame.to_dict --- pandas/core/frame.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 049d2c4888a69..34fac15efaa0d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -84,7 +84,7 @@ find_common_type, infer_dtype_from_scalar, invalidate_string_dtypes, - maybe_box_datetimelike, + maybe_box_native, maybe_cast_to_datetime, maybe_casted_values, maybe_convert_platform, @@ -1539,7 +1539,7 @@ def to_dict(self, orient="dict", into=dict): ( "data", [ - list(map(maybe_box_datetimelike, t)) + list(map(maybe_box_native, t)) for t in self.itertuples(index=False, name=None) ], ), @@ -1547,7 +1547,7 @@ def to_dict(self, orient="dict", into=dict): ) elif orient == "series": - return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items()) + return into_c((k, maybe_box_native(v)) for k, v in self.items()) elif orient == "records": columns = self.columns.tolist() @@ -1556,8 +1556,7 @@ def to_dict(self, orient="dict", into=dict): for row in self.itertuples(index=False, name=None) ) return [ - into_c((k, maybe_box_datetimelike(v)) for k, v in row.items()) - for row in rows + into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows ] elif orient == "index": From 2ebd673b66a3a9da63b9bd2e5b8cc05933d0dc68 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 6 Nov 2020 20:19:34 +0000 Subject: [PATCH 08/33] TYP: maybe_box_native --- pandas/core/dtypes/cast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index acf3be169b09a..1b09208c8380f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -159,7 +159,8 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal return value -def maybe_box_native(value): +# TODO: this should be a TypeVar +def maybe_box_native(value: Union[Series, Scalar]) -> Union[ABCSeries, Scalar]: if not is_scalar(value): pass elif is_datetime_or_timedelta_dtype(value): From 1dc5935ce9e82b1dc7a1717805acaa04a7b36ed5 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 6 Nov 2020 20:26:30 +0000 Subject: [PATCH 09/33] DOC: add docstring to maybe_box_native --- pandas/core/dtypes/cast.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1b09208c8380f..102ecd828bc8e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -161,6 +161,17 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal # TODO: this should be a TypeVar def maybe_box_native(value: Union[Series, Scalar]) -> Union[ABCSeries, Scalar]: + """ + If passed a scalar cast the scalar to a python native type. + + Parameters + ---------- + value : scalar or Series + + Returns + ------- + scalar or Series + """ if not is_scalar(value): pass elif is_datetime_or_timedelta_dtype(value): From 1e5e459292e9fd183adac9309145a3b6dae7176a Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 6 Nov 2020 20:29:33 +0000 Subject: [PATCH 10/33] DOC: whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 690e6b8f725ad..5cfde81f2450e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -437,7 +437,7 @@ Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetimelike columns (:issue:`21256`) -- +- Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`) Strings ^^^^^^^ From 3c6bd7efdc96a16e3315a020454f33524a1801b5 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 6 Nov 2020 20:51:22 +0000 Subject: [PATCH 11/33] TYP: fix input type hint in maybe_box_native --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 102ecd828bc8e..8873ee7a55a5a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -160,7 +160,7 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal # TODO: this should be a TypeVar -def maybe_box_native(value: Union[Series, Scalar]) -> Union[ABCSeries, Scalar]: +def maybe_box_native(value: Union[ABCSeries, Scalar]) -> Union[ABCSeries, Scalar]: """ If passed a scalar cast the scalar to a python native type. From e3cc18f1af805b6d952ea41ce97eff5016833c06 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 8 Nov 2020 07:42:28 +0000 Subject: [PATCH 12/33] TST (feedback): add uint testcases --- pandas/tests/series/methods/test_to_dict.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_to_dict.py b/pandas/tests/series/methods/test_to_dict.py index 9c94da8133e63..4c3d9592eebe3 100644 --- a/pandas/tests/series/methods/test_to_dict.py +++ b/pandas/tests/series/methods/test_to_dict.py @@ -24,7 +24,11 @@ def test_to_dict(self, mapping, datetime_series): @pytest.mark.parametrize( "input", - ({"a": np.int64(64), "b": 10}, {"a": np.int64(64), "b": 10, "c": "ABC"}), + ( + {"a": np.int64(64), "b": 10}, + {"a": np.int64(64), "b": 10, "c": "ABC"}, + {"a": np.uint64(64), "b": 10, "c": "ABC"}, + ), ) def test_to_dict_return_types(self, input): # GH25969 From 22819b7d8a5ebb92be05b340f06718b4b24f9e96 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 8 Nov 2020 07:43:36 +0000 Subject: [PATCH 13/33] TST (feedback): add uint testcases --- pandas/tests/frame/methods/test_to_dict.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index f44a524184091..ae0f4c3452940 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -283,7 +283,8 @@ def test_to_dict_orient_dtype(self, data, dtype): @pytest.mark.parametrize( "data,dtype", ( - [np.int64(9), int], + [np.uint64(2), int], + [np.int64(-9), int], [np.float64(1.1), float], [np.bool_(True), bool], [np.datetime64("2005-02-25"), Timestamp], From 4bb19162ba5afeee87752730ec6327d0d8301545 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 27 Dec 2020 02:37:27 -0500 Subject: [PATCH 14/33] fixups --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c112eaa36d25e..04bdf89530f72 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -90,7 +90,6 @@ infer_dtype_from_scalar, invalidate_string_dtypes, maybe_box_native, - maybe_box_datetimelike, maybe_convert_platform, maybe_downcast_to_dtype, maybe_infer_to_datetimelike, From 587e592c356f9ee83c320cb118ea66a6a7bc1cdc Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 27 Dec 2020 02:42:29 -0500 Subject: [PATCH 15/33] fix merge error --- pandas/core/dtypes/cast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 93f4c6d354199..250d753d18fac 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -168,6 +168,7 @@ def maybe_box_native(value: Union[ABCSeries, Scalar]) -> Union[ABCSeries, Scalar elif is_integer_dtype(value): with suppress(ValueError): value = int(value) + return value def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: From 9d81f5486cd24ea4477276f268fa89ae29564a82 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 27 Dec 2020 02:42:50 -0500 Subject: [PATCH 16/33] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c7573ee860744..ff5e59c100ac9 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -205,6 +205,8 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetimelike columns (:issue:`21256`) +- Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`) - - From 673da4e05c81e73037ebf0f91c713149f9255ea2 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 27 Dec 2020 14:22:16 -0500 Subject: [PATCH 17/33] fix typing --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 250d753d18fac..bdf0bc44925a9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -147,7 +147,7 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal return value -def maybe_box_native(value: Union[ABCSeries, Scalar]) -> Union[ABCSeries, Scalar]: +def maybe_box_native(value: "Union[Series, Scalar]") -> "Union[Series, Scalar]": """ If passed a scalar cast the scalar to a python native type. From 4036b63df5f2c25a83d1ddb383e9064d202cfaa2 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 28 Dec 2020 02:06:16 -0500 Subject: [PATCH 18/33] tests --- pandas/tests/tools/test_to_numeric.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index f89958f7723ef..db947aafc5a07 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -725,3 +725,19 @@ def test_to_numeric_from_nullable_string(values, expected): s = Series(values, dtype="string") result = to_numeric(s) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data, input_dtype, downcast, expected_dtype", + ( + ([1, 1], "Int64", "integer", "Int8"), + ([450, 300], "Int64", "integer", "Int16"), + ([1, 1], "Float64", "float", "Float32"), + ([1, 1], "Float64", "integer", "Int8"), + ), +) +def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype): + arr = pd.array(data, dtype=input_dtype) + result = pd.to_numeric(arr, downcast=downcast) + expected = pd.array(data, dtype=expected_dtype) + tm.assert_extension_array_equal(result, expected) From 89a841d6591fd6f19d7ef173c9e413a574eda6be Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 28 Dec 2020 02:06:49 -0500 Subject: [PATCH 19/33] add NumericArray path in to_numeric --- pandas/core/tools/numeric.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 08cdfde7df58d..161c8adb88bdb 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -7,6 +7,9 @@ ensure_object, is_datetime_or_timedelta_dtype, is_decimal, + is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, is_number, is_numeric_dtype, is_scalar, @@ -15,6 +18,8 @@ from pandas.core.dtypes.generic import ABCIndex, ABCSeries import pandas as pd +from pandas.core.arrays.numeric import NumericArray +from pandas.core.construction import extract_array def to_numeric(arg, errors="raise", downcast=None): @@ -118,10 +123,14 @@ def to_numeric(arg, errors="raise", downcast=None): is_series = False is_index = False is_scalars = False + is_numeric_extension_dtype = False if isinstance(arg, ABCSeries): is_series = True values = arg.values + if is_extension_array_dtype(arg) and isinstance(values, NumericArray): + is_numeric_extension_dtype = True + values = extract_array(arg) elif isinstance(arg, ABCIndex): is_index = True if needs_i8_conversion(arg.dtype): @@ -142,6 +151,14 @@ def to_numeric(arg, errors="raise", downcast=None): else: values = arg + if is_numeric_extension_dtype or ( + is_extension_array_dtype(arg) and isinstance(values, NumericArray) + ): + is_numeric_extension_dtype = True + mask = values._mask + values = values.to_numpy() + values[mask] = 0 + values_dtype = getattr(values, "dtype", None) if is_numeric_dtype(values_dtype): pass @@ -188,6 +205,16 @@ def to_numeric(arg, errors="raise", downcast=None): if values.dtype == dtype: break + if is_numeric_extension_dtype: + if is_integer_dtype(values): + from pandas.core.arrays import IntegerArray + + values = IntegerArray(values, mask) + elif is_float_dtype(values): + from pandas.core.arrays import FloatingArray + + values = FloatingArray(values, mask) + if is_series: return arg._constructor(values, index=arg.index, name=arg.name) elif is_index: From 68420ea0d27a8048ceb313a441cef12455f8ec84 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 4 Jan 2021 18:42:39 -0500 Subject: [PATCH 20/33] review: tests --- pandas/tests/frame/methods/test_to_dict.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 2f81df971eb52..8c34e752979a3 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -281,7 +281,7 @@ def test_to_dict_orient_dtype(self, data, dtype): assert all(type(record["a"]) is dtype for record in d) @pytest.mark.parametrize( - "data,dtype", + "data,expected_dtype", ( [np.uint64(2), int], [np.int64(-9), int], @@ -290,9 +290,10 @@ def test_to_dict_orient_dtype(self, data, dtype): [np.datetime64("2005-02-25"), Timestamp], ), ) - def test_to_dict_scalar_constructor_orient_dtype(self, data, dtype): + def test_to_dict_scalar_constructor_orient_dtype(self, data, expected_dtype): # GH22620 & GH21256 df = DataFrame({"a": data}, index=[0]) d = df.to_dict(orient="records") - assert type(d[0]["a"]) is dtype + result = type(d[0]["a"]) + assert result is expected_dtype From 8b83e24840786892a2a7a18a2a47a649d30dc1e7 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 4 Jan 2021 18:42:57 -0500 Subject: [PATCH 21/33] add bool check --- pandas/core/dtypes/cast.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index bdf0bc44925a9..7524fecf5830e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -168,6 +168,8 @@ def maybe_box_native(value: "Union[Series, Scalar]") -> "Union[Series, Scalar]": elif is_integer_dtype(value): with suppress(ValueError): value = int(value) + elif is_bool_dtype(value): + value = bool(value) return value From 86b0e040edcc9c616ab3f45de6c1e71e97574515 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 4 Jan 2021 18:45:58 -0500 Subject: [PATCH 22/33] review: maybe_box_native takes Scalar arg only --- pandas/core/dtypes/cast.py | 6 ++---- pandas/core/frame.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7524fecf5830e..04f3890a704c4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -147,7 +147,7 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal return value -def maybe_box_native(value: "Union[Series, Scalar]") -> "Union[Series, Scalar]": +def maybe_box_native(value: Scalar) -> Scalar: """ If passed a scalar cast the scalar to a python native type. @@ -159,9 +159,7 @@ def maybe_box_native(value: "Union[Series, Scalar]") -> "Union[Series, Scalar]": ------- scalar or Series """ - if not is_scalar(value): - pass - elif is_datetime_or_timedelta_dtype(value): + if is_datetime_or_timedelta_dtype(value): value = maybe_box_datetimelike(value) elif is_float_dtype(value): value = float(value) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 04bdf89530f72..03234fcab6fce 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1585,7 +1585,7 @@ def to_dict(self, orient: str = "dict", into=dict): ) elif orient == "series": - return into_c((k, maybe_box_native(v)) for k, v in self.items()) + return into_c((k, v) for k, v in self.items()) elif orient == "records": columns = self.columns.tolist() From 3e2ea12b8725a07814c668d7390835672bfbbbc7 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 31 Jan 2021 11:38:30 -0500 Subject: [PATCH 23/33] review comments: add unit test --- .../dtypes/cast/test_maybe_box_native.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 pandas/tests/dtypes/cast/test_maybe_box_native.py diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py new file mode 100644 index 0000000000000..8ec57828f923c --- /dev/null +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -0,0 +1,32 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_box_native + +from pandas import Timedelta, Timestamp + + +@pytest.mark.parametrize( + "obj,expected_dtype", + [ + (np.int(4), int), + (np.uint(4), int), + (np.int32(-4), int), + (np.uint8(4), int), + (np.float(454.98), float), + (np.float16(0.4), float), + (np.float64(1.4), float), + (np.bool_(False), bool), + (datetime(2005, 2, 25), datetime), + (np.datetime64("2005-02-25"), Timestamp), + (Timestamp("2005-02-25"), Timestamp), + (np.timedelta64(1, "D"), Timedelta), + (Timedelta(1, "D"), Timedelta), + ], +) +def test_maybe_box_native(obj, expected_dtype): + boxed_obj = maybe_box_native(obj) + result_dtype = type(boxed_obj) + assert result_dtype is expected_dtype From a455fcc9cca1ead1a9e3b9bd1ed66678f5cb4a64 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 31 Jan 2021 15:09:43 -0500 Subject: [PATCH 24/33] CI failures --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7fb775fe0f1e2..3e82aa910fc11 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -168,7 +168,7 @@ def maybe_box_native(value: Scalar) -> Scalar: elif is_float_dtype(value): value = float(value) elif is_integer_dtype(value): - with suppress(ValueError): + with suppress(ValueError, TypeError): value = int(value) elif is_bool_dtype(value): value = bool(value) From a444ef5c06ff16094642b43467583c30fe16c52a Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 11 Feb 2021 12:06:22 -0500 Subject: [PATCH 25/33] silence NumPy deprecation warning (np.int -> int) --- pandas/tests/dtypes/cast/test_maybe_box_native.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py index 8ec57828f923c..b483b5895099d 100644 --- a/pandas/tests/dtypes/cast/test_maybe_box_native.py +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -11,7 +11,7 @@ @pytest.mark.parametrize( "obj,expected_dtype", [ - (np.int(4), int), + (int(4), int), (np.uint(4), int), (np.int32(-4), int), (np.uint8(4), int), From 99d7c550b9917d302b89e3792f851858e60e1e00 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 11 Feb 2021 23:51:17 -0500 Subject: [PATCH 26/33] silence NumPy deprecation warning (np.float -> float) --- pandas/tests/dtypes/cast/test_maybe_box_native.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py index b483b5895099d..c119d95bb518d 100644 --- a/pandas/tests/dtypes/cast/test_maybe_box_native.py +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -15,7 +15,7 @@ (np.uint(4), int), (np.int32(-4), int), (np.uint8(4), int), - (np.float(454.98), float), + (float(454.98), float), (np.float16(0.4), float), (np.float64(1.4), float), (np.bool_(False), bool), From eaeb40902385d67ea7e919fa08c17e9e14ce102c Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sat, 13 Feb 2021 11:07:54 -0500 Subject: [PATCH 27/33] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index ea18505e8ad20..d920ef9ea91bf 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -312,7 +312,6 @@ Numeric Conversion ^^^^^^^^^^ -- Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetimelike columns (:issue:`21256`) - Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`) - - From 0467e1bdaca1bba0eef09f5677abec35ea301684 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 15 Feb 2021 20:10:44 -0500 Subject: [PATCH 28/33] review: use is_foo instead of is_foo_dtype --- pandas/core/dtypes/cast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 17c265c80b08f..b11b7b2db5a2f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -169,12 +169,12 @@ def maybe_box_native(value: Scalar) -> Scalar: """ if is_datetime_or_timedelta_dtype(value): value = maybe_box_datetimelike(value) - elif is_float_dtype(value): + elif is_float(value): value = float(value) - elif is_integer_dtype(value): + elif is_integer(value): with suppress(ValueError, TypeError): value = int(value) - elif is_bool_dtype(value): + elif is_bool(value): value = bool(value) return value From 39ca50852d7a87a39662c949ab1c09d55223dfee Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 15 Feb 2021 20:14:41 -0500 Subject: [PATCH 29/33] review: remove suppress in int clause to check if anything fails --- pandas/core/dtypes/cast.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7ba75745bdcd0..eabdbff9a518e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -172,8 +172,7 @@ def maybe_box_native(value: Scalar) -> Scalar: elif is_float(value): value = float(value) elif is_integer(value): - with suppress(ValueError, TypeError): - value = int(value) + value = int(value) elif is_bool(value): value = bool(value) return value From 08567b4a86f74478d7c981dcacfdd78a6227188f Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 16 Feb 2021 12:27:06 -0500 Subject: [PATCH 30/33] pre-commit failure --- pandas/tests/dtypes/cast/test_maybe_box_native.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py index c119d95bb518d..c95f72fa17a40 100644 --- a/pandas/tests/dtypes/cast/test_maybe_box_native.py +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -5,7 +5,10 @@ from pandas.core.dtypes.cast import maybe_box_native -from pandas import Timedelta, Timestamp +from pandas import ( + Timedelta, + Timestamp, +) @pytest.mark.parametrize( From da620f2660540b1eb0931693b3d9525d7f9a2423 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 16 Feb 2021 18:59:54 -0500 Subject: [PATCH 31/33] review: more examples in unit test --- pandas/tests/dtypes/cast/test_maybe_box_native.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py index c95f72fa17a40..3f62f31dac219 100644 --- a/pandas/tests/dtypes/cast/test_maybe_box_native.py +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -6,6 +6,8 @@ from pandas.core.dtypes.cast import maybe_box_native from pandas import ( + Interval, + Period, Timedelta, Timestamp, ) @@ -14,6 +16,7 @@ @pytest.mark.parametrize( "obj,expected_dtype", [ + (b"\x00\x10", bytes), (int(4), int), (np.uint(4), int), (np.int32(-4), int), @@ -27,6 +30,8 @@ (Timestamp("2005-02-25"), Timestamp), (np.timedelta64(1, "D"), Timedelta), (Timedelta(1, "D"), Timedelta), + (Interval(0, 1), Interval), + (Period("4Q2005"), Period), ], ) def test_maybe_box_native(obj, expected_dtype): From 761b7289b9cc72a39bfc25aee545609745e0d383 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 16 Feb 2021 21:43:56 -0500 Subject: [PATCH 32/33] skip json test with numpy_dev --- pandas/tests/io/json/test_json_table_schema.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index d967897e16676..00e325e11512f 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -6,6 +6,7 @@ import numpy as np import pytest +from pandas.compat import is_numpy_dev import pandas.util._test_decorators as td from pandas.core.dtypes.dtypes import ( @@ -753,6 +754,7 @@ def test_read_json_table_timezones_orient(self, idx, vals, recwarn): result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) + @pytest.mark.skipif(is_numpy_dev, reason="DeprecationWarning") def test_comprehensive(self): df = DataFrame( { From 86c6aa78ccf4b945871be35079ffe32129cfe9b7 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Wed, 17 Feb 2021 22:29:12 -0500 Subject: [PATCH 33/33] revert changes to JSON test --- pandas/tests/io/json/test_json_table_schema.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 00e325e11512f..d967897e16676 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -6,7 +6,6 @@ import numpy as np import pytest -from pandas.compat import is_numpy_dev import pandas.util._test_decorators as td from pandas.core.dtypes.dtypes import ( @@ -754,7 +753,6 @@ def test_read_json_table_timezones_orient(self, idx, vals, recwarn): result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) - @pytest.mark.skipif(is_numpy_dev, reason="DeprecationWarning") def test_comprehensive(self): df = DataFrame( {