From 07782bc966671966d948886858946e8214a415ce Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 24 Oct 2020 01:08:49 +0000 Subject: [PATCH 01/20] TST: add tests from OP --- pandas/tests/arrays/integer/test_dtypes.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index d71037f9151e0..069879702135a 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -294,3 +294,18 @@ def test_astype_boolean(): result = a.astype("boolean") expected = pd.array([True, False, True, True, None], dtype="boolean") tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "func", + [ + lambda s: s.tolist()[0], + lambda s: s.to_dict()[0], + lambda s: list(s.iteritems())[0][1], + lambda s: list(iter(s))[0], + ], +) +def test_conversion_methods_return_type_is_native(func): + # GH 29738 + s = pd.Series([1, 2], dtype="Int64") + assert isinstance(func(s), int) From 1b164c3f437d34fc1d9f05c855c05bee1e8eea13 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 24 Oct 2020 01:09:24 +0000 Subject: [PATCH 02/20] ENH: implement __iter__ from IntegerArray --- pandas/core/arrays/integer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 88a5a88efe146..c56e1a58c5109 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -357,6 +357,13 @@ def __pos__(self): def __abs__(self): return type(self)(np.abs(self._data), self._mask) + def __iter__(self): + for i in range(len(self)): + if self._mask[i]: + yield self.dtype.na_value + else: + yield self._data[i].item() + @classmethod def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "IntegerArray": return integer_array(scalars, dtype=dtype, copy=copy) From 142c81fcb887ac90883d70f45e8c88a1849ad9d8 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 24 Oct 2020 15:38:17 +0000 Subject: [PATCH 03/20] feedback: move __iter__ method to base class --- pandas/core/arrays/integer.py | 7 ------- pandas/core/arrays/masked.py | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index c56e1a58c5109..88a5a88efe146 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -357,13 +357,6 @@ def __pos__(self): def __abs__(self): return type(self)(np.abs(self._data), self._mask) - def __iter__(self): - for i in range(len(self)): - if self._mask[i]: - yield self.dtype.na_value - else: - yield self._data[i].item() - @classmethod def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "IntegerArray": return integer_array(scalars, dtype=dtype, copy=copy) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 9febba0f544ac..df722cabedf21 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -132,7 +132,7 @@ def __iter__(self): if self._mask[i]: yield self.dtype.na_value else: - yield self._data[i] + yield self._data[i].item() def __len__(self) -> int: return len(self._data) From 33579015c7dcb9b88d90a1b9262a88aad35a62c8 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 24 Oct 2020 15:39:15 +0000 Subject: [PATCH 04/20] TST: parametrize Int tests on dtype --- pandas/tests/arrays/integer/test_dtypes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 069879702135a..4c1de8f184674 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -305,7 +305,8 @@ def test_astype_boolean(): lambda s: list(iter(s))[0], ], ) -def test_conversion_methods_return_type_is_native(func): +def test_conversion_methods_return_type_is_native(any_nullable_int_dtype, func): # GH 29738 - s = pd.Series([1, 2], dtype="Int64") + dtype = any_nullable_int_dtype + s = pd.Series([1, 2], dtype=dtype) assert isinstance(func(s), int) From e38934e8a9882463970923e92a8fc4cde753bd9d Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 24 Oct 2020 15:40:03 +0000 Subject: [PATCH 05/20] TST: add floating tests --- pandas/tests/arrays/floating/test_astype.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/arrays/floating/test_astype.py b/pandas/tests/arrays/floating/test_astype.py index 828d80d2f9a51..c10c2ba9a79f6 100644 --- a/pandas/tests/arrays/floating/test_astype.py +++ b/pandas/tests/arrays/floating/test_astype.py @@ -118,3 +118,19 @@ def test_astype_object(dtype): # check exact element types assert isinstance(result[0], float) assert result[1] is pd.NA + + +@pytest.mark.parametrize( + "func", + [ + lambda s: s.tolist()[0], + lambda s: s.to_dict()[0], + lambda s: list(s.iteritems())[0][1], + lambda s: list(iter(s))[0], + ], +) +def test_conversion_methods_return_type_is_native(float_ea_dtype, func): + # GH 29738 + dtype = float_ea_dtype + s = pd.Series([1, 2], dtype=dtype) + assert isinstance(func(s), float) From c32aafa00354eb13f311d595069d98caea3952bc Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 24 Oct 2020 15:41:25 +0000 Subject: [PATCH 06/20] TST: add boolean tests --- pandas/tests/arrays/boolean/test_astype.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py index 57cec70262526..d73ba93fd8324 100644 --- a/pandas/tests/arrays/boolean/test_astype.py +++ b/pandas/tests/arrays/boolean/test_astype.py @@ -51,3 +51,18 @@ def test_astype_to_integer_array(): result = arr.astype("Int64") expected = pd.array([1, 0, None], dtype="Int64") tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "func", + [ + lambda s: s.tolist()[0], + lambda s: s.to_dict()[0], + lambda s: list(s.iteritems())[0][1], + lambda s: list(iter(s))[0], + ], +) +def test_conversion_methods_return_type_is_native(func): + # GH 29738 + s = pd.Series([True, False], dtype="boolean") + assert isinstance(func(s), bool) From 2eb72190e3f625dec0c866596009c75d6086377f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 24 Oct 2020 16:44:53 +0000 Subject: [PATCH 07/20] TST: #346654 --- pandas/tests/arrays/integer/test_dtypes.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 4c1de8f184674..4023c2b726cdb 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -310,3 +310,16 @@ def test_conversion_methods_return_type_is_native(any_nullable_int_dtype, func): dtype = any_nullable_int_dtype s = pd.Series([1, 2], dtype=dtype) assert isinstance(func(s), int) + + +def test_conversion_to_dict_oriented_record_returns_native(any_nullable_int_dtype): + # GH 34665 + + df = pd.DataFrame({"A": [1, None]}) + df["A"] = df["A"].astype("Int64") + records_as_dicts = df.to_dict(orient="records") + expected = [{"A": 1}, {"A": pd.NA}] + + assert records_as_dicts == expected + assert type(records_as_dicts[0]["A"]) is int + assert type(records_as_dicts[1]["A"]) is pd._libs.missing.NAType From db4154de938a9767c6f876ed38b7c0977dea6f5f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 30 Oct 2020 18:36:33 +0000 Subject: [PATCH 08/20] feedback: gather tests in separate file + use fixtures --- .../tests/arrays/masked/test_conversions.py | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 pandas/tests/arrays/masked/test_conversions.py diff --git a/pandas/tests/arrays/masked/test_conversions.py b/pandas/tests/arrays/masked/test_conversions.py new file mode 100644 index 0000000000000..567ce4d84d996 --- /dev/null +++ b/pandas/tests/arrays/masked/test_conversions.py @@ -0,0 +1,76 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import isna + + +@pytest.fixture( + params=[ + lambda s: s.tolist(), + lambda s: s.to_dict().values(), + lambda s: [v for _, v in s.iteritems()], + lambda s: list(iter(s)), + ] +) +def series_conversion(request): + return request.param + + +@pytest.fixture( + params=[ + lambda df: df.to_dict()["A"].values(), + lambda df: [record["A"] for record in df.to_dict(orient="records")], + ] +) +def frame_conversion(request): + return request.param + + +@pytest.fixture(params=[[True, False], [False, pd.NA], [False, np.nan]]) +def boolean_data(request): + return request.param + + +@pytest.fixture(params=[[1.0, 2.0], [1.0, pd.NA], [1.0, np.nan]]) +def float_data(request): + return request.param + + +@pytest.fixture(params=[[1, 2], [1, pd.NA], [1, np.nan]]) +def int_data(request): + return request.param + + +class TestSeriesReturnTypesArePythonNative: + def test_boolean(self, boolean_data, series_conversion): + # GH 29738 + s = pd.Series(boolean_data, dtype="boolean") + assert all(isinstance(val, bool) or isna(val) for val in series_conversion(s)) + + def test_float(self, float_data, float_ea_dtype, series_conversion): + # GH 29738 + s = pd.Series(float_data, dtype=float_ea_dtype) + assert all(isinstance(val, float) or isna(val) for val in series_conversion(s)) + + def test_int(self, int_data, any_nullable_int_dtype, series_conversion): + # GH 29738 + s = pd.Series(int_data, dtype=any_nullable_int_dtype) + assert all(isinstance(val, int) or isna(val) for val in series_conversion(s)) + + +class TestFrameReturnTypesArePythonNative: + def test_boolean(self, boolean_data, frame_conversion): + # GH 29738 + s = pd.DataFrame({"A": boolean_data}, dtype="boolean") + assert all(isinstance(val, bool) or isna(val) for val in frame_conversion(s)) + + def test_float(self, float_data, float_ea_dtype, frame_conversion): + # GH 29738 + s = pd.DataFrame({"A": float_data}, dtype=float_ea_dtype) + assert all(isinstance(val, float) or isna(val) for val in frame_conversion(s)) + + def test_int(self, int_data, any_nullable_int_dtype, frame_conversion): + # GH 29738 + s = pd.DataFrame({"A": int_data}, dtype=any_nullable_int_dtype) + assert all(isinstance(val, int) or isna(val) for val in frame_conversion(s)) From 30d2f09f35af396f0b5d9593baa95b3cbb65a697 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 30 Oct 2020 18:39:05 +0000 Subject: [PATCH 09/20] TST: remove tests from original locations --- pandas/tests/arrays/boolean/test_astype.py | 15 ---------- pandas/tests/arrays/floating/test_astype.py | 16 ---------- pandas/tests/arrays/floating/test_function.py | 3 ++ pandas/tests/arrays/integer/test_dtypes.py | 29 ------------------- 4 files changed, 3 insertions(+), 60 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py index d73ba93fd8324..57cec70262526 100644 --- a/pandas/tests/arrays/boolean/test_astype.py +++ b/pandas/tests/arrays/boolean/test_astype.py @@ -51,18 +51,3 @@ def test_astype_to_integer_array(): result = arr.astype("Int64") expected = pd.array([1, 0, None], dtype="Int64") tm.assert_extension_array_equal(result, expected) - - -@pytest.mark.parametrize( - "func", - [ - lambda s: s.tolist()[0], - lambda s: s.to_dict()[0], - lambda s: list(s.iteritems())[0][1], - lambda s: list(iter(s))[0], - ], -) -def test_conversion_methods_return_type_is_native(func): - # GH 29738 - s = pd.Series([True, False], dtype="boolean") - assert isinstance(func(s), bool) diff --git a/pandas/tests/arrays/floating/test_astype.py b/pandas/tests/arrays/floating/test_astype.py index c10c2ba9a79f6..828d80d2f9a51 100644 --- a/pandas/tests/arrays/floating/test_astype.py +++ b/pandas/tests/arrays/floating/test_astype.py @@ -118,19 +118,3 @@ def test_astype_object(dtype): # check exact element types assert isinstance(result[0], float) assert result[1] is pd.NA - - -@pytest.mark.parametrize( - "func", - [ - lambda s: s.tolist()[0], - lambda s: s.to_dict()[0], - lambda s: list(s.iteritems())[0][1], - lambda s: list(iter(s))[0], - ], -) -def test_conversion_methods_return_type_is_native(float_ea_dtype, func): - # GH 29738 - dtype = float_ea_dtype - s = pd.Series([1, 2], dtype=dtype) - assert isinstance(func(s), float) diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index 2767d93741d4c..baf60a363ad29 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.compat import IS64 + import pandas as pd import pandas._testing as tm @@ -71,6 +73,7 @@ def test_ufunc_reduce_raises(values): np.add.reduce(a) +@pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system") @pytest.mark.parametrize( "pandasmethname, kwargs", [ diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 4023c2b726cdb..d71037f9151e0 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -294,32 +294,3 @@ def test_astype_boolean(): result = a.astype("boolean") expected = pd.array([True, False, True, True, None], dtype="boolean") tm.assert_extension_array_equal(result, expected) - - -@pytest.mark.parametrize( - "func", - [ - lambda s: s.tolist()[0], - lambda s: s.to_dict()[0], - lambda s: list(s.iteritems())[0][1], - lambda s: list(iter(s))[0], - ], -) -def test_conversion_methods_return_type_is_native(any_nullable_int_dtype, func): - # GH 29738 - dtype = any_nullable_int_dtype - s = pd.Series([1, 2], dtype=dtype) - assert isinstance(func(s), int) - - -def test_conversion_to_dict_oriented_record_returns_native(any_nullable_int_dtype): - # GH 34665 - - df = pd.DataFrame({"A": [1, None]}) - df["A"] = df["A"].astype("Int64") - records_as_dicts = df.to_dict(orient="records") - expected = [{"A": 1}, {"A": pd.NA}] - - assert records_as_dicts == expected - assert type(records_as_dicts[0]["A"]) is int - assert type(records_as_dicts[1]["A"]) is pd._libs.missing.NAType From d7fced719049ccf14ae597b0d3d916d806c96a9e Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 06:52:06 +0000 Subject: [PATCH 10/20] TST: rewrite expected construction using pd.array --- pandas/tests/extension/base/methods.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index e973b1247941f..95f4105757ab4 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -241,12 +241,11 @@ def test_combine_add(self, data_repeated): s1 = pd.Series(orig_data1) s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 + x2) - with np.errstate(over="ignore"): - expected = pd.Series( - orig_data1._from_sequence( - [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))] - ) - ) + + arr = pd.array( + [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))], dtype="Int64" + ) + expected = pd.Series(arr, dtype="boolean") self.assert_series_equal(result, expected) val = s1.iloc[0] From 7bdc0ac304a66c62ea817f8d2bcb6d6bfa323cbb Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 06:53:19 +0000 Subject: [PATCH 11/20] TST: add comment --- pandas/tests/extension/base/methods.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 95f4105757ab4..e6730a26e0641 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -242,10 +242,11 @@ def test_combine_add(self, data_repeated): s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 + x2) - arr = pd.array( + arr = pd.array( # cannot cast to boolean directly atm [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))], dtype="Int64" ) expected = pd.Series(arr, dtype="boolean") + self.assert_series_equal(result, expected) val = s1.iloc[0] From 9bf6b25dffd29693f86db8264a1461aa8637c7c4 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 07:03:57 +0000 Subject: [PATCH 12/20] TST: skip float-string conversion, reason:M f-p precision --- pandas/tests/extension/base/casting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 039b42210224e..e3332ca1bd79c 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -32,6 +32,7 @@ def test_tolist(self, data): expected = list(data) assert result == expected + @pytest.mark.skip(reason="Floating precision issues") def test_astype_str(self, data): result = pd.Series(data[:5]).astype(str) expected = pd.Series([str(x) for x in data[:5]], dtype=str) From ec837c07ae481e5654dd4bcf4e529815009aae4b Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 07:17:36 +0000 Subject: [PATCH 13/20] TST/BUG: correct test rewrite --- pandas/tests/extension/base/methods.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index e6730a26e0641..e0b0d935bb092 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -242,12 +242,14 @@ def test_combine_add(self, data_repeated): s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 + x2) - arr = pd.array( # cannot cast to boolean directly atm - [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))], dtype="Int64" + expected = pd.Series( + pd.array([a + b for (a, b) in zip(list(orig_data1), list(orig_data2))]), + dtype=orig_data1.dtype, ) - expected = pd.Series(arr, dtype="boolean") - self.assert_series_equal(result, expected) + # TODO: expected currently has an incorrect dtype + # fix construction and set check_type=True in assertion + self.assert_series_equal(result, expected, check_dtype=False) val = s1.iloc[0] result = s1.combine(val, lambda x1, x2: x1 + x2) From c7db14aff9e40fef32664c63372d4458cfd4b69e Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 07:18:21 +0000 Subject: [PATCH 14/20] TST: skip string conversion test due to fp-precision issues --- pandas/tests/extension/base/casting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index e3332ca1bd79c..4145a009dbfe4 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -38,6 +38,7 @@ def test_astype_str(self, data): expected = pd.Series([str(x) for x in data[:5]], dtype=str) self.assert_series_equal(result, expected) + @pytest.mark.skip(reason="Floating precision issues") def test_astype_string(self, data): # GH-33465 result = pd.Series(data[:5]).astype("string") From e70a7dfac8b2b3b14eb663a9a4d2b5e373d67924 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 07:46:42 +0000 Subject: [PATCH 15/20] TST: DRY the code using data fixture --- pandas/tests/extension/base/__init__.py | 1 + pandas/tests/extension/base/return_types.py | 77 +++++++++++++++++++++ pandas/tests/extension/test_boolean.py | 4 ++ pandas/tests/extension/test_floating.py | 4 ++ pandas/tests/extension/test_integer.py | 4 ++ 5 files changed, 90 insertions(+) create mode 100644 pandas/tests/extension/base/return_types.py diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 323cb843b2d74..55564faf359c6 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -63,4 +63,5 @@ class TestMyDtype(BaseDtypeTests): BaseNumericReduceTests, ) from .reshaping import BaseReshapingTests # noqa +from .return_types import BaseReturnTypeTests # noqa from .setitem import BaseSetitemTests # noqa diff --git a/pandas/tests/extension/base/return_types.py b/pandas/tests/extension/base/return_types.py new file mode 100644 index 0000000000000..4d667a1fce6ab --- /dev/null +++ b/pandas/tests/extension/base/return_types.py @@ -0,0 +1,77 @@ +import pytest + +from pandas.core.dtypes.common import is_bool_dtype, is_float_dtype, is_integer_dtype + +import pandas as pd +from pandas import isna + +from .base import BaseExtensionTests + + +@pytest.fixture( + params=[ + lambda df: df.to_dict()["A"].values(), + lambda df: [record["A"] for record in df.to_dict(orient="records")], + ] +) +def frame_conversion(request): + return request.param + + +class BaseReturnTypeTests(BaseExtensionTests): + def get_native_dtype(self, dtype): + if is_integer_dtype(dtype): + return int + elif is_float_dtype(dtype): + return float + elif is_bool_dtype(dtype): + return bool + else: + raise ValueError("invalid dtype provided") + + @pytest.mark.parametrize( + "func", + [ + lambda s: s.tolist(), + lambda s: s.to_dict().values(), + lambda s: [v for _, v in s.iteritems()], + lambda s: list(iter(s)), + ], + ) + def test_series(self, all_data, func): + # GH 29738 + s = pd.Series(all_data) + native_dtype = self.get_native_dtype(all_data.dtype) + + assert all(isinstance(val, native_dtype) or isna(val) for val in func(s)) + + @pytest.mark.parametrize( + "func", + [ + lambda df: df.to_dict()["A"].values(), + lambda df: [record["A"] for record in df.to_dict(orient="records")], + ], + ) + def test_frame(self, all_data, func): + # GH 29738 + s = pd.DataFrame({"A": all_data}) + native_dtype = self.get_native_dtype(all_data.dtype) + + assert all(isinstance(val, native_dtype) or isna(val) for val in func(s)) + + +# class TestFrameReturnTypesArePythonNative: +# def test_boolean(self, boolean_data, frame_conversion): +# # GH 29738 +# s = pd.DataFrame({"A": boolean_data}, dtype="boolean") +# assert all(isinstance(val, bool) or isna(val) for val in frame_conversion(s)) + +# def test_float(self, float_data, float_ea_dtype, frame_conversion): +# # GH 29738 +# s = pd.DataFrame({"A": float_data}, dtype=float_ea_dtype) +# assert all(isinstance(val, float) or isna(val) for val in frame_conversion(s)) + +# def test_int(self, int_data, any_nullable_int_dtype, frame_conversion): +# # GH 29738 +# s = pd.DataFrame({"A": int_data}, dtype=any_nullable_int_dtype) +# assert all(isinstance(val, int) or isna(val) for val in frame_conversion(s)) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 8acbeaf0b8170..6bcb1ba1e3299 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -99,6 +99,10 @@ class TestMissing(base.BaseMissingTests): pass +class TestReturnTypes(base.BaseReturnTypeTests): + pass + + class TestArithmeticOps(base.BaseArithmeticOpsTests): implements = {"__sub__", "__rsub__"} diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index 00881178de1b4..3f3c48aae3c89 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -169,6 +169,10 @@ class TestMissing(base.BaseMissingTests): pass +class TestReturnTypes(base.BaseReturnTypeTests): + pass + + class TestMethods(base.BaseMethodsTests): @pytest.mark.skip(reason="uses nullable integer") def test_value_counts(self, all_data, dropna): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 725533765ca2c..d636ff8e3a4ae 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -209,6 +209,10 @@ class TestMissing(base.BaseMissingTests): pass +class TestReturnTypes(base.BaseReturnTypeTests): + pass + + class TestMethods(base.BaseMethodsTests): @pytest.mark.skip(reason="uses nullable integer") def test_value_counts(self, all_data, dropna): From ff1ede7a5a8cb0ddb7bfcb2cfe032d256c601b14 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 1 Nov 2020 07:48:35 +0000 Subject: [PATCH 16/20] CLN: remove unused code --- .../tests/arrays/masked/test_conversions.py | 76 ------------------- pandas/tests/extension/base/return_types.py | 17 ----- 2 files changed, 93 deletions(-) delete mode 100644 pandas/tests/arrays/masked/test_conversions.py diff --git a/pandas/tests/arrays/masked/test_conversions.py b/pandas/tests/arrays/masked/test_conversions.py deleted file mode 100644 index 567ce4d84d996..0000000000000 --- a/pandas/tests/arrays/masked/test_conversions.py +++ /dev/null @@ -1,76 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -from pandas import isna - - -@pytest.fixture( - params=[ - lambda s: s.tolist(), - lambda s: s.to_dict().values(), - lambda s: [v for _, v in s.iteritems()], - lambda s: list(iter(s)), - ] -) -def series_conversion(request): - return request.param - - -@pytest.fixture( - params=[ - lambda df: df.to_dict()["A"].values(), - lambda df: [record["A"] for record in df.to_dict(orient="records")], - ] -) -def frame_conversion(request): - return request.param - - -@pytest.fixture(params=[[True, False], [False, pd.NA], [False, np.nan]]) -def boolean_data(request): - return request.param - - -@pytest.fixture(params=[[1.0, 2.0], [1.0, pd.NA], [1.0, np.nan]]) -def float_data(request): - return request.param - - -@pytest.fixture(params=[[1, 2], [1, pd.NA], [1, np.nan]]) -def int_data(request): - return request.param - - -class TestSeriesReturnTypesArePythonNative: - def test_boolean(self, boolean_data, series_conversion): - # GH 29738 - s = pd.Series(boolean_data, dtype="boolean") - assert all(isinstance(val, bool) or isna(val) for val in series_conversion(s)) - - def test_float(self, float_data, float_ea_dtype, series_conversion): - # GH 29738 - s = pd.Series(float_data, dtype=float_ea_dtype) - assert all(isinstance(val, float) or isna(val) for val in series_conversion(s)) - - def test_int(self, int_data, any_nullable_int_dtype, series_conversion): - # GH 29738 - s = pd.Series(int_data, dtype=any_nullable_int_dtype) - assert all(isinstance(val, int) or isna(val) for val in series_conversion(s)) - - -class TestFrameReturnTypesArePythonNative: - def test_boolean(self, boolean_data, frame_conversion): - # GH 29738 - s = pd.DataFrame({"A": boolean_data}, dtype="boolean") - assert all(isinstance(val, bool) or isna(val) for val in frame_conversion(s)) - - def test_float(self, float_data, float_ea_dtype, frame_conversion): - # GH 29738 - s = pd.DataFrame({"A": float_data}, dtype=float_ea_dtype) - assert all(isinstance(val, float) or isna(val) for val in frame_conversion(s)) - - def test_int(self, int_data, any_nullable_int_dtype, frame_conversion): - # GH 29738 - s = pd.DataFrame({"A": int_data}, dtype=any_nullable_int_dtype) - assert all(isinstance(val, int) or isna(val) for val in frame_conversion(s)) diff --git a/pandas/tests/extension/base/return_types.py b/pandas/tests/extension/base/return_types.py index 4d667a1fce6ab..44b5e5736e7fa 100644 --- a/pandas/tests/extension/base/return_types.py +++ b/pandas/tests/extension/base/return_types.py @@ -58,20 +58,3 @@ def test_frame(self, all_data, func): native_dtype = self.get_native_dtype(all_data.dtype) assert all(isinstance(val, native_dtype) or isna(val) for val in func(s)) - - -# class TestFrameReturnTypesArePythonNative: -# def test_boolean(self, boolean_data, frame_conversion): -# # GH 29738 -# s = pd.DataFrame({"A": boolean_data}, dtype="boolean") -# assert all(isinstance(val, bool) or isna(val) for val in frame_conversion(s)) - -# def test_float(self, float_data, float_ea_dtype, frame_conversion): -# # GH 29738 -# s = pd.DataFrame({"A": float_data}, dtype=float_ea_dtype) -# assert all(isinstance(val, float) or isna(val) for val in frame_conversion(s)) - -# def test_int(self, int_data, any_nullable_int_dtype, frame_conversion): -# # GH 29738 -# s = pd.DataFrame({"A": int_data}, dtype=any_nullable_int_dtype) -# assert all(isinstance(val, int) or isna(val) for val in frame_conversion(s)) From 1df019c9c57e8fa02e59381300684d61ab50c165 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 3 Nov 2020 20:22:27 +0000 Subject: [PATCH 17/20] TST/BUG: implement jorisvandenbossche suggestion to fix astype(str) tests --- pandas/tests/extension/base/casting.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 4145a009dbfe4..805dd06f749f7 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -32,17 +32,15 @@ def test_tolist(self, data): expected = list(data) assert result == expected - @pytest.mark.skip(reason="Floating precision issues") def test_astype_str(self, data): result = pd.Series(data[:5]).astype(str) - expected = pd.Series([str(x) for x in data[:5]], dtype=str) + expected = pd.Series(data[:5], dtype=str) self.assert_series_equal(result, expected) - @pytest.mark.skip(reason="Floating precision issues") def test_astype_string(self, data): # GH-33465 result = pd.Series(data[:5]).astype("string") - expected = pd.Series([str(x) for x in data[:5]], dtype="string") + expected = pd.Series(data[:5], dtype="string") self.assert_series_equal(result, expected) def test_to_numpy(self, data): From 2a5df3e35bfb5167746c4ef062880b0362f49ed3 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 3 Nov 2020 21:17:07 +0000 Subject: [PATCH 18/20] TST: skip boolean combine_add test --- pandas/tests/extension/base/methods.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index e0b0d935bb092..d01f10f4fad29 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -242,14 +242,17 @@ def test_combine_add(self, data_repeated): s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 + x2) - expected = pd.Series( - pd.array([a + b for (a, b) in zip(list(orig_data1), list(orig_data2))]), - dtype=orig_data1.dtype, - ) - - # TODO: expected currently has an incorrect dtype - # fix construction and set check_type=True in assertion - self.assert_series_equal(result, expected, check_dtype=False) + # FIXME: construct expected for boolean case and enable + if orig_data1.dtype == "boolean": + return + + with np.errstate(over="ignore"): + expected = pd.Series( + orig_data1._from_sequence( + [a + b for (a, b) in zip(orig_data1, orig_data2)] + ) + ) + self.assert_series_equal(result, expected) val = s1.iloc[0] result = s1.combine(val, lambda x1, x2: x1 + x2) @@ -449,7 +452,7 @@ def test_repeat(self, data, repeats, as_series, use_numpy): @pytest.mark.parametrize( "repeats, kwargs, error, msg", [ - (2, dict(axis=1), ValueError, "'axis"), + (2, dict(axis=1), ValueError, "axis"), (-1, dict(), ValueError, "negative"), ([1, 2], dict(), ValueError, "shape"), (2, dict(foo="bar"), TypeError, "'foo'"), From bcf0896f21aa6fc71d14d69f7df27cae5ad47405 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 29 Nov 2020 22:54:41 -0500 Subject: [PATCH 19/20] skip str astype tests for Float32Dtype --- pandas/tests/extension/base/casting.py | 4 ++-- pandas/tests/extension/test_floating.py | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 805dd06f749f7..039b42210224e 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -34,13 +34,13 @@ def test_tolist(self, data): def test_astype_str(self, data): result = pd.Series(data[:5]).astype(str) - expected = pd.Series(data[:5], dtype=str) + expected = pd.Series([str(x) for x in data[:5]], dtype=str) self.assert_series_equal(result, expected) def test_astype_string(self, data): # GH-33465 result = pd.Series(data[:5]).astype("string") - expected = pd.Series(data[:5], dtype="string") + expected = pd.Series([str(x) for x in data[:5]], dtype="string") self.assert_series_equal(result, expected) def test_to_numpy(self, data): diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index 3f3c48aae3c89..7d9cfcaab59f3 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -190,7 +190,15 @@ def test_value_counts(self, all_data, dropna): class TestCasting(base.BaseCastingTests): - pass + def test_astype_str(self, data): + if data.dtype == pd.Float32Dtype(): + return + super().test_astype_str(data) + + def test_astype_string(self, data): + if data.dtype == pd.Float32Dtype(): + return + super().test_astype_string(data) class TestGroupby(base.BaseGroupbyTests): From 225a260901fd571a0b0bb46dd4f96f6117f20e59 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 31 Jan 2021 12:37:33 -0500 Subject: [PATCH 20/20] docstring fix --- pandas/core/arrays/floating.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 2c3b3d3c2f0b4..ab65d02cebf20 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -213,17 +213,17 @@ class FloatingArray(NumericArray): -------- Create an FloatingArray with :func:`pandas.array`: - >>> pd.array([0.1, None, 0.3], dtype=pd.Float32Dtype()) + >>> pd.array([0.1, None, 0.3], dtype=pd.Float64Dtype()) [0.1, , 0.3] - Length: 3, dtype: Float32 + Length: 3, dtype: Float64 String aliases for the dtypes are also available. They are capitalized. - >>> pd.array([0.1, None, 0.3], dtype="Float32") + >>> pd.array([0.1, None, 0.3], dtype="Float64") [0.1, , 0.3] - Length: 3, dtype: Float32 + Length: 3, dtype: Float64 """ # The value used to fill '_data' to avoid upcasting