From 522ffa86897e374f085a37edceb544446508b459 Mon Sep 17 00:00:00 2001 From: zhangxiaoxing <58493968+zhangxiaoxing@users.noreply.github.com> Date: Sat, 13 Nov 2021 11:21:06 +0800 Subject: [PATCH 1/4] BUG GH44079 fix --- pandas/io/parsers/base_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 339585810bec1..f5baf8f0ffa39 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -698,7 +698,7 @@ def _infer_types(self, values, na_values, try_num_bool=True): # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected # "Union[Union[ExtensionArray, ndarray], Index, Series]" mask = algorithms.isin(values, list(na_values)) # type: ignore[arg-type] - na_count = mask.sum() + na_count = mask.astype('uint8').sum() if na_count > 0: if is_integer_dtype(values): values = values.astype(np.float64) From eb8d28b6120512c1d5d04ef8f751886acaf71465 Mon Sep 17 00:00:00 2001 From: zhangxiaoxing <58493968+zhangxiaoxing@users.noreply.github.com> Date: Sat, 13 Nov 2021 11:48:12 +0800 Subject: [PATCH 2/4] BUG GH44079 pre-commit checked --- pandas/io/parsers/base_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index f5baf8f0ffa39..b514b3855e42f 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -698,7 +698,7 @@ def _infer_types(self, values, na_values, try_num_bool=True): # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected # "Union[Union[ExtensionArray, ndarray], Index, Series]" mask = algorithms.isin(values, list(na_values)) # type: ignore[arg-type] - na_count = mask.astype('uint8').sum() + na_count = mask.astype("uint8").sum() if na_count > 0: if is_integer_dtype(values): values = values.astype(np.float64) From 908389aa19653daa07f7dbe592de21ea41c688f0 Mon Sep 17 00:00:00 2001 From: zhangxiaoxing <58493968+zhangxiaoxing@users.noreply.github.com> Date: Sun, 14 Nov 2021 11:54:42 +0800 Subject: [PATCH 3/4] BUG GH44079 added test --- pandas/tests/io/parser/test_index_col.py | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 646cb2029919d..ba32196b961d6 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -89,6 +89,13 @@ def test_infer_index_col(all_parsers): columns=["A", "B", "C"], ) tm.assert_frame_equal(result, expected) + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") @skip_pyarrow @@ -297,3 +304,24 @@ def test_multiindex_columns_index_col_with_data(all_parsers): index=Index(["data"]), ) tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_infer_types_boolean_sum(all_parsers): + # GH#44079 + parser = all_parsers + result = parser.read_csv( + StringIO("0,1"), + names=["a", "b"], + index_col=["a"], + dtype={"a": "UInt8"}, + ) + expected = DataFrame( + data={ + "a": [ + 0, + ], + "b": [1], + } + ).set_index("a") + tm.assert_frame_equal(result, expected, check_index_type=False) From fa430543dc37ee80ef8210d4a5c671c06f40674b Mon Sep 17 00:00:00 2001 From: zhangxiaoxing <58493968+zhangxiaoxing@users.noreply.github.com> Date: Sun, 14 Nov 2021 12:03:49 +0800 Subject: [PATCH 4/4] BUG GH44079 typofix --- pandas/tests/io/parser/test_index_col.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index ba32196b961d6..26e3e9c182b42 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -89,13 +89,6 @@ def test_infer_index_col(all_parsers): columns=["A", "B", "C"], ) tm.assert_frame_equal(result, expected) - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") @skip_pyarrow