From 522ffa86897e374f085a37edceb544446508b459 Mon Sep 17 00:00:00 2001 From: zhangxiaoxing <58493968+zhangxiaoxing@users.noreply.github.com> Date: Sat, 13 Nov 2021 11:21:06 +0800 Subject: [PATCH 1/6] BUG GH44079 fix --- pandas/io/parsers/base_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 339585810bec1..f5baf8f0ffa39 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -698,7 +698,7 @@ def _infer_types(self, values, na_values, try_num_bool=True): # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected # "Union[Union[ExtensionArray, ndarray], Index, Series]" mask = algorithms.isin(values, list(na_values)) # type: ignore[arg-type] - na_count = mask.sum() + na_count = mask.astype('uint8').sum() if na_count > 0: if is_integer_dtype(values): values = values.astype(np.float64) From eb8d28b6120512c1d5d04ef8f751886acaf71465 Mon Sep 17 00:00:00 2001 From: zhangxiaoxing <58493968+zhangxiaoxing@users.noreply.github.com> Date: Sat, 13 Nov 2021 11:48:12 +0800 Subject: [PATCH 2/6] BUG GH44079 pre-commit checked --- pandas/io/parsers/base_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index f5baf8f0ffa39..b514b3855e42f 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -698,7 +698,7 @@ def _infer_types(self, values, na_values, try_num_bool=True): # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected # "Union[Union[ExtensionArray, ndarray], Index, Series]" mask = algorithms.isin(values, list(na_values)) # type: ignore[arg-type] - na_count = mask.astype('uint8').sum() + na_count = mask.astype("uint8").sum() if na_count > 0: if is_integer_dtype(values): values = values.astype(np.float64) From 908389aa19653daa07f7dbe592de21ea41c688f0 Mon Sep 17 00:00:00 2001 From: zhangxiaoxing 
<58493968+zhangxiaoxing@users.noreply.github.com> Date: Sun, 14 Nov 2021 11:54:42 +0800 Subject: [PATCH 3/6] BUG GH44079 added test --- pandas/tests/io/parser/test_index_col.py | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 646cb2029919d..ba32196b961d6 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -89,6 +89,13 @@ def test_infer_index_col(all_parsers): columns=["A", "B", "C"], ) tm.assert_frame_equal(result, expected) + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") + expected.index = expected.index.astype("object") @skip_pyarrow @@ -297,3 +304,24 @@ def test_multiindex_columns_index_col_with_data(all_parsers): index=Index(["data"]), ) tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_infer_types_boolean_sum(all_parsers): + # GH#44079 + parser = all_parsers + result = parser.read_csv( + StringIO("0,1"), + names=["a", "b"], + index_col=["a"], + dtype={"a": "UInt8"}, + ) + expected = DataFrame( + data={ + "a": [ + 0, + ], + "b": [1], + } + ).set_index("a") + tm.assert_frame_equal(result, expected, check_index_type=False) From fa430543dc37ee80ef8210d4a5c671c06f40674b Mon Sep 17 00:00:00 2001 From: zhangxiaoxing <58493968+zhangxiaoxing@users.noreply.github.com> Date: Sun, 14 Nov 2021 12:03:49 +0800 Subject: [PATCH 4/6] BUG GH44079 typofix --- pandas/tests/io/parser/test_index_col.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index ba32196b961d6..26e3e9c182b42 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ 
b/pandas/tests/io/parser/test_index_col.py @@ -89,13 +89,6 @@ def test_infer_index_col(all_parsers): columns=["A", "B", "C"], ) tm.assert_frame_equal(result, expected) - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") - expected.index = expected.index.astype("object") @skip_pyarrow From 4e55c97ca437c0c2c4d829d02fc4d32055e5861e Mon Sep 17 00:00:00 2001 From: zhangxiaoxing <58493968+zhangxiaoxing@users.noreply.github.com> Date: Sun, 14 Nov 2021 15:47:51 +0800 Subject: [PATCH 5/6] BUG GH44079 Fix a doc warning to pass github CI check --- doc/source/user_guide/timeseries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index a112c632ceb25..11369bc999086 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2074,7 +2074,7 @@ The ``period`` dtype can be used in ``.astype(...)``. 
It allows one to change th pi.astype("period[D]") # convert to DatetimeIndex - pi.astype("datetime64[ns]") + pi.to_timestamp() # convert to PeriodIndex dti = pd.date_range("2011-01-01", freq="M", periods=3) From 02ac5638106c6d5e75e75f1c89520a7d38e3da91 Mon Sep 17 00:00:00 2001 From: zhangxiaoxing <58493968+zhangxiaoxing@users.noreply.github.com> Date: Mon, 15 Nov 2021 09:22:22 +0800 Subject: [PATCH 6/6] BUG GH44079 added whatsnew note --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index a593a03de5c25..aee28faad0dfe 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -639,6 +639,7 @@ I/O - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`) - Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`) +- Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer the index column dtype from a nullable integer type (:issue:`44079`) - Period