Skip to content

Commit e13d808

Browse files
abeltavares (Abel Tavares) and Abel Tavares authored
ENH: DtypeWarning message enhancement (#58250)
Co-authored-by: Abel Tavares <[email protected]>
1 parent 90b66d8 commit e13d808

File tree

5 files changed

+16
-9
lines changed

5 files changed

+16
-9
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Other enhancements
3737
- Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`)
3838
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
3939
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
40+
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
4041
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
4142
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
4243

pandas/errors/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ class DtypeWarning(Warning):
178178
... ) # doctest: +SKIP
179179
>>> df.to_csv("test.csv", index=False) # doctest: +SKIP
180180
>>> df2 = pd.read_csv("test.csv") # doctest: +SKIP
181-
... # DtypeWarning: Columns (0) have mixed types
181+
... # DtypeWarning: Columns (0: a) have mixed types
182182
183183
Important to notice that ``df2`` will contain both `str` and `int` for the
184184
same input, '1'.

pandas/io/parsers/c_parser_wrapper.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def read(
235235
if self.low_memory:
236236
chunks = self._reader.read_low_memory(nrows)
237237
# destructive to chunks
238-
data = _concatenate_chunks(chunks)
238+
data = _concatenate_chunks(chunks, self.names) # type: ignore[has-type]
239239

240240
else:
241241
data = self._reader.read(nrows)
@@ -358,7 +358,9 @@ def _maybe_parse_dates(self, values, index: int, try_parse_dates: bool = True):
358358
return values
359359

360360

361-
def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict:
361+
def _concatenate_chunks(
362+
chunks: list[dict[int, ArrayLike]], column_names: list[str]
363+
) -> dict:
362364
"""
363365
Concatenate chunks of data read with low_memory=True.
364366
@@ -381,10 +383,12 @@ def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict:
381383
else:
382384
result[name] = concat_compat(arrs)
383385
if len(non_cat_dtypes) > 1 and result[name].dtype == np.dtype(object):
384-
warning_columns.append(str(name))
386+
warning_columns.append(column_names[name])
385387

386388
if warning_columns:
387-
warning_names = ",".join(warning_columns)
389+
warning_names = ", ".join(
390+
[f"{index}: {name}" for index, name in enumerate(warning_columns)]
391+
)
388392
warning_message = " ".join(
389393
[
390394
f"Columns ({warning_names}) have mixed types. "

pandas/tests/io/parser/common/test_chunksize.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers):
253253
else:
254254
df = parser.read_csv_check_warnings(
255255
warning_type,
256-
r"Columns \(0\) have mixed types. "
256+
r"Columns \(0: a\) have mixed types. "
257257
"Specify dtype option on import or set low_memory=False.",
258258
buf,
259259
)

pandas/tests/io/parser/test_concatenate_chunks.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def test_concatenate_chunks_pyarrow():
1616
{0: ArrowExtensionArray(pa.array([1.5, 2.5]))},
1717
{0: ArrowExtensionArray(pa.array([1, 2]))},
1818
]
19-
result = _concatenate_chunks(chunks)
19+
result = _concatenate_chunks(chunks, ["column_0", "column_1"])
2020
expected = ArrowExtensionArray(pa.array([1.5, 2.5, 1.0, 2.0]))
2121
tm.assert_extension_array_equal(result[0], expected)
2222

@@ -28,8 +28,10 @@ def test_concatenate_chunks_pyarrow_strings():
2828
{0: ArrowExtensionArray(pa.array([1.5, 2.5]))},
2929
{0: ArrowExtensionArray(pa.array(["a", "b"]))},
3030
]
31-
with tm.assert_produces_warning(DtypeWarning, match="have mixed types"):
32-
result = _concatenate_chunks(chunks)
31+
with tm.assert_produces_warning(
32+
DtypeWarning, match="Columns \\(0: column_0\\) have mixed types"
33+
):
34+
result = _concatenate_chunks(chunks, ["column_0", "column_1"])
3335
expected = np.concatenate(
3436
[np.array([1.5, 2.5], dtype=object), np.array(["a", "b"])]
3537
)

0 commit comments

Comments
 (0)