Skip to content

Commit c0ebe19

Browse files
jbrockmendel authored and phofl committed
TST: better exception messages with na_values and pyarrow engine (#56090)
* TST: better exception messages with na_values and pyarrow engine
* remove commented-out
* remove commented-out
1 parent aabf219 commit c0ebe19

File tree

3 files changed

+146
-30
lines changed

3 files changed

+146
-30
lines changed

pandas/io/parsers/arrow_parser_wrapper.py

+9
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,15 @@ def read(self) -> DataFrame:
250250
include = self.convert_options.get("include_columns", None)
251251
if include is not None:
252252
self._validate_usecols(include)
253+
254+
nulls = self.convert_options.get("null_values", set())
255+
if not lib.is_list_like(nulls) or not all(
256+
isinstance(x, str) for x in nulls
257+
):
258+
raise TypeError(
259+
"The 'pyarrow' engine requires all na_values to be strings"
260+
)
261+
253262
raise
254263

255264
try:

pandas/io/parsers/readers.py

+14-9
Original file line numberDiff line numberDiff line change
@@ -1716,7 +1716,10 @@ def _clean_options(
17161716

17171717
# Converting values to NA
17181718
keep_default_na = options["keep_default_na"]
1719-
na_values, na_fvalues = _clean_na_values(na_values, keep_default_na)
1719+
floatify = engine != "pyarrow"
1720+
na_values, na_fvalues = _clean_na_values(
1721+
na_values, keep_default_na, floatify=floatify
1722+
)
17201723

17211724
# handle skiprows; this is internally handled by the
17221725
# c-engine, so only need for python and pyarrow parsers
@@ -1928,7 +1931,7 @@ def TextParser(*args, **kwds) -> TextFileReader:
19281931
return TextFileReader(*args, **kwds)
19291932

19301933

1931-
def _clean_na_values(na_values, keep_default_na: bool = True):
1934+
def _clean_na_values(na_values, keep_default_na: bool = True, floatify: bool = True):
19321935
na_fvalues: set | dict
19331936
if na_values is None:
19341937
if keep_default_na:
@@ -1956,7 +1959,7 @@ def _clean_na_values(na_values, keep_default_na: bool = True):
19561959
else:
19571960
if not is_list_like(na_values):
19581961
na_values = [na_values]
1959-
na_values = _stringify_na_values(na_values)
1962+
na_values = _stringify_na_values(na_values, floatify)
19601963
if keep_default_na:
19611964
na_values = na_values | STR_NA_VALUES
19621965

@@ -1978,7 +1981,7 @@ def _floatify_na_values(na_values):
19781981
return result
19791982

19801983

1981-
def _stringify_na_values(na_values):
1984+
def _stringify_na_values(na_values, floatify: bool):
19821985
"""return a stringified and numeric for these values"""
19831986
result: list[str | float] = []
19841987
for x in na_values:
@@ -1993,13 +1996,15 @@ def _stringify_na_values(na_values):
19931996
result.append(f"{v}.0")
19941997
result.append(str(v))
19951998

1996-
result.append(v)
1997-
except (TypeError, ValueError, OverflowError):
1998-
pass
1999-
try:
2000-
result.append(int(x))
1999+
if floatify:
2000+
result.append(v)
20012001
except (TypeError, ValueError, OverflowError):
20022002
pass
2003+
if floatify:
2004+
try:
2005+
result.append(int(x))
2006+
except (TypeError, ValueError, OverflowError):
2007+
pass
20032008
return set(result)
20042009

20052010

0 commit comments

Comments
 (0)