Skip to content

Commit bc020f6

Browse files
gfyoung authored and WillAyd committed
TST: Document and test na_filter in read_excel (#29171)
1 parent 0e60bc9 commit bc020f6

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

pandas/io/excel/_base.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,24 @@
133133
+ fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ")
134134
+ """'.
135135
keep_default_na : bool, default True
136-
If na_values are specified and keep_default_na is False the default NaN
137-
values are overridden, otherwise they're appended to.
136+
Whether or not to include the default NaN values when parsing the data.
137+
Depending on whether `na_values` is passed in, the behavior is as follows:
138+
139+
* If `keep_default_na` is True, and `na_values` are specified, `na_values`
140+
is appended to the default NaN values used for parsing.
141+
* If `keep_default_na` is True, and `na_values` are not specified, only
142+
the default NaN values are used for parsing.
143+
* If `keep_default_na` is False, and `na_values` are specified, only
144+
the NaN values specified in `na_values` are used for parsing.
145+
* If `keep_default_na` is False, and `na_values` are not specified, no
146+
strings will be parsed as NaN.
147+
148+
Note that if `na_filter` is passed in as False, the `keep_default_na` and
149+
`na_values` parameters will be ignored.
150+
na_filter : bool, default True
151+
Detect missing value markers (empty strings and the value of na_values). In
152+
data without any NAs, passing na_filter=False can improve the performance
153+
of reading a large file.
138154
verbose : bool, default False
139155
Indicate number of NA values placed in non-numeric columns.
140156
parse_dates : bool, list-like, or dict, default False

pandas/tests/io/excel/test_readers.py

+21
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,27 @@ def test_excel_passes_na(self, read_ext):
870870
)
871871
tm.assert_frame_equal(parsed, expected)
872872

873+
@pytest.mark.parametrize("na_filter", [None, True, False])
874+
def test_excel_passes_na_filter(self, read_ext, na_filter):
875+
# gh-25453
876+
kwargs = {}
877+
878+
if na_filter is not None:
879+
kwargs["na_filter"] = na_filter
880+
881+
with pd.ExcelFile("test5" + read_ext) as excel:
882+
parsed = pd.read_excel(
883+
excel, "Sheet1", keep_default_na=True, na_values=["apple"], **kwargs
884+
)
885+
886+
if na_filter is False:
887+
expected = [["1.#QNAN"], [1], ["nan"], ["apple"], ["rabbit"]]
888+
else:
889+
expected = [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]]
890+
891+
expected = DataFrame(expected, columns=["Test"])
892+
tm.assert_frame_equal(parsed, expected)
893+
873894
@pytest.mark.parametrize("arg", ["sheet", "sheetname", "parse_cols"])
874895
def test_unexpected_kwargs_raises(self, read_ext, arg):
875896
# gh-17964

0 commit comments

Comments
 (0)