Skip to content

TST: de-xfail pyarrow parser tests #56071

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit on Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions pandas/tests/io/parser/common/test_common_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,11 +399,16 @@ def test_escapechar(all_parsers):
tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"]))


@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
def test_ignore_leading_whitespace(all_parsers):
# see gh-3374, gh-6607
parser = all_parsers
data = " a b c\n 1 2 3\n 4 5 6\n 7 8 9"

if parser.engine == "pyarrow":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), sep=r"\s+")
return
result = parser.read_csv(StringIO(data), sep=r"\s+")

expected = DataFrame({"a": [1, 4, 7], "b": [2, 5, 8], "c": [3, 6, 9]})
Expand Down Expand Up @@ -582,12 +587,14 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request):

if sep == r"\s+":
data = data.replace(",", " ")

if parser.engine == "pyarrow":
mark = pytest.mark.xfail(
raises=ValueError,
reason="the 'pyarrow' engine does not support regex separators",
)
request.applymarker(mark)
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines
)
return

result = parser.read_csv(StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines)
expected = DataFrame(exp_data, columns=["A", "B", "C"])
Expand All @@ -610,7 +617,6 @@ def test_whitespace_lines(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
@pytest.mark.parametrize(
"data,expected",
[
Expand All @@ -635,6 +641,12 @@ def test_whitespace_lines(all_parsers):
def test_whitespace_regex_separator(all_parsers, data, expected):
# see gh-6607
parser = all_parsers
if parser.engine == "pyarrow":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), sep=r"\s+")
return

result = parser.read_csv(StringIO(data), sep=r"\s+")
tm.assert_frame_equal(result, expected)

Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,6 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg, request):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
def test_temporary_file(all_parsers):
# see gh-13398
parser = all_parsers
Expand All @@ -246,6 +245,12 @@ def test_temporary_file(all_parsers):
new_file.flush()
new_file.seek(0)

if parser.engine == "pyarrow":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(new_file, sep=r"\s+", header=None)
return

result = parser.read_csv(new_file, sep=r"\s+", header=None)

expected = DataFrame([[0, 0]])
Expand Down