Skip to content

Commit b8b7400

Browse files
authored
BUG: Fix some parse dates tests (#43312)
1 parent ba21475 commit b8b7400

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed

pandas/io/parsers/base_parser.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1090,7 +1090,9 @@ def _isindex(colspec):
10901090
colspec = orig_names[colspec]
10911091
if _isindex(colspec):
10921092
continue
1093-
data_dict[colspec] = converter(data_dict[colspec])
1093+
# Pyarrow engine returns Series which we need to convert to
1094+
# numpy array before converter; it's a no-op for other parsers
1095+
data_dict[colspec] = converter(np.asarray(data_dict[colspec]))
10941096
else:
10951097
new_name, col, old_names = _try_convert_dates(
10961098
converter, colspec, data_dict, orig_names
@@ -1139,7 +1141,7 @@ def _try_convert_dates(parser: Callable, colspec, data_dict, columns):
11391141
colnames.append(c)
11401142

11411143
new_name = "_".join([str(x) for x in colnames])
1142-
to_parse = [data_dict[c] for c in colnames if c in data_dict]
1144+
to_parse = [np.asarray(data_dict[c]) for c in colnames if c in data_dict]
11431145

11441146
new_col = parser(*to_parse)
11451147
return new_name, new_col, colnames

pandas/tests/io/parser/test_parse_dates.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
import pandas.io.date_converters as conv
4343
from pandas.io.parsers import read_csv
4444

45-
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
45+
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
4646

4747
# constant
4848
_DEFAULT_DATETIME = datetime(1, 1, 1)
@@ -54,6 +54,7 @@
5454
date_strategy = st.datetimes()
5555

5656

57+
@xfail_pyarrow
5758
def test_read_csv_with_custom_date_parser(all_parsers):
5859
# GH36111
5960
def __custom_date_parser(time):
@@ -91,6 +92,7 @@ def __custom_date_parser(time):
9192
tm.assert_frame_equal(result, expected)
9293

9394

95+
@xfail_pyarrow
9496
def test_separator_date_conflict(all_parsers):
9597
# Regression test for gh-4678
9698
#
@@ -112,6 +114,7 @@ def test_separator_date_conflict(all_parsers):
112114
tm.assert_frame_equal(df, expected)
113115

114116

117+
@xfail_pyarrow
115118
@pytest.mark.parametrize("keep_date_col", [True, False])
116119
def test_multiple_date_col_custom(all_parsers, keep_date_col):
117120
data = """\
@@ -267,6 +270,7 @@ def test_concat_date_col_fail(container, dim):
267270
parsing.concat_date_cols(date_cols)
268271

269272

273+
@xfail_pyarrow
270274
@pytest.mark.parametrize("keep_date_col", [True, False])
271275
def test_multiple_date_col(all_parsers, keep_date_col):
272276
data = """\
@@ -426,6 +430,7 @@ def test_date_col_as_index_col(all_parsers):
426430
tm.assert_frame_equal(result, expected)
427431

428432

433+
@xfail_pyarrow
429434
@pytest.mark.parametrize(
430435
"date_parser, warning",
431436
([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
@@ -490,6 +495,7 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning):
490495
tm.assert_frame_equal(result, expected)
491496

492497

498+
@xfail_pyarrow
493499
def test_multiple_date_col_timestamp_parse(all_parsers):
494500
parser = all_parsers
495501
data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
@@ -524,6 +530,7 @@ def test_multiple_date_col_timestamp_parse(all_parsers):
524530
tm.assert_frame_equal(result, expected)
525531

526532

533+
@xfail_pyarrow
527534
def test_multiple_date_cols_with_header(all_parsers):
528535
parser = all_parsers
529536
data = """\
@@ -693,6 +700,7 @@ def test_date_parser_int_bug(all_parsers):
693700
tm.assert_frame_equal(result, expected)
694701

695702

703+
@xfail_pyarrow
696704
def test_nat_parse(all_parsers):
697705
# see gh-3062
698706
parser = all_parsers
@@ -708,6 +716,7 @@ def test_nat_parse(all_parsers):
708716
tm.assert_frame_equal(result, df)
709717

710718

719+
@xfail_pyarrow
711720
def test_csv_custom_parser(all_parsers):
712721
data = """A,B,C
713722
20090101,a,1,2
@@ -722,6 +731,7 @@ def test_csv_custom_parser(all_parsers):
722731
tm.assert_frame_equal(result, expected)
723732

724733

734+
@xfail_pyarrow
725735
def test_parse_dates_implicit_first_col(all_parsers):
726736
data = """A,B,C
727737
20090101,a,1,2
@@ -735,6 +745,7 @@ def test_parse_dates_implicit_first_col(all_parsers):
735745
tm.assert_frame_equal(result, expected)
736746

737747

748+
@xfail_pyarrow
738749
def test_parse_dates_string(all_parsers):
739750
data = """date,A,B,C
740751
20090101,a,1,2
@@ -779,6 +790,7 @@ def test_yy_format_with_year_first(all_parsers, parse_dates):
779790
tm.assert_frame_equal(result, expected)
780791

781792

793+
@xfail_pyarrow
782794
@pytest.mark.parametrize("parse_dates", [[0, 2], ["a", "c"]])
783795
def test_parse_dates_column_list(all_parsers, parse_dates):
784796
data = "a,b,c\n01/01/2010,1,15/02/2010"
@@ -795,6 +807,7 @@ def test_parse_dates_column_list(all_parsers, parse_dates):
795807
tm.assert_frame_equal(result, expected)
796808

797809

810+
@xfail_pyarrow
798811
@pytest.mark.parametrize("index_col", [[0, 1], [1, 0]])
799812
def test_multi_index_parse_dates(all_parsers, index_col):
800813
data = """index1,index2,A,B,C
@@ -840,6 +853,7 @@ def test_multi_index_parse_dates(all_parsers, index_col):
840853
tm.assert_frame_equal(result, expected)
841854

842855

856+
@xfail_pyarrow
843857
@pytest.mark.parametrize("kwargs", [{"dayfirst": True}, {"day_first": True}])
844858
def test_parse_dates_custom_euro_format(all_parsers, kwargs):
845859
parser = all_parsers
@@ -884,6 +898,7 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
884898
)
885899

886900

901+
@xfail_pyarrow
887902
def test_parse_tz_aware(all_parsers):
888903
# See gh-1693
889904
parser = all_parsers
@@ -897,6 +912,7 @@ def test_parse_tz_aware(all_parsers):
897912
assert result.index.tz is pytz.utc
898913

899914

915+
@xfail_pyarrow
900916
@pytest.mark.parametrize(
901917
"parse_dates,index_col",
902918
[({"nominal": [1, 2]}, "nominal"), ({"nominal": [1, 2]}, 0), ([[1, 2]], 0)],
@@ -997,6 +1013,7 @@ def test_multiple_date_cols_index(all_parsers, parse_dates, index_col):
9971013
tm.assert_frame_equal(result, expected)
9981014

9991015

1016+
@xfail_pyarrow
10001017
def test_multiple_date_cols_chunked(all_parsers):
10011018
parser = all_parsers
10021019
data = """\
@@ -1089,6 +1106,7 @@ def test_multiple_date_cols_chunked(all_parsers):
10891106
tm.assert_frame_equal(chunks[2], expected[4:])
10901107

10911108

1109+
@xfail_pyarrow
10921110
def test_multiple_date_col_named_index_compat(all_parsers):
10931111
parser = all_parsers
10941112
data = """\
@@ -1112,6 +1130,7 @@ def test_multiple_date_col_named_index_compat(all_parsers):
11121130
tm.assert_frame_equal(with_indices, with_names)
11131131

11141132

1133+
@xfail_pyarrow
11151134
def test_multiple_date_col_multiple_index_compat(all_parsers):
11161135
parser = all_parsers
11171136
data = """\
@@ -1179,6 +1198,7 @@ def test_bad_date_parse(all_parsers, cache_dates, value):
11791198
)
11801199

11811200

1201+
@xfail_pyarrow
11821202
def test_parse_dates_empty_string(all_parsers):
11831203
# see gh-2263
11841204
parser = all_parsers
@@ -1191,6 +1211,7 @@ def test_parse_dates_empty_string(all_parsers):
11911211
tm.assert_frame_equal(result, expected)
11921212

11931213

1214+
@xfail_pyarrow
11941215
@pytest.mark.parametrize(
11951216
"data,kwargs,expected",
11961217
[
@@ -1230,6 +1251,7 @@ def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected):
12301251
tm.assert_frame_equal(result, expected)
12311252

12321253

1254+
@xfail_pyarrow
12331255
@pytest.mark.parametrize(
12341256
"date_parser, warning",
12351257
([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
@@ -1258,6 +1280,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni
12581280
tm.assert_frame_equal(result, expected)
12591281

12601282

1283+
@xfail_pyarrow
12611284
@pytest.mark.parametrize(
12621285
"date_parser, warning",
12631286
([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
@@ -1346,6 +1369,7 @@ def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warni
13461369
tm.assert_frame_equal(result, expected)
13471370

13481371

1372+
@xfail_pyarrow
13491373
@pytest.mark.parametrize(
13501374
"date_parser, warning",
13511375
([conv.parse_date_fields, FutureWarning], [pd.to_datetime, None]),
@@ -1368,6 +1392,7 @@ def test_parse_date_fields(all_parsers, date_parser, warning):
13681392
tm.assert_frame_equal(result, expected)
13691393

13701394

1395+
@xfail_pyarrow
13711396
@pytest.mark.parametrize(
13721397
"date_parser, warning",
13731398
(
@@ -1399,6 +1424,7 @@ def test_parse_date_all_fields(all_parsers, date_parser, warning):
13991424
tm.assert_frame_equal(result, expected)
14001425

14011426

1427+
@xfail_pyarrow
14021428
@pytest.mark.parametrize(
14031429
"date_parser, warning",
14041430
(
@@ -1430,6 +1456,7 @@ def test_datetime_fractional_seconds(all_parsers, date_parser, warning):
14301456
tm.assert_frame_equal(result, expected)
14311457

14321458

1459+
@xfail_pyarrow
14331460
def test_generic(all_parsers):
14341461
parser = all_parsers
14351462
data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
@@ -1448,6 +1475,7 @@ def test_generic(all_parsers):
14481475
tm.assert_frame_equal(result, expected)
14491476

14501477

1478+
@xfail_pyarrow
14511479
def test_date_parser_resolution_if_not_ns(all_parsers):
14521480
# see gh-10245
14531481
parser = all_parsers
@@ -1545,6 +1573,7 @@ def test_parse_timezone(all_parsers):
15451573
tm.assert_frame_equal(result, expected)
15461574

15471575

1576+
@xfail_pyarrow
15481577
@pytest.mark.parametrize(
15491578
"date_string",
15501579
["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"],
@@ -1556,6 +1585,7 @@ def test_invalid_parse_delimited_date(all_parsers, date_string):
15561585
tm.assert_frame_equal(result, expected)
15571586

15581587

1588+
@xfail_pyarrow
15591589
@pytest.mark.parametrize(
15601590
"date_string,dayfirst,expected",
15611591
[
@@ -1578,6 +1608,7 @@ def test_parse_delimited_date_swap_no_warning(
15781608
tm.assert_frame_equal(result, expected)
15791609

15801610

1611+
@xfail_pyarrow
15811612
@pytest.mark.parametrize(
15821613
"date_string,dayfirst,expected",
15831614
[
@@ -1647,6 +1678,7 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti
16471678
assert result == expected
16481679

16491680

1681+
@xfail_pyarrow
16501682
@pytest.mark.parametrize(
16511683
"names, usecols, parse_dates, missing_cols",
16521684
[
@@ -1679,6 +1711,7 @@ def test_missing_parse_dates_column_raises(
16791711
)
16801712

16811713

1714+
@xfail_pyarrow
16821715
def test_date_parser_and_names(all_parsers):
16831716
# GH#33699
16841717
parser = all_parsers
@@ -1688,6 +1721,7 @@ def test_date_parser_and_names(all_parsers):
16881721
tm.assert_frame_equal(result, expected)
16891722

16901723

1724+
@xfail_pyarrow
16911725
def test_date_parser_usecols_thousands(all_parsers):
16921726
# GH#39365
16931727
data = """A,B,C

0 commit comments

Comments
 (0)