|
21 | 21 | Series,
|
22 | 22 | )
|
23 | 23 | import pandas._testing as tm
|
| 24 | +from pandas.core.arrays import ( |
| 25 | + ArrowStringArray, |
| 26 | + StringArray, |
| 27 | +) |
24 | 28 |
|
25 | 29 | read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"]
|
26 | 30 | engine_params = [
|
@@ -532,6 +536,84 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
|
532 | 536 | actual = pd.read_excel(basename + read_ext, dtype=dtype)
|
533 | 537 | tm.assert_frame_equal(actual, expected)
|
534 | 538 |
|
def test_use_nullable_dtypes(self, read_ext):
    """Round-trip a frame of nullable dtypes through an Excel file.

    A frame with Int64, Float64, boolean and string columns (with and
    without ``pd.NA``), a Timestamp column and an all-NA Int64 column is
    written with ``to_excel`` and read back with
    ``use_nullable_dtypes=True``; the result must equal the original frame.

    Parameters
    ----------
    read_ext : str
        File extension under test; ``".xlsb"`` is skipped.
    """
    # GH#36712
    if read_ext == ".xlsb":
        pytest.skip("No engine for filetype: 'xlsb'")

    df = DataFrame(
        {
            "a": Series([1, 3], dtype="Int64"),
            "b": Series([2.5, 4.5], dtype="Float64"),
            "c": Series([True, False], dtype="boolean"),
            "d": Series(["a", "b"], dtype="string"),
            "e": Series([pd.NA, 6], dtype="Int64"),
            "f": Series([pd.NA, 7.5], dtype="Float64"),
            "g": Series([pd.NA, True], dtype="boolean"),
            "h": Series([pd.NA, "a"], dtype="string"),
            "i": Series([pd.Timestamp("2019-12-31")] * 2),
            "j": Series([pd.NA, pd.NA], dtype="Int64"),
        }
    )
    with tm.ensure_clean(read_ext) as file_path:
        # Pass sheet_name by keyword: positional use of to_excel arguments
        # other than excel_writer is deprecated in newer pandas releases.
        df.to_excel(file_path, sheet_name="test", index=False)
        result = pd.read_excel(
            file_path, sheet_name="test", use_nullable_dtypes=True
        )
    tm.assert_frame_equal(result, df)
| 564 | + |
def test_use_nullabla_dtypes_and_dtype(self, read_ext):
    """Check ``read_excel`` with both ``use_nullable_dtypes`` and ``dtype``.

    A float frame containing ``np.nan`` is written to Excel and read back
    with ``use_nullable_dtypes=True`` and ``dtype="float64"``; the result
    is expected to equal the original (plain float64) frame.

    Parameters
    ----------
    read_ext : str
        File extension under test; ``".xlsb"`` is skipped.
    """
    # GH#36712
    if read_ext == ".xlsb":
        pytest.skip("No engine for filetype: 'xlsb'")

    df = DataFrame({"a": [np.nan, 1.0], "b": [2.5, np.nan]})
    with tm.ensure_clean(read_ext) as file_path:
        # Pass sheet_name by keyword: positional use of to_excel arguments
        # other than excel_writer is deprecated in newer pandas releases.
        df.to_excel(file_path, sheet_name="test", index=False)
        result = pd.read_excel(
            file_path,
            sheet_name="test",
            use_nullable_dtypes=True,
            dtype="float64",
        )
    tm.assert_frame_equal(result, df)
| 577 | + |
@td.skip_if_no("pyarrow")
@pytest.mark.parametrize("storage", ["pyarrow", "python"])
def test_use_nullabla_dtypes_string(self, read_ext, storage):
    """Check the string array type produced under each string storage.

    Object-dtype string columns (one containing ``pd.NA``) are written to
    Excel and read back with ``use_nullable_dtypes=True`` while the
    ``mode.string_storage`` option is set to ``storage``. The result is
    compared against ``StringArray`` columns for ``"python"`` and
    ``ArrowStringArray`` columns otherwise.

    Parameters
    ----------
    read_ext : str
        File extension under test; ``".xlsb"`` is skipped.
    storage : str
        Value set for the ``mode.string_storage`` option.
    """
    # GH#36712
    if read_ext == ".xlsb":
        pytest.skip("No engine for filetype: 'xlsb'")

    import pyarrow as pa

    with pd.option_context("mode.string_storage", storage):
        df = DataFrame(
            {
                "a": np.array(["a", "b"], dtype=np.object_),
                "b": np.array(["x", pd.NA], dtype=np.object_),
            }
        )
        with tm.ensure_clean(read_ext) as file_path:
            # Pass sheet_name by keyword: positional use of to_excel
            # arguments other than excel_writer is deprecated in newer
            # pandas releases.
            df.to_excel(file_path, sheet_name="test", index=False)
            result = pd.read_excel(
                file_path, sheet_name="test", use_nullable_dtypes=True
            )

    # The expected frames are built from explicit array classes, so they do
    # not read the string-storage option.
    if storage == "python":
        expected = DataFrame(
            {
                "a": StringArray(np.array(["a", "b"], dtype=np.object_)),
                "b": StringArray(np.array(["x", pd.NA], dtype=np.object_)),
            }
        )
    else:
        expected = DataFrame(
            {
                "a": ArrowStringArray(pa.array(["a", "b"])),
                "b": ArrowStringArray(pa.array(["x", None])),
            }
        )
    tm.assert_frame_equal(result, expected)
| 616 | + |
535 | 617 | @pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)])
|
536 | 618 | def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value):
|
537 | 619 | # GH#35211
|
|
0 commit comments