|
15 | 15 | import pandas.util.testing as tm
|
16 | 16 |
|
17 | 17 | from pandas.io.common import URLError
|
18 |
| -from pandas.io.excel import ExcelFile |
19 | 18 |
|
20 | 19 |
|
21 | 20 | @contextlib.contextmanager
|
@@ -736,121 +735,125 @@ class TestExcelFileRead:
|
736 | 735 | pytest.param(None, marks=pytest.mark.skipif(
|
737 | 736 | not td.safe_import("xlrd"), reason="no xlrd")),
|
738 | 737 | ])
|
739 |
| - def cd_and_set_engine(self, request, datapath, monkeypatch): |
| 738 | + def cd_and_set_engine(self, request, datapath, monkeypatch, read_ext): |
740 | 739 | """
|
741 | 740 | Change directory and set engine for ExcelFile objects.
|
742 | 741 | """
|
| 742 | + if request.param == 'openpyxl' and read_ext == '.xls': |
| 743 | + pytest.skip() |
| 744 | + |
743 | 745 | func = partial(pd.ExcelFile, engine=request.param)
|
744 | 746 | monkeypatch.chdir(datapath("io", "data"))
|
745 | 747 | monkeypatch.setattr(pd, 'ExcelFile', func)
|
746 | 748 |
|
747 | 749 | def test_excel_passes_na(self, read_ext):
|
748 | 750 |
|
749 |
| - excel = ExcelFile('test4' + read_ext) |
750 |
| - |
751 |
| - parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, |
752 |
| - na_values=['apple']) |
| 751 | + with pd.ExcelFile('test4' + read_ext) as excel: |
| 752 | + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, |
| 753 | + na_values=['apple']) |
753 | 754 | expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
|
754 | 755 | columns=['Test'])
|
755 | 756 | tm.assert_frame_equal(parsed, expected)
|
756 | 757 |
|
757 |
| - parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, |
758 |
| - na_values=['apple']) |
| 758 | + with pd.ExcelFile('test4' + read_ext) as excel: |
| 759 | + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, |
| 760 | + na_values=['apple']) |
759 | 761 | expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
|
760 | 762 | columns=['Test'])
|
761 | 763 | tm.assert_frame_equal(parsed, expected)
|
762 | 764 |
|
763 | 765 | # 13967
|
764 |
| - excel = ExcelFile('test5' + read_ext) |
765 |
| - |
766 |
| - parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, |
767 |
| - na_values=['apple']) |
| 766 | + with pd.ExcelFile('test5' + read_ext) as excel: |
| 767 | + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, |
| 768 | + na_values=['apple']) |
768 | 769 | expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']],
|
769 | 770 | columns=['Test'])
|
770 | 771 | tm.assert_frame_equal(parsed, expected)
|
771 | 772 |
|
772 |
| - parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, |
773 |
| - na_values=['apple']) |
| 773 | + with pd.ExcelFile('test5' + read_ext) as excel: |
| 774 | + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, |
| 775 | + na_values=['apple']) |
774 | 776 | expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
|
775 | 777 | columns=['Test'])
|
776 | 778 | tm.assert_frame_equal(parsed, expected)
|
777 | 779 |
|
778 | 780 | @pytest.mark.parametrize('arg', ['sheet', 'sheetname', 'parse_cols'])
|
779 | 781 | def test_unexpected_kwargs_raises(self, read_ext, arg):
|
780 | 782 | # gh-17964
|
781 |
| - excel = ExcelFile('test1' + read_ext) |
782 |
| - |
783 | 783 | kwarg = {arg: 'Sheet1'}
|
784 | 784 | msg = "unexpected keyword argument `{}`".format(arg)
|
785 |
| - with pytest.raises(TypeError, match=msg): |
786 |
| - pd.read_excel(excel, **kwarg) |
787 | 785 |
|
788 |
| - def test_excel_table_sheet_by_index(self, read_ext, df_ref): |
| 786 | + with pd.ExcelFile('test1' + read_ext) as excel: |
| 787 | + with pytest.raises(TypeError, match=msg): |
| 788 | + pd.read_excel(excel, **kwarg) |
789 | 789 |
|
790 |
| - excel = ExcelFile('test1' + read_ext) |
| 790 | + def test_excel_table_sheet_by_index(self, read_ext, df_ref): |
791 | 791 |
|
792 |
| - df1 = pd.read_excel(excel, 0, index_col=0) |
793 |
| - df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0) |
| 792 | + with pd.ExcelFile('test1' + read_ext) as excel: |
| 793 | + df1 = pd.read_excel(excel, 0, index_col=0) |
| 794 | + df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0) |
794 | 795 | tm.assert_frame_equal(df1, df_ref, check_names=False)
|
795 | 796 | tm.assert_frame_equal(df2, df_ref, check_names=False)
|
796 | 797 |
|
797 |
| - df1 = excel.parse(0, index_col=0) |
798 |
| - df2 = excel.parse(1, skiprows=[1], index_col=0) |
| 798 | + with pd.ExcelFile('test1' + read_ext) as excel: |
| 799 | + df1 = excel.parse(0, index_col=0) |
| 800 | + df2 = excel.parse(1, skiprows=[1], index_col=0) |
799 | 801 | tm.assert_frame_equal(df1, df_ref, check_names=False)
|
800 | 802 | tm.assert_frame_equal(df2, df_ref, check_names=False)
|
801 | 803 |
|
802 |
| - df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1) |
| 804 | + with pd.ExcelFile('test1' + read_ext) as excel: |
| 805 | + df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1) |
803 | 806 | tm.assert_frame_equal(df3, df1.iloc[:-1])
|
804 | 807 |
|
805 | 808 | with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
806 |
| - df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1) |
| 809 | + with pd.ExcelFile('test1' + read_ext) as excel: |
| 810 | + df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1) |
| 811 | + |
807 | 812 | tm.assert_frame_equal(df3, df4)
|
808 | 813 |
|
809 |
| - df3 = excel.parse(0, index_col=0, skipfooter=1) |
810 |
| - tm.assert_frame_equal(df3, df1.iloc[:-1]) |
| 814 | + with pd.ExcelFile('test1' + read_ext) as excel: |
| 815 | + df3 = excel.parse(0, index_col=0, skipfooter=1) |
811 | 816 |
|
812 |
| - import xlrd # will move to engine-specific tests as new ones are added |
813 |
| - with pytest.raises(xlrd.XLRDError): |
814 |
| - pd.read_excel(excel, 'asdf') |
| 817 | + tm.assert_frame_equal(df3, df1.iloc[:-1]) |
815 | 818 |
|
816 | 819 | def test_sheet_name(self, read_ext, df_ref):
|
817 | 820 | filename = "test1"
|
818 | 821 | sheet_name = "Sheet1"
|
819 | 822 |
|
820 |
| - excel = ExcelFile(filename + read_ext) |
821 |
| - df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc |
822 |
| - df2_parse = excel.parse(index_col=0, |
823 |
| - sheet_name=sheet_name) |
| 823 | + with pd.ExcelFile(filename + read_ext) as excel: |
| 824 | + df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc |
| 825 | + |
| 826 | + with pd.ExcelFile(filename + read_ext) as excel: |
| 827 | + df2_parse = excel.parse(index_col=0, |
| 828 | + sheet_name=sheet_name) |
824 | 829 |
|
825 | 830 | tm.assert_frame_equal(df1_parse, df_ref, check_names=False)
|
826 | 831 | tm.assert_frame_equal(df2_parse, df_ref, check_names=False)
|
827 | 832 |
|
828 | 833 | def test_excel_read_buffer(self, read_ext):
|
829 |
| - |
830 | 834 | pth = 'test1' + read_ext
|
831 |
| - expected = pd.read_excel(pth, 'Sheet1', index_col=0) |
| 835 | + engine = pd.ExcelFile.keywords['engine'] # TODO: fixturize |
| 836 | + expected = pd.read_excel(pth, 'Sheet1', index_col=0, engine=engine) |
832 | 837 |
|
833 | 838 | with open(pth, 'rb') as f:
|
834 |
| - xls = ExcelFile(f) |
835 |
| - actual = pd.read_excel(xls, 'Sheet1', index_col=0) |
| 839 | + with pd.ExcelFile(f) as xls: |
| 840 | + actual = pd.read_excel(xls, 'Sheet1', index_col=0) |
| 841 | + |
836 | 842 | tm.assert_frame_equal(expected, actual)
|
837 | 843 |
|
838 | 844 | def test_reader_closes_file(self, read_ext):
|
839 |
| - |
840 | 845 | f = open('test1' + read_ext, 'rb')
|
841 |
| - with ExcelFile(f) as xlsx: |
| 846 | + engine = pd.ExcelFile.keywords['engine'] # TODO: fixturize |
| 847 | + with pd.ExcelFile(f) as xlsx: |
842 | 848 | # parses okay
|
843 |
| - pd.read_excel(xlsx, 'Sheet1', index_col=0) |
| 849 | + pd.read_excel(xlsx, 'Sheet1', index_col=0, engine=engine) |
844 | 850 |
|
845 | 851 | assert f.closed
|
846 | 852 |
|
847 |
| - @pytest.mark.parametrize('excel_engine', [ |
848 |
| - 'xlrd', |
849 |
| - None |
850 |
| - ]) |
851 |
| - def test_read_excel_engine_value(self, read_ext, excel_engine): |
| 853 | + def test_conflicting_excel_engines(self, read_ext): |
852 | 854 | # GH 26566
|
853 |
| - xl = ExcelFile("test1" + read_ext, engine=excel_engine) |
854 | 855 | msg = "Engine should not be specified when passing an ExcelFile"
|
855 |
| - with pytest.raises(ValueError, match=msg): |
856 |
| - pd.read_excel(xl, engine='openpyxl') |
| 856 | + |
| 857 | + with pd.ExcelFile("test1" + read_ext) as xl: |
| 858 | + with pytest.raises(ValueError, match=msg): |
| 859 | + pd.read_excel(xl, engine='foo') |
0 commit comments