diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 100de227aa97c..6db3d1d4ab34d 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -22,7 +22,7 @@ from pandas.io.common import URLError from pandas.io.excel import ( ExcelFile, ExcelWriter, _OpenpyxlWriter, _XlsxWriter, _XlwtWriter, - read_excel, register_writer) + register_writer) from pandas.io.formats.excel import ExcelFormatter from pandas.io.parsers import read_csv @@ -53,7 +53,6 @@ class SharedItems: @pytest.fixture(autouse=True) def setup_method(self, datapath): - self.dirpath = datapath("io", "data") self.frame = _frame.copy() self.frame2 = _frame2.copy() self.tsframe = _tsframe.copy() @@ -65,135 +64,85 @@ class ReadingTestsBase(SharedItems): # This is based on ExcelWriterBase @pytest.fixture(autouse=True, params=['xlrd', None]) - def set_engine(self, request): - func_name = "get_exceldf" - old_func = getattr(self, func_name) - new_func = partial(old_func, engine=request.param) - setattr(self, func_name, new_func) - yield - setattr(self, func_name, old_func) - - def get_csv_refdf(self, basename): + def cd_and_set_engine(self, request, datapath, monkeypatch): """ - Obtain the reference data from read_csv with the Python engine. - - Parameters - ---------- - - basename : str - File base name, excluding file extension. - - Returns - ------- - - dfref : DataFrame + Change directory and set engine for read_excel calls. """ - pref = os.path.join(self.dirpath, basename + '.csv') - dfref = read_csv(pref, index_col=0, parse_dates=True, engine='python') - return dfref + func = partial(pd.read_excel, engine=request.param) + monkeypatch.chdir(datapath("io", "data")) + monkeypatch.setattr(pd, 'read_excel', func) - def get_excelfile(self, basename, ext): + @pytest.fixture + def df_ref(self): """ - Return test data ExcelFile instance. - - Parameters - ---------- - - basename : str - File base name, excluding file extension. - - Returns - ------- - - excel : io.excel.ExcelFile - """ - return ExcelFile(os.path.join(self.dirpath, basename + ext)) - - def get_exceldf(self, basename, ext, *args, **kwds): - """ - Return test data DataFrame. - - Parameters - ---------- - - basename : str - File base name, excluding file extension. - - Returns - ------- - - df : DataFrame + Obtain the reference data from read_csv with the Python engine. """ - pth = os.path.join(self.dirpath, basename + ext) - return read_excel(pth, *args, **kwds) + df_ref = read_csv('test1.csv', index_col=0, + parse_dates=True, engine='python') + return df_ref @td.skip_if_no("xlrd", "1.0.1") # see gh-22682 - def test_usecols_int(self, ext): - - df_ref = self.get_csv_refdf("test1") + def test_usecols_int(self, ext, df_ref): df_ref = df_ref.reindex(columns=["A", "B", "C"]) # usecols as int with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): with ignore_xlrd_time_clock_warning(): - df1 = self.get_exceldf("test1", ext, "Sheet1", - index_col=0, usecols=3) + df1 = pd.read_excel("test1" + ext, "Sheet1", + index_col=0, usecols=3) # usecols as int with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): with ignore_xlrd_time_clock_warning(): - df2 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], - index_col=0, usecols=3) + df2 = pd.read_excel("test1" + ext, "Sheet2", skiprows=[1], + index_col=0, usecols=3) # TODO add index to xls file) tm.assert_frame_equal(df1, df_ref, check_names=False) tm.assert_frame_equal(df2, df_ref, check_names=False) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 - def test_usecols_list(self, ext): + def test_usecols_list(self, ext, df_ref): - dfref = self.get_csv_refdf('test1') - dfref = dfref.reindex(columns=['B', 'C']) - df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - usecols=[0, 2, 3]) - df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols=[0, 2, 3]) + df_ref = df_ref.reindex(columns=['B', 'C']) + df1 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, + usecols=[0, 2, 3]) + df2 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0, usecols=[0, 2, 3]) # TODO add index to xls file) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 - def test_usecols_str(self, ext): + def test_usecols_str(self, ext, df_ref): - dfref = self.get_csv_refdf('test1') - - df1 = dfref.reindex(columns=['A', 'B', 'C']) - df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - usecols='A:D') - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols='A:D') + df1 = df_ref.reindex(columns=['A', 'B', 'C']) + df2 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, + usecols='A:D') + df3 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A:D') # TODO add index to xls, read xls ignores index name ? tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) - df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - usecols='A,C,D') - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols='A,C,D') + df1 = df_ref.reindex(columns=['B', 'C']) + df2 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, + usecols='A,C,D') + df3 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A,C,D') # TODO add index to xls file tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) - df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - usecols='A,C:D') - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols='A,C:D') + df1 = df_ref.reindex(columns=['B', 'C']) + df2 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, + usecols='A,C:D') + df3 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A,C:D') tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) @@ -202,50 +151,52 @@ def test_usecols_str(self, ext): [1, 0, 3], [1, 3, 0], [3, 0, 1], [3, 1, 0], ]) - def test_usecols_diff_positional_int_columns_order(self, ext, usecols): - expected = self.get_csv_refdf("test1")[["A", "C"]] - result = self.get_exceldf("test1", ext, "Sheet1", - index_col=0, usecols=usecols) + def test_usecols_diff_positional_int_columns_order( + self, ext, usecols, df_ref): + expected = df_ref[["A", "C"]] + result = pd.read_excel("test1" + ext, "Sheet1", + index_col=0, usecols=usecols) tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.parametrize("usecols", [ ["B", "D"], ["D", "B"] ]) - def test_usecols_diff_positional_str_columns_order(self, ext, usecols): - expected = self.get_csv_refdf("test1")[["B", "D"]] + def test_usecols_diff_positional_str_columns_order( + self, ext, usecols, df_ref): + expected = df_ref[["B", "D"]] expected.index = range(len(expected)) - result = self.get_exceldf("test1", ext, "Sheet1", usecols=usecols) + result = pd.read_excel("test1" + ext, "Sheet1", usecols=usecols) tm.assert_frame_equal(result, expected, check_names=False) - def test_read_excel_without_slicing(self, ext): - expected = self.get_csv_refdf("test1") - result = self.get_exceldf("test1", ext, "Sheet1", index_col=0) + def test_read_excel_without_slicing(self, ext, df_ref): + expected = df_ref + result = pd.read_excel("test1" + ext, "Sheet1", index_col=0) tm.assert_frame_equal(result, expected, check_names=False) - def test_usecols_excel_range_str(self, ext): - expected = self.get_csv_refdf("test1")[["C", "D"]] - result = self.get_exceldf("test1", ext, "Sheet1", - index_col=0, usecols="A,D:E") + def test_usecols_excel_range_str(self, ext, df_ref): + expected = df_ref[["C", "D"]] + result = pd.read_excel("test1" + ext, "Sheet1", + index_col=0, usecols="A,D:E") tm.assert_frame_equal(result, expected, check_names=False) def test_usecols_excel_range_str_invalid(self, ext): msg = "Invalid column name: E1" with pytest.raises(ValueError, match=msg): - self.get_exceldf("test1", ext, "Sheet1", usecols="D:E1") + pd.read_excel("test1" + ext, "Sheet1", usecols="D:E1") def test_index_col_label_error(self, ext): msg = "list indices must be integers.*, not str" with pytest.raises(TypeError, match=msg): - self.get_exceldf("test1", ext, "Sheet1", index_col=["A"], - usecols=["A", "C"]) + pd.read_excel("test1" + ext, "Sheet1", index_col=["A"], + usecols=["A", "C"]) def test_index_col_empty(self, ext): # see gh-9208 - result = self.get_exceldf("test1", ext, "Sheet3", - index_col=["A", "B", "C"]) + result = pd.read_excel("test1" + ext, "Sheet3", + index_col=["A", "B", "C"]) expected = DataFrame(columns=["D", "E", "F"], index=MultiIndex(levels=[[]] * 3, codes=[[]] * 3, @@ -255,8 +206,7 @@ def test_index_col_empty(self, ext): @pytest.mark.parametrize("index_col", [None, 2]) def test_index_col_with_unnamed(self, ext, index_col): # see gh-18792 - result = self.get_exceldf("test1", ext, "Sheet4", - index_col=index_col) + result = pd.read_excel("test1" + ext, "Sheet4", index_col=index_col) expected = DataFrame([["i1", "a", "x"], ["i2", "b", "y"]], columns=["Unnamed: 0", "col1", "col2"]) if index_col: @@ -269,54 +219,54 @@ def test_usecols_pass_non_existent_column(self, ext): "columns expected but not found: " + r"\['E'\]") with pytest.raises(ValueError, match=msg): - self.get_exceldf("test1", ext, usecols=["E"]) + pd.read_excel("test1" + ext, usecols=["E"]) def test_usecols_wrong_type(self, ext): msg = ("'usecols' must either be list-like of " "all strings, all unicode, all integers or a callable.") with pytest.raises(ValueError, match=msg): - self.get_exceldf("test1", ext, usecols=["E1", 0]) + pd.read_excel("test1" + ext, usecols=["E1", 0]) def test_excel_stop_iterator(self, ext): - parsed = self.get_exceldf('test2', ext, 'Sheet1') + parsed = pd.read_excel('test2' + ext, 'Sheet1') expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1']) tm.assert_frame_equal(parsed, expected) def test_excel_cell_error_na(self, ext): - parsed = self.get_exceldf('test3', ext, 'Sheet1') + parsed = pd.read_excel('test3' + ext, 'Sheet1') expected = DataFrame([[np.nan]], columns=['Test']) tm.assert_frame_equal(parsed, expected) def test_excel_passes_na(self, ext): - excel = self.get_excelfile('test4', ext) + excel = ExcelFile('test4' + ext) - parsed = read_excel(excel, 'Sheet1', keep_default_na=False, - na_values=['apple']) + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, + na_values=['apple']) expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) - parsed = read_excel(excel, 'Sheet1', keep_default_na=True, - na_values=['apple']) + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, + na_values=['apple']) expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) # 13967 - excel = self.get_excelfile('test5', ext) + excel = ExcelFile('test5' + ext) - parsed = read_excel(excel, 'Sheet1', keep_default_na=False, - na_values=['apple']) + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, + na_values=['apple']) expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) - parsed = read_excel(excel, 'Sheet1', keep_default_na=True, - na_values=['apple']) + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, + na_values=['apple']) expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) @@ -325,34 +275,33 @@ def test_excel_passes_na(self, ext): @pytest.mark.parametrize('arg', ['sheet', 'sheetname', 'parse_cols']) def test_unexpected_kwargs_raises(self, ext, arg): # gh-17964 - excel = self.get_excelfile('test1', ext) + excel = ExcelFile('test1' + ext) kwarg = {arg: 'Sheet1'} msg = "unexpected keyword argument `{}`".format(arg) with pytest.raises(TypeError, match=msg): - read_excel(excel, **kwarg) + pd.read_excel(excel, **kwarg) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 - def test_excel_table_sheet_by_index(self, ext): + def test_excel_table_sheet_by_index(self, ext, df_ref): - excel = self.get_excelfile('test1', ext) - dfref = self.get_csv_refdf('test1') + excel = ExcelFile('test1' + ext) - df1 = read_excel(excel, 0, index_col=0) - df2 = read_excel(excel, 1, skiprows=[1], index_col=0) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) + df1 = pd.read_excel(excel, 0, index_col=0) + df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) df1 = excel.parse(0, index_col=0) df2 = excel.parse(1, skiprows=[1], index_col=0) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) - df3 = read_excel(excel, 0, index_col=0, skipfooter=1) + df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1) tm.assert_frame_equal(df3, df1.iloc[:-1]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df4 = read_excel(excel, 0, index_col=0, skip_footer=1) + df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1) tm.assert_frame_equal(df3, df4) df3 = excel.parse(0, index_col=0, skipfooter=1) @@ -360,21 +309,18 @@ def test_excel_table_sheet_by_index(self, ext): import xlrd with pytest.raises(xlrd.XLRDError): - read_excel(excel, 'asdf') - - def test_excel_table(self, ext): + pd.read_excel(excel, 'asdf') - dfref = self.get_csv_refdf('test1') + def test_excel_table(self, ext, df_ref): - df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0) - df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0) + df1 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0) + df2 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0) # TODO add index to file - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) - df3 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - skipfooter=1) + df3 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, skipfooter=1) tm.assert_frame_equal(df3, df1.iloc[:-1]) def test_reader_special_dtypes(self, ext): @@ -393,32 +339,32 @@ def test_reader_special_dtypes(self, ext): basename = 'test_types' # should read in correctly and infer types - actual = self.get_exceldf(basename, ext, 'Sheet1') + actual = pd.read_excel(basename + ext, 'Sheet1') tm.assert_frame_equal(actual, expected) # if not coercing number, then int comes in as float float_expected = expected.copy() float_expected["IntCol"] = float_expected["IntCol"].astype(float) float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 - actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False) + actual = pd.read_excel(basename + ext, 'Sheet1', convert_float=False) tm.assert_frame_equal(actual, float_expected) # check setting Index (assuming xls and xlsx are the same here) for icol, name in enumerate(expected.columns): - actual = self.get_exceldf(basename, ext, 'Sheet1', index_col=icol) + actual = pd.read_excel(basename + ext, 'Sheet1', index_col=icol) exp = expected.set_index(name) tm.assert_frame_equal(actual, exp) # convert_float and converters should be different but both accepted expected["StrCol"] = expected["StrCol"].apply(str) - actual = self.get_exceldf( - basename, ext, 'Sheet1', converters={"StrCol": str}) + actual = pd.read_excel(basename + ext, 'Sheet1', + converters={"StrCol": str}) tm.assert_frame_equal(actual, expected) no_convert_float = float_expected.copy() no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) - actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False, - converters={"StrCol": str}) + actual = pd.read_excel(basename + ext, 'Sheet1', convert_float=False, + converters={"StrCol": str}) tm.assert_frame_equal(actual, no_convert_float) # GH8212 - support for converters and missing values @@ -441,14 +387,13 @@ def test_reader_converters(self, ext): # should read in correctly and set types of single cells (not array # dtypes) - actual = self.get_exceldf(basename, ext, 'Sheet1', - converters=converters) + actual = pd.read_excel(basename + ext, 'Sheet1', converters=converters) tm.assert_frame_equal(actual, expected) def test_reader_dtype(self, ext): # GH 8212 basename = 'testdtype' - actual = self.get_exceldf(basename, ext) + actual = pd.read_excel(basename + ext) expected = DataFrame({ 'a': [1, 2, 3, 4], @@ -459,10 +404,10 @@ def test_reader_dtype(self, ext): tm.assert_frame_equal(actual, expected) - actual = self.get_exceldf(basename, ext, - dtype={'a': 'float64', - 'b': 'float32', - 'c': str}) + actual = pd.read_excel(basename + ext, + dtype={'a': 'float64', + 'b': 'float32', + 'c': str}) expected['a'] = expected['a'].astype('float64') expected['b'] = expected['b'].astype('float32') @@ -470,7 +415,7 @@ def test_reader_dtype(self, ext): tm.assert_frame_equal(actual, expected) with pytest.raises(ValueError): - self.get_exceldf(basename, ext, dtype={'d': 'int64'}) + pd.read_excel(basename + ext, dtype={'d': 'int64'}) @pytest.mark.parametrize("dtype,expected", [ (None, @@ -496,7 +441,7 @@ def test_reader_dtype_str(self, ext, dtype, expected): # see gh-20377 basename = "testdtype" - actual = self.get_exceldf(basename, ext, dtype=dtype) + actual = pd.read_excel(basename + ext, dtype=dtype) tm.assert_frame_equal(actual, expected) def test_reading_all_sheets(self, ext): @@ -504,7 +449,7 @@ def test_reading_all_sheets(self, ext): # Ensure a dict is returned. # See PR #9450 basename = 'test_multisheet' - dfs = self.get_exceldf(basename, ext, sheet_name=None) + dfs = pd.read_excel(basename + ext, sheet_name=None) # ensure this is not alphabetical to test order preservation expected_keys = ['Charlie', 'Alpha', 'Beta'] tm.assert_contains_all(expected_keys, dfs.keys()) @@ -521,7 +466,7 @@ def test_reading_multiple_specific_sheets(self, ext): basename = 'test_multisheet' # Explicitly request duplicates. Only the set should be returned. expected_keys = [2, 'Charlie', 'Charlie'] - dfs = self.get_exceldf(basename, ext, sheet_name=expected_keys) + dfs = pd.read_excel(basename + ext, sheet_name=expected_keys) expected_keys = list(set(expected_keys)) tm.assert_contains_all(expected_keys, dfs.keys()) assert len(expected_keys) == len(dfs.keys()) @@ -531,18 +476,18 @@ def test_reading_all_sheets_with_blank(self, ext): # In the case where some sheets are blank. # Issue #11711 basename = 'blank_with_header' - dfs = self.get_exceldf(basename, ext, sheet_name=None) + dfs = pd.read_excel(basename + ext, sheet_name=None) expected_keys = ['Sheet1', 'Sheet2', 'Sheet3'] tm.assert_contains_all(expected_keys, dfs.keys()) # GH6403 def test_read_excel_blank(self, ext): - actual = self.get_exceldf('blank', ext, 'Sheet1') + actual = pd.read_excel('blank' + ext, 'Sheet1') tm.assert_frame_equal(actual, DataFrame()) def test_read_excel_blank_with_header(self, ext): expected = DataFrame(columns=['col_1', 'col_2']) - actual = self.get_exceldf('blank_with_header', ext, 'Sheet1') + actual = pd.read_excel('blank_with_header' + ext, 'Sheet1') tm.assert_frame_equal(actual, expected) def test_date_conversion_overflow(self, ext): @@ -552,11 +497,11 @@ def test_date_conversion_overflow(self, ext): [1e+20, 'Timothy Brown']], columns=['DateColWithBigInt', 'StringCol']) - result = self.get_exceldf('testdateoverflow', ext) + result = pd.read_excel('testdateoverflow' + ext) tm.assert_frame_equal(result, expected) @td.skip_if_no("xlrd", "1.0.1") # see gh-22682 - def test_sheet_name_and_sheetname(self, ext): + def test_sheet_name_and_sheetname(self, ext, df_ref): # gh-10559: Minor improvement: Change "sheet_name" to "sheetname" # gh-10969: DOC: Consistent var names (sheetname vs sheet_name) # gh-12604: CLN GH10559 Rename sheetname variable to sheet_name @@ -565,14 +510,13 @@ def test_sheet_name_and_sheetname(self, ext): filename = "test1" sheet_name = "Sheet1" - df_ref = self.get_csv_refdf(filename) - df1 = self.get_exceldf(filename, ext, - sheet_name=sheet_name, index_col=0) # doc + df1 = pd.read_excel(filename + ext, + sheet_name=sheet_name, index_col=0) # doc with ignore_xlrd_time_clock_warning(): - df2 = self.get_exceldf(filename, ext, index_col=0, - sheet_name=sheet_name) + df2 = pd.read_excel(filename + ext, index_col=0, + sheet_name=sheet_name) - excel = self.get_excelfile(filename, ext) + excel = ExcelFile(filename + ext) df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc df2_parse = excel.parse(index_col=0, sheet_name=sheet_name) @@ -584,55 +528,53 @@ def test_sheet_name_and_sheetname(self, ext): def test_excel_read_buffer(self, ext): - pth = os.path.join(self.dirpath, 'test1' + ext) - expected = read_excel(pth, 'Sheet1', index_col=0) + pth = 'test1' + ext + expected = pd.read_excel(pth, 'Sheet1', index_col=0) with open(pth, 'rb') as f: - actual = read_excel(f, 'Sheet1', index_col=0) + actual = pd.read_excel(f, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) with open(pth, 'rb') as f: xls = ExcelFile(f) - actual = read_excel(xls, 'Sheet1', index_col=0) + actual = pd.read_excel(xls, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) def test_bad_engine_raises(self, ext): bad_engine = 'foo' with pytest.raises(ValueError, match="Unknown engine: foo"): - read_excel('', engine=bad_engine) + pd.read_excel('', engine=bad_engine) @tm.network def test_read_from_http_url(self, ext): url = ('https://raw.github.com/pandas-dev/pandas/master/' 'pandas/tests/io/data/test1' + ext) - url_table = read_excel(url) - local_table = self.get_exceldf('test1', ext) + url_table = pd.read_excel(url) + local_table = pd.read_excel('test1' + ext) tm.assert_frame_equal(url_table, local_table) @td.skip_if_not_us_locale def test_read_from_s3_url(self, ext, s3_resource): # Bucket "pandas-test" created in tests/io/conftest.py - file_name = os.path.join(self.dirpath, 'test1' + ext) - - with open(file_name, "rb") as f: + with open('test1' + ext, "rb") as f: s3_resource.Bucket("pandas-test").put_object(Key="test1" + ext, Body=f) url = ('s3://pandas-test/test1' + ext) - url_table = read_excel(url) - local_table = self.get_exceldf('test1', ext) + url_table = pd.read_excel(url) + local_table = pd.read_excel('test1' + ext) tm.assert_frame_equal(url_table, local_table) @pytest.mark.slow # ignore warning from old xlrd @pytest.mark.filterwarnings("ignore:This metho:PendingDeprecationWarning") - def test_read_from_file_url(self, ext): + def test_read_from_file_url(self, ext, datapath): # FILE - localtable = os.path.join(self.dirpath, 'test1' + ext) - local_table = read_excel(localtable) + localtable = os.path.join(datapath("io", "data"), 'test1' + ext) + local_table = pd.read_excel(localtable) try: - url_table = read_excel('file://localhost/' + localtable) + url_table = pd.read_excel('file://localhost/' + localtable) except URLError: # fails on some systems import platform @@ -646,11 +588,11 @@ def test_read_from_pathlib_path(self, ext): # GH12655 from pathlib import Path - str_path = os.path.join(self.dirpath, 'test1' + ext) - expected = read_excel(str_path, 'Sheet1', index_col=0) + str_path = 'test1' + ext + expected = pd.read_excel(str_path, 'Sheet1', index_col=0) - path_obj = Path(self.dirpath, 'test1' + ext) - actual = read_excel(path_obj, 'Sheet1', index_col=0) + path_obj = Path('test1' + ext) + actual = pd.read_excel(path_obj, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) @@ -660,22 +602,20 @@ def test_read_from_py_localpath(self, ext): # GH12655 from py.path import local as LocalPath - str_path = os.path.join(self.dirpath, 'test1' + ext) - expected = read_excel(str_path, 'Sheet1', index_col=0) + str_path = os.path.join('test1' + ext) + expected = pd.read_excel(str_path, 'Sheet1', index_col=0) - abs_dir = os.path.abspath(self.dirpath) - path_obj = LocalPath(abs_dir).join('test1' + ext) - actual = read_excel(path_obj, 'Sheet1', index_col=0) + path_obj = LocalPath().join('test1' + ext) + actual = pd.read_excel(path_obj, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) def test_reader_closes_file(self, ext): - pth = os.path.join(self.dirpath, 'test1' + ext) - f = open(pth, 'rb') + f = open('test1' + ext, 'rb') with ExcelFile(f) as xlsx: # parses okay - read_excel(xlsx, 'Sheet1', index_col=0) + pd.read_excel(xlsx, 'Sheet1', index_col=0) assert f.closed @@ -694,16 +634,16 @@ def test_reader_seconds(self, ext): time(16, 37, 0, 900000), time(18, 20, 54)]}) - actual = self.get_exceldf('times_1900', ext, 'Sheet1') + actual = pd.read_excel('times_1900' + ext, 'Sheet1') tm.assert_frame_equal(actual, expected) - actual = self.get_exceldf('times_1904', ext, 'Sheet1') + actual = pd.read_excel('times_1904' + ext, 'Sheet1') tm.assert_frame_equal(actual, expected) def test_read_excel_multiindex(self, ext): # see gh-4679 mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]]) - mi_file = os.path.join(self.dirpath, "testmultiindex" + ext) + mi_file = "testmultiindex" + ext # "mi_column" sheet expected = DataFrame([[1, 2.5, pd.Timestamp("2015-01-01"), True], @@ -712,34 +652,37 @@ def test_read_excel_multiindex(self, ext): [4, 5.5, pd.Timestamp("2015-01-04"), True]], columns=mi) - actual = read_excel(mi_file, "mi_column", header=[0, 1], index_col=0) + actual = pd.read_excel( + mi_file, "mi_column", header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) # "mi_index" sheet expected.index = mi expected.columns = ["a", "b", "c", "d"] - actual = read_excel(mi_file, "mi_index", index_col=[0, 1]) + actual = pd.read_excel(mi_file, "mi_index", index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) # "both" sheet expected.columns = mi - actual = read_excel(mi_file, "both", index_col=[0, 1], header=[0, 1]) + actual = pd.read_excel( + mi_file, "both", index_col=[0, 1], header=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) # "mi_index_name" sheet expected.columns = ["a", "b", "c", "d"] expected.index = mi.set_names(["ilvl1", "ilvl2"]) - actual = read_excel(mi_file, "mi_index_name", index_col=[0, 1]) + actual = pd.read_excel( + mi_file, "mi_index_name", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) # "mi_column_name" sheet expected.index = list(range(4)) expected.columns = mi.set_names(["c1", "c2"]) - actual = read_excel(mi_file, "mi_column_name", - header=[0, 1], index_col=0) + actual = pd.read_excel(mi_file, "mi_column_name", + header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) # see gh-11317 @@ -747,29 +690,29 @@ def test_read_excel_multiindex(self, ext): expected.columns = mi.set_levels( [1, 2], level=1).set_names(["c1", "c2"]) - actual = read_excel(mi_file, "name_with_int", - index_col=0, header=[0, 1]) + actual = pd.read_excel(mi_file, "name_with_int", + index_col=0, header=[0, 1]) tm.assert_frame_equal(actual, expected) # "both_name" sheet expected.columns = mi.set_names(["c1", "c2"]) expected.index = mi.set_names(["ilvl1", "ilvl2"]) - actual = read_excel(mi_file, "both_name", - index_col=[0, 1], header=[0, 1]) + actual = pd.read_excel(mi_file, "both_name", + index_col=[0, 1], header=[0, 1]) tm.assert_frame_equal(actual, expected) # "both_skiprows" sheet - actual = read_excel(mi_file, "both_name_skiprows", index_col=[0, 1], - header=[0, 1], skiprows=2) + actual = pd.read_excel(mi_file, "both_name_skiprows", index_col=[0, 1], + header=[0, 1], skiprows=2) tm.assert_frame_equal(actual, expected) def test_read_excel_multiindex_header_only(self, ext): # see gh-11733. # # Don't try to parse a header name if there isn't one. - mi_file = os.path.join(self.dirpath, "testmultiindex" + ext) - result = read_excel(mi_file, "index_col_none", header=[0, 1]) + mi_file = "testmultiindex" + ext + result = pd.read_excel(mi_file, "index_col_none", header=[0, 1]) exp_columns = MultiIndex.from_product([("A", "B"), ("key", "val")]) expected = DataFrame([[1, 2, 3, 4]] * 2, columns=exp_columns) @@ -778,7 +721,6 @@ def test_read_excel_multiindex_header_only(self, ext): def test_excel_old_index_format(self, ext): # see gh-4679 filename = "test_index_name_pre17" + ext - in_file = os.path.join(self.dirpath, filename) # We detect headers to determine if index names exist, so # that "index" name in the "names" version of the data will @@ -801,12 +743,12 @@ def test_excel_old_index_format(self, ext): expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(in_file, "single_names", index_col=0) + actual = pd.read_excel(filename, "single_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(in_file, "multi_names", index_col=[0, 1]) + actual = pd.read_excel(filename, "multi_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) # The analogous versions of the "names" version data @@ -828,31 +770,28 @@ def test_excel_old_index_format(self, ext): expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(in_file, "single_no_names", index_col=0) + actual = pd.read_excel(filename, "single_no_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(in_file, "multi_no_names", index_col=[0, 1]) + actual = pd.read_excel(filename, "multi_no_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) def test_read_excel_bool_header_arg(self, ext): # GH 6114 for arg in [True, False]: with pytest.raises(TypeError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - header=arg) + pd.read_excel('test1' + ext, header=arg) def test_read_excel_chunksize(self, ext): # GH 8011 with pytest.raises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - chunksize=100) + pd.read_excel('test1' + ext, chunksize=100) def test_read_excel_skiprows_list(self, ext): # GH 4903 - actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + ext), + actual = pd.read_excel('testskiprows' + ext, 'skiprows_list', skiprows=[0, 2]) expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], [2, 3.5, pd.Timestamp('2015-01-02'), False], @@ -861,41 +800,35 @@ def test_read_excel_skiprows_list(self, ext): columns=['a', 'b', 'c', 'd']) tm.assert_frame_equal(actual, expected) - actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + ext), + actual = pd.read_excel('testskiprows' + ext, 'skiprows_list', skiprows=np.array([0, 2])) tm.assert_frame_equal(actual, expected) def test_read_excel_nrows(self, ext): # GH 16645 num_rows_to_pull = 5 - actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - nrows=num_rows_to_pull) - expected = pd.read_excel(os.path.join(self.dirpath, - 'test1' + ext)) + actual = pd.read_excel('test1' + ext, nrows=num_rows_to_pull) + expected = pd.read_excel('test1' + ext) expected = expected[:num_rows_to_pull] tm.assert_frame_equal(actual, expected) def test_read_excel_nrows_greater_than_nrows_in_file(self, ext): # GH 16645 - expected = pd.read_excel(os.path.join(self.dirpath, - 'test1' + ext)) + expected = pd.read_excel('test1' + ext) num_records_in_file = len(expected) num_rows_to_pull = num_records_in_file + 10 - actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - nrows=num_rows_to_pull) + actual = pd.read_excel('test1' + ext, nrows=num_rows_to_pull) tm.assert_frame_equal(actual, expected) def test_read_excel_nrows_non_integer_parameter(self, ext): # GH 16645 msg = "'nrows' must be an integer >=0" with pytest.raises(ValueError, match=msg): - pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - nrows='5') + pd.read_excel('test1' + ext, nrows='5') def test_read_excel_squeeze(self, ext): # GH 12157 - f = os.path.join(self.dirpath, 'test_squeeze' + ext) + f = 'test_squeeze' + ext actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True) expected = pd.Series([2, 3, 4], [4, 5, 6], name='b') @@ -934,7 +867,7 @@ def test_read_one_empty_col_no_header(self, ext, header, expected): with ensure_clean(ext) as path: df.to_excel(path, filename, index=False, header=False) - result = read_excel(path, filename, usecols=[0], header=header) + result = pd.read_excel(path, filename, usecols=[0], header=header) tm.assert_frame_equal(result, expected) @@ -955,7 +888,7 @@ def test_read_one_empty_col_with_header(self, ext, header, expected): with ensure_clean(ext) as path: df.to_excel(path, 'with_header', index=False, header=True) - result = read_excel(path, filename, usecols=[0], header=header) + result = pd.read_excel(path, filename, usecols=[0], header=header) tm.assert_frame_equal(result, expected) @@ -976,10 +909,10 @@ def test_set_column_names_in_parameter(self, ext): refdf.columns = ['A', 'B'] with ExcelFile(pth) as reader: - xlsdf_no_head = read_excel(reader, 'Data_no_head', - header=None, names=['A', 'B']) - xlsdf_with_head = read_excel(reader, 'Data_with_head', - index_col=None, names=['A', 'B']) + xlsdf_no_head = pd.read_excel(reader, 'Data_no_head', + header=None, names=['A', 'B']) + xlsdf_with_head = pd.read_excel( + reader, 'Data_with_head', index_col=None, names=['A', 'B']) tm.assert_frame_equal(xlsdf_no_head, refdf) tm.assert_frame_equal(xlsdf_with_head, refdf) @@ -1005,7 +938,7 @@ def tdf(col_sheet_name): for sheetname, df in dfs.items(): df.to_excel(ew, sheetname) - dfs_returned = read_excel(pth, sheet_name=sheets, index_col=0) + dfs_returned = pd.read_excel(pth, sheet_name=sheets, index_col=0) for s in sheets: tm.assert_frame_equal(dfs[s], dfs_returned[s]) @@ -1101,15 +1034,15 @@ def test_read_excel_parse_dates(self, ext): with ensure_clean(ext) as pth: df2.to_excel(pth) - res = read_excel(pth, index_col=0) + res = pd.read_excel(pth, index_col=0) tm.assert_frame_equal(df2, res) - res = read_excel(pth, parse_dates=["date_strings"], index_col=0) + res = pd.read_excel(pth, parse_dates=["date_strings"], index_col=0) tm.assert_frame_equal(df, res) date_parser = lambda x: pd.datetime.strptime(x, "%m/%d/%Y") - res = read_excel(pth, parse_dates=["date_strings"], - date_parser=date_parser, index_col=0) + res = pd.read_excel(pth, parse_dates=["date_strings"], + date_parser=date_parser, index_col=0) tm.assert_frame_equal(df, res) @@ -1134,11 +1067,11 @@ def test_read_xlrd_book(self, ext): book = xlrd.open_workbook(pth) with ExcelFile(book, engine=engine) as xl: - result = read_excel(xl, sheet_name, index_col=0) + result = pd.read_excel(xl, sheet_name, index_col=0) tm.assert_frame_equal(df, result) - result = read_excel(book, sheet_name=sheet_name, - engine=engine, index_col=0) + result = pd.read_excel(book, sheet_name=sheet_name, + engine=engine, index_col=0) tm.assert_frame_equal(df, result) @@ -1192,12 +1125,12 @@ def test_excel_sheet_by_name_raise(self, *_): gt.to_excel(self.path) xl = ExcelFile(self.path) - df = read_excel(xl, 0, index_col=0) + df = pd.read_excel(xl, 0, index_col=0) tm.assert_frame_equal(gt, df) with pytest.raises(xlrd.XLRDError): - read_excel(xl, "0") + pd.read_excel(xl, "0") def test_excel_writer_context_manager(self, *_): with ExcelWriter(self.path) as writer: @@ -1205,8 +1138,8 @@ def test_excel_writer_context_manager(self, *_): self.frame2.to_excel(writer, "Data2") with ExcelFile(self.path) as reader: - found_df = read_excel(reader, "Data1", index_col=0) - found_df2 = read_excel(reader, "Data2", index_col=0) + found_df = pd.read_excel(reader, "Data1", index_col=0) + found_df2 = pd.read_excel(reader, "Data2", index_col=0) tm.assert_frame_equal(found_df, self.frame) tm.assert_frame_equal(found_df2, self.frame2) @@ -1221,47 +1154,49 @@ def test_roundtrip(self, merge_cells, engine, ext): # test roundtrip self.frame.to_excel(self.path, 'test1') - recons = read_excel(self.path, 'test1', index_col=0) + recons = pd.read_excel(self.path, 'test1', index_col=0) tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(self.path, 'test1', index=False) - recons = read_excel(self.path, 'test1', index_col=None) + recons = pd.read_excel(self.path, 'test1', index_col=None) recons.index = self.frame.index tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(self.path, 'test1', na_rep='NA') - recons = read_excel(self.path, 'test1', index_col=0, na_values=['NA']) + recons = pd.read_excel( + self.path, 'test1', index_col=0, na_values=['NA']) tm.assert_frame_equal(self.frame, recons) # GH 3611 self.frame.to_excel(self.path, 'test1', na_rep='88') - recons = read_excel(self.path, 'test1', index_col=0, na_values=['88']) + recons = pd.read_excel( + self.path, 'test1', index_col=0, na_values=['88']) tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(self.path, 'test1', na_rep='88') - recons = read_excel(self.path, 'test1', index_col=0, - na_values=[88, 88.0]) + recons = pd.read_excel( + self.path, 'test1', index_col=0, na_values=[88, 88.0]) tm.assert_frame_equal(self.frame, recons) # GH 6573 self.frame.to_excel(self.path, 'Sheet1') - recons = read_excel(self.path, index_col=0) + recons = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(self.path, '0') - recons = read_excel(self.path, index_col=0) + recons = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(self.frame, recons) # GH 8825 Pandas Series should provide to_excel method s = self.frame["A"] s.to_excel(self.path) - recons = read_excel(self.path, index_col=0) + recons = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(s.to_frame(), recons) def test_mixed(self, merge_cells, engine, ext): self.mixed_frame.to_excel(self.path, 'test1') reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', index_col=0) + recons = pd.read_excel(reader, 'test1', index_col=0) tm.assert_frame_equal(self.mixed_frame, recons) def test_ts_frame(self, *_): @@ -1270,7 +1205,7 @@ def test_ts_frame(self, *_): df.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(df, recons) def test_basics_with_nan(self, merge_cells, engine, ext): @@ -1290,18 +1225,18 @@ def test_int_types(self, merge_cells, engine, ext, np_type): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) int_frame = frame.astype(np.int64) tm.assert_frame_equal(int_frame, recons) - recons2 = read_excel(self.path, "test1", index_col=0) + recons2 = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) # Test with convert_float=False comes back as float. float_frame = frame.astype(float) - recons = read_excel(self.path, "test1", - convert_float=False, index_col=0) + recons = pd.read_excel(self.path, "test1", + convert_float=False, index_col=0) tm.assert_frame_equal(recons, float_frame, check_index_type=False, check_column_type=False) @@ -1314,7 +1249,7 @@ def test_float_types(self, merge_cells, engine, ext, np_type): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0).astype(np_type) + recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) tm.assert_frame_equal(frame, recons, check_dtype=False) @@ -1325,7 +1260,7 @@ def test_bool_types(self, merge_cells, engine, ext, np_type): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0).astype(np_type) + recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) tm.assert_frame_equal(frame, recons) @@ -1334,7 +1269,7 @@ def test_inf_roundtrip(self, *_): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(frame, recons) @@ -1352,9 +1287,9 @@ def test_sheets(self, merge_cells, engine, ext): self.tsframe.to_excel(writer, 'test2') writer.save() reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', index_col=0) + recons = pd.read_excel(reader, 'test1', index_col=0) tm.assert_frame_equal(self.frame, recons) - recons = read_excel(reader, 'test2', index_col=0) + recons = pd.read_excel(reader, 'test2', index_col=0) tm.assert_frame_equal(self.tsframe, recons) assert 2 == len(reader.sheet_names) assert 'test1' == reader.sheet_names[0] @@ -1372,7 +1307,7 @@ def test_colaliases(self, merge_cells, engine, ext): col_aliases = Index(['AA', 'X', 'Y', 'Z']) self.frame2.to_excel(self.path, 'test1', header=col_aliases) reader = ExcelFile(self.path) - rs = read_excel(reader, 'test1', index_col=0) + rs = pd.read_excel(reader, 'test1', index_col=0) xp = self.frame2.copy() xp.columns = col_aliases tm.assert_frame_equal(xp, rs) @@ -1391,9 +1326,8 @@ def test_roundtrip_indexlabels(self, merge_cells, engine, ext): index_label=['test'], merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) + recons = pd.read_excel( + reader, 'test1', index_col=0).astype(np.int64) frame.index.names = ['test'] assert frame.index.names == recons.index.names @@ -1403,9 +1337,8 @@ def test_roundtrip_indexlabels(self, merge_cells, engine, ext): index_label=['test', 'dummy', 'dummy2'], merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) + recons = pd.read_excel( + reader, 'test1', index_col=0).astype(np.int64) frame.index.names = ['test'] assert frame.index.names == recons.index.names @@ -1415,9 +1348,8 @@ def test_roundtrip_indexlabels(self, merge_cells, engine, ext): index_label='test', merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) + recons = pd.read_excel( + reader, 'test1', index_col=0).astype(np.int64) frame.index.names = ['test'] tm.assert_frame_equal(frame, recons.astype(bool)) @@ -1430,7 +1362,7 @@ def test_roundtrip_indexlabels(self, merge_cells, engine, ext): df = df.set_index(['A', 'B']) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', index_col=[0, 1]) + recons = pd.read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(df, recons, check_less_precise=True) def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): @@ -1440,8 +1372,7 @@ def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): df.to_excel(self.path, merge_cells=merge_cells) xf = ExcelFile(self.path) - result = read_excel(xf, xf.sheet_names[0], - index_col=0) + result = pd.read_excel(xf, xf.sheet_names[0], index_col=0) tm.assert_frame_equal(result, df) assert result.index.name == 'foo' @@ -1454,7 +1385,7 @@ def test_excel_roundtrip_datetime(self, merge_cells, *_): tsf.to_excel(self.path, "test1", merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(self.tsframe, recons) @@ -1488,8 +1419,8 @@ def test_excel_date_datetime_format(self, merge_cells, engine, ext): reader1 = ExcelFile(self.path) reader2 = ExcelFile(filename2) - rs1 = read_excel(reader1, "test1", index_col=0) - rs2 = read_excel(reader2, "test1", index_col=0) + rs1 = pd.read_excel(reader1, "test1", index_col=0) + rs2 = pd.read_excel(reader2, "test1", index_col=0) tm.assert_frame_equal(rs1, rs2) @@ -1511,7 +1442,7 @@ def test_to_excel_interval_no_labels(self, *_): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_interval_labels(self, *_): @@ -1529,7 +1460,7 @@ def test_to_excel_interval_labels(self, *_): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_timedelta(self, *_): @@ -1547,7 +1478,7 @@ def test_to_excel_timedelta(self, *_): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_periodindex(self, merge_cells, engine, ext): @@ -1557,7 +1488,7 @@ def test_to_excel_periodindex(self, merge_cells, engine, ext): xp.to_excel(self.path, 'sht1') reader = ExcelFile(self.path) - rs = read_excel(reader, 'sht1', index_col=0) + rs = pd.read_excel(reader, 'sht1', index_col=0) tm.assert_frame_equal(xp, rs.to_period('M')) def test_to_excel_multiindex(self, merge_cells, engine, ext): @@ -1573,7 +1504,7 @@ def test_to_excel_multiindex(self, merge_cells, engine, ext): # round trip frame.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) - df = read_excel(reader, 'test1', index_col=[0, 1]) + df = pd.read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511 @@ -1584,7 +1515,7 @@ def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): frame = frame.set_index(['A', 'B']) frame.to_excel(self.path, merge_cells=merge_cells) - df = read_excel(self.path, index_col=[0, 1]) + df = pd.read_excel(self.path, index_col=[0, 1]) tm.assert_frame_equal(frame, df) # Test for Issue 11328. If column indices are integers, make @@ -1607,8 +1538,7 @@ def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): # round trip frame.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) - df = read_excel(reader, 'test1', header=header, - index_col=[0, 1]) + df = pd.read_excel(reader, 'test1', header=header, index_col=[0, 1]) if not merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) @@ -1624,8 +1554,7 @@ def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): tsframe.index.names = ['time', 'foo'] tsframe.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', - index_col=[0, 1]) + recons = pd.read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(tsframe, recons) assert recons.index.names == ('time', 'foo') @@ -1647,7 +1576,7 @@ def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, # Read it back in. reader = ExcelFile(self.path) - frame3 = read_excel(reader, 'test1') + frame3 = pd.read_excel(reader, 'test1') # Test that it is the same as the initial frame. tm.assert_frame_equal(frame1, frame3) @@ -1659,7 +1588,7 @@ def test_to_excel_float_format(self, *_): df.to_excel(self.path, "test1", float_format="%.2f") reader = ExcelFile(self.path) - result = read_excel(reader, "test1", index_col=0) + result = pd.read_excel(reader, "test1", index_col=0) expected = DataFrame([[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], @@ -1675,8 +1604,8 @@ def test_to_excel_output_encoding(self, merge_cells, engine, ext): with ensure_clean("__tmp_to_excel_float_format__." + ext) as filename: df.to_excel(filename, sheet_name="TestSheet", encoding="utf8") - result = read_excel(filename, "TestSheet", - encoding="utf8", index_col=0) + result = pd.read_excel(filename, "TestSheet", + encoding="utf8", index_col=0) tm.assert_frame_equal(result, df) def test_to_excel_unicode_filename(self, merge_cells, engine, ext): @@ -1694,7 +1623,7 @@ def test_to_excel_unicode_filename(self, merge_cells, engine, ext): df.to_excel(filename, "test1", float_format="%.2f") reader = ExcelFile(filename) - result = read_excel(reader, "test1", index_col=0) + result = pd.read_excel(reader, "test1", index_col=0) expected = DataFrame([[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], @@ -1812,7 +1741,7 @@ def roundtrip(data, header=True, parser_hdr=0, index=True): merge_cells=merge_cells, index=index) xf = ExcelFile(self.path) - return read_excel(xf, xf.sheet_names[0], header=parser_hdr) + return pd.read_excel(xf, xf.sheet_names[0], header=parser_hdr) # Basic test. parser_header = 0 if use_headers else None @@ -1860,12 +1789,12 @@ def test_duplicated_columns(self, *_): columns=["A", "B", "B.1"]) # By default, we mangle. - result = read_excel(self.path, "test1", index_col=0) + result = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(result, expected) # Explicitly, we pass in the parameter. - result = read_excel(self.path, "test1", index_col=0, - mangle_dupe_cols=True) + result = pd.read_excel(self.path, "test1", index_col=0, + mangle_dupe_cols=True) tm.assert_frame_equal(result, expected) # see gh-11007, gh-10970 @@ -1873,21 +1802,22 @@ def test_duplicated_columns(self, *_): columns=["A", "B", "A", "B"]) df.to_excel(self.path, "test1") - result = read_excel(self.path, "test1", index_col=0) + result = pd.read_excel(self.path, "test1", index_col=0) expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"]) tm.assert_frame_equal(result, expected) # see gh-10982 df.to_excel(self.path, "test1", index=False, header=False) - result = read_excel(self.path, "test1", header=None) + result = pd.read_excel(self.path, "test1", header=None) expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) tm.assert_frame_equal(result, expected) msg = "Setting mangle_dupe_cols=False is not supported yet" with pytest.raises(ValueError, match=msg): - read_excel(self.path, "test1", header=None, mangle_dupe_cols=False) + pd.read_excel( + self.path, "test1", header=None, mangle_dupe_cols=False) def test_swapped_columns(self, merge_cells, engine, ext): # Test for issue #5427. @@ -1895,7 +1825,7 @@ def test_swapped_columns(self, merge_cells, engine, ext): 'B': [2, 2, 2]}) write_frame.to_excel(self.path, 'test1', columns=['B', 'A']) - read_frame = read_excel(self.path, 'test1', header=0) + read_frame = pd.read_excel(self.path, 'test1', header=0) tm.assert_series_equal(write_frame['A'], read_frame['A']) tm.assert_series_equal(write_frame['B'], read_frame['B']) @@ -1910,7 +1840,7 @@ def test_invalid_columns(self, *_): write_frame.to_excel(self.path, "test1", columns=["B", "C"]) expected = write_frame.reindex(columns=["B", "C"]) - read_frame = read_excel(self.path, "test1", index_col=0) + read_frame = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(expected, read_frame) with pytest.raises(KeyError): @@ -1919,7 +1849,7 @@ def test_invalid_columns(self, *_): def test_comment_arg(self, *_): # see gh-18735 # - # Test the comment argument functionality to read_excel. + # Test the comment argument functionality to pd.read_excel. # Create file to read in. df = DataFrame({"A": ["one", "#one", "one"], @@ -1927,18 +1857,18 @@ def test_comment_arg(self, *_): df.to_excel(self.path, "test_c") # Read file without comment arg. - result1 = read_excel(self.path, "test_c", index_col=0) + result1 = pd.read_excel(self.path, "test_c", index_col=0) result1.iloc[1, 0] = None result1.iloc[1, 1] = None result1.iloc[2, 1] = None - result2 = read_excel(self.path, "test_c", comment="#", index_col=0) + result2 = pd.read_excel(self.path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result1, result2) def test_comment_default(self, merge_cells, engine, ext): # Re issue #18735 - # Test the comment argument default to read_excel + # Test the comment argument default to pd.read_excel # Create file to read in df = DataFrame({'A': ['one', '#one', 'one'], @@ -1946,8 +1876,8 @@ def test_comment_default(self, merge_cells, engine, ext): df.to_excel(self.path, 'test_c') # Read file with default and explicit comment=None - result1 = read_excel(self.path, 'test_c') - result2 = read_excel(self.path, 'test_c', comment=None) + result1 = pd.read_excel(self.path, 'test_c') + result2 = pd.read_excel(self.path, 'test_c', comment=None) tm.assert_frame_equal(result1, result2) def test_comment_used(self, *_): @@ -1963,19 +1893,19 @@ def test_comment_used(self, *_): # Test read_frame_comment against manually produced expected output. expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]}) - result = read_excel(self.path, "test_c", comment="#", index_col=0) + result = pd.read_excel(self.path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result, expected) def test_comment_empty_line(self, merge_cells, engine, ext): # Re issue #18735 - # Test that read_excel ignores commented lines at the end of file + # Test that pd.read_excel ignores commented lines at the end of file df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']}) df.to_excel(self.path, index=False) # Test that all-comment lines at EoF are ignored expected = DataFrame({'a': [1], 'b': [2]}) - result = read_excel(self.path, comment='#') + result = pd.read_excel(self.path, comment='#') tm.assert_frame_equal(result, expected) def test_datetimes(self, merge_cells, engine, ext): @@ -1995,7 +1925,7 @@ def test_datetimes(self, merge_cells, engine, ext): write_frame = DataFrame({'A': datetimes}) write_frame.to_excel(self.path, 'Sheet1') - read_frame = read_excel(self.path, 'Sheet1', header=0) + read_frame = pd.read_excel(self.path, 'Sheet1', header=0) tm.assert_series_equal(write_frame['A'], read_frame['A']) @@ -2010,7 +1940,7 @@ def test_bytes_io(self, merge_cells, engine, ext): writer.save() bio.seek(0) - reread_df = read_excel(bio, index_col=0) + reread_df = pd.read_excel(bio, index_col=0) tm.assert_frame_equal(df, reread_df) def test_write_lists_dict(self, *_): @@ -2019,7 +1949,7 @@ def test_write_lists_dict(self, *_): "numeric": [1, 2, 3.0], "str": ["apple", "banana", "cherry"]}) df.to_excel(self.path, "Sheet1") - read = read_excel(self.path, "Sheet1", header=0, index_col=0) + read = pd.read_excel(self.path, "Sheet1", header=0, index_col=0) expected = df.copy() expected.mixed = expected.mixed.apply(str) @@ -2033,8 +1963,8 @@ def test_true_and_false_value_options(self, *_): expected = df.replace({"foo": True, "bar": False}) df.to_excel(self.path) - read_frame = read_excel(self.path, true_values=["foo"], - false_values=["bar"], index_col=0) + read_frame = pd.read_excel(self.path, true_values=["foo"], + false_values=["bar"], index_col=0) tm.assert_frame_equal(read_frame, expected) def test_freeze_panes(self, *_): @@ -2042,7 +1972,7 @@ def test_freeze_panes(self, *_): expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) expected.to_excel(self.path, "Sheet1", freeze_panes=(1, 1)) - result = read_excel(self.path, index_col=0) + result = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(result, expected) def test_path_path_lib(self, merge_cells, engine, ext):