diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index c5f6019d7aeb5..a41e97831d104 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1,13 +1,14 @@
 import abc
 from collections import OrderedDict
 from datetime import date, datetime, timedelta
+from io import BytesIO
 import os
 from textwrap import fill
+from urllib.request import urlopen
 import warnings
 
 from pandas._config import config
 
-import pandas.compat as compat
 from pandas.errors import EmptyDataError
 from pandas.util._decorators import Appender, deprecate_kwarg
 
@@ -16,7 +17,9 @@
 
 from pandas.core.frame import DataFrame
 
-from pandas.io.common import _NA_VALUES, _stringify_path, _validate_header_arg
+from pandas.io.common import (
+    _NA_VALUES, _is_url, _stringify_path, _validate_header_arg,
+    get_filepath_or_buffer)
 from pandas.io.excel._util import (
     _fill_mi_header, _get_default_writer, _maybe_convert_usecols,
     _pop_header_name, get_writer)
@@ -329,6 +332,55 @@ def read_excel(io,
 
 class _BaseExcelReader(metaclass=abc.ABCMeta):
 
+    def __init__(self, filepath_or_buffer):
+        """
+        Load the workbook for ``filepath_or_buffer`` into ``self.book``.
+
+        Parameters
+        ----------
+        filepath_or_buffer : str, file-like object or workbook
+            A URL or path, an object with ``read`` and ``seek`` methods,
+            or an instance of the engine's ``_workbook_class``.
+
+        Raises
+        ------
+        ValueError
+            If the resolved input is not a workbook, a readable buffer
+            or a str.
+        """
+        # If filepath_or_buffer is a url, load the data into a BytesIO
+        if _is_url(filepath_or_buffer):
+            # Close the response once its payload is buffered in memory.
+            with urlopen(filepath_or_buffer) as response:
+                filepath_or_buffer = BytesIO(response.read())
+        elif not isinstance(filepath_or_buffer,
+                            (ExcelFile, self._workbook_class)):
+            filepath_or_buffer, _, _, _ = get_filepath_or_buffer(
+                filepath_or_buffer)
+
+        if isinstance(filepath_or_buffer, self._workbook_class):
+            self.book = filepath_or_buffer
+        elif hasattr(filepath_or_buffer, "read"):
+            # N.B. xlrd.Book has a read attribute too
+            filepath_or_buffer.seek(0)
+            self.book = self.load_workbook(filepath_or_buffer)
+        elif isinstance(filepath_or_buffer, str):
+            self.book = self.load_workbook(filepath_or_buffer)
+        else:
+            raise ValueError('Must explicitly set engine if not passing in'
+                             ' buffer or path for io.')
+
+    @property
+    @abc.abstractmethod
+    def _workbook_class(self):
+        """Workbook type of the engine; instances are used as-is."""
+        pass
+
+    @abc.abstractmethod
+    def load_workbook(self, filepath_or_buffer):
+        """Return the engine's workbook for a path or readable buffer."""
+        pass
+
     @property
     @abc.abstractmethod
     def sheet_names(self):
@@ -701,7 +753,7 @@ def _value_with_fmt(self, val):
             val = val.total_seconds() / float(86400)
             fmt = '0'
         else:
-            val = compat.to_str(val)
+            val = str(val)
 
         return val, fmt
 
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index efe64dca8deff..18e751274dab9 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -1,11 +1,8 @@
 from datetime import time
 from distutils.version import LooseVersion
-from io import UnsupportedOperation
-from urllib.request import urlopen
 
 import numpy as np
 
-from pandas.io.common import _is_url, get_filepath_or_buffer
 from pandas.io.excel._base import _BaseExcelReader
 
 
@@ -30,35 +27,22 @@ def __init__(self, filepath_or_buffer):
                 raise ImportError(err_msg +
                                   ". Current version " + xlrd.__VERSION__)
 
-        from pandas.io.excel._base import ExcelFile
-        # If filepath_or_buffer is a url, want to keep the data as bytes so
-        # can't pass to get_filepath_or_buffer()
-        if _is_url(filepath_or_buffer):
-            filepath_or_buffer = urlopen(filepath_or_buffer)
-        elif not isinstance(filepath_or_buffer, (ExcelFile, xlrd.Book)):
-            filepath_or_buffer, _, _, _ = get_filepath_or_buffer(
-                filepath_or_buffer)
-
-        if isinstance(filepath_or_buffer, xlrd.Book):
-            self.book = filepath_or_buffer
-        elif hasattr(filepath_or_buffer, "read"):
-            # N.B. xlrd.Book has a read attribute too
-            if hasattr(filepath_or_buffer, 'seek'):
-                try:
-                    # GH 19779
-                    filepath_or_buffer.seek(0)
-                except UnsupportedOperation:
-                    # HTTPResponse does not support seek()
-                    # GH 20434
-                    pass
+        super().__init__(filepath_or_buffer)
+
+    @property
+    def _workbook_class(self):
+        """Return ``xlrd.Book``, imported lazily."""
+        from xlrd import Book
+        return Book
 
+    def load_workbook(self, filepath_or_buffer):
+        """Open a workbook from a readable buffer's bytes or from a path."""
+        from xlrd import open_workbook
+        if hasattr(filepath_or_buffer, "read"):
             data = filepath_or_buffer.read()
-            self.book = xlrd.open_workbook(file_contents=data)
-        elif isinstance(filepath_or_buffer, str):
-            self.book = xlrd.open_workbook(filepath_or_buffer)
+            return open_workbook(file_contents=data)
         else:
-            raise ValueError('Must explicitly set engine if not passing in'
-                             ' buffer or path for io.')
+            return open_workbook(filepath_or_buffer)
 
     @property
     def sheet_names(self):