Skip to content

Commit 7eff627

Browse files
tdamsma authored and WillAyd committed
Refactor init for Excel readers to _BaseExcelReader (#26233)
1 parent b6324be commit 7eff627

File tree

2 files changed

+47
-32
lines changed

2 files changed

+47
-32
lines changed

pandas/io/excel/_base.py

+36-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import abc
22
from collections import OrderedDict
33
from datetime import date, datetime, timedelta
4+
from io import BytesIO
45
import os
56
from textwrap import fill
7+
from urllib.request import urlopen
68
import warnings
79

810
from pandas._config import config
911

10-
import pandas.compat as compat
1112
from pandas.errors import EmptyDataError
1213
from pandas.util._decorators import Appender, deprecate_kwarg
1314

@@ -16,7 +17,9 @@
1617

1718
from pandas.core.frame import DataFrame
1819

19-
from pandas.io.common import _NA_VALUES, _stringify_path, _validate_header_arg
20+
from pandas.io.common import (
21+
_NA_VALUES, _is_url, _stringify_path, _validate_header_arg,
22+
get_filepath_or_buffer)
2023
from pandas.io.excel._util import (
2124
_fill_mi_header, _get_default_writer, _maybe_convert_usecols,
2225
_pop_header_name, get_writer)
@@ -329,6 +332,36 @@ def read_excel(io,
329332

330333
class _BaseExcelReader(metaclass=abc.ABCMeta):
331334

335+
def __init__(self, filepath_or_buffer):
    """
    Resolve ``filepath_or_buffer`` to a workbook and store it on ``self.book``.

    Parameters
    ----------
    filepath_or_buffer : str, file-like object, ExcelFile or workbook
        A URL, a path, an object exposing ``read``/``seek``, or an
        instance of the engine's ``_workbook_class``.

    Raises
    ------
    ValueError
        If the resolved input is neither a workbook instance, an object
        with a ``read`` attribute, nor a string.
    """
    # If filepath_or_buffer is a url, load the data into a BytesIO
    if _is_url(filepath_or_buffer):
        # Use the response as a context manager so the underlying
        # connection is closed once the payload has been copied into memory.
        with urlopen(filepath_or_buffer) as response:
            filepath_or_buffer = BytesIO(response.read())
    elif not isinstance(filepath_or_buffer,
                        (ExcelFile, self._workbook_class)):
        filepath_or_buffer, _, _, _ = get_filepath_or_buffer(
            filepath_or_buffer)

    if isinstance(filepath_or_buffer, self._workbook_class):
        # Already an open workbook of the engine's own type: use it as is.
        self.book = filepath_or_buffer
    elif hasattr(filepath_or_buffer, "read"):
        # N.B. xlrd.Book has a read attribute too
        # Rewind so the engine reads the buffer from its beginning.
        filepath_or_buffer.seek(0)
        self.book = self.load_workbook(filepath_or_buffer)
    elif isinstance(filepath_or_buffer, str):
        self.book = self.load_workbook(filepath_or_buffer)
    else:
        raise ValueError('Must explicitly set engine if not passing in'
                         ' buffer or path for io.')
356+
@property
@abc.abstractmethod
def _workbook_class(self):
    """
    Workbook type of the concrete engine; subclasses must override.

    ``__init__`` passes this value to ``isinstance`` to recognise inputs
    that are already open workbooks.
    """
    return None
360+
361+
@abc.abstractmethod
def load_workbook(self, filepath_or_buffer):
    """
    Open ``filepath_or_buffer`` with the concrete engine.

    Subclasses must override this; ``__init__`` stores the returned
    object on ``self.book``. The base implementation returns ``None``.
    """
    return None
364+
332365
@property
333366
@abc.abstractmethod
334367
def sheet_names(self):
@@ -701,7 +734,7 @@ def _value_with_fmt(self, val):
701734
val = val.total_seconds() / float(86400)
702735
fmt = '0'
703736
else:
704-
val = compat.to_str(val)
737+
val = str(val)
705738

706739
return val, fmt
707740

pandas/io/excel/_xlrd.py

+11-29
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
from datetime import time
22
from distutils.version import LooseVersion
3-
from io import UnsupportedOperation
4-
from urllib.request import urlopen
53

64
import numpy as np
75

8-
from pandas.io.common import _is_url, get_filepath_or_buffer
96
from pandas.io.excel._base import _BaseExcelReader
107

118

@@ -30,35 +27,20 @@ def __init__(self, filepath_or_buffer):
3027
raise ImportError(err_msg +
3128
". Current version " + xlrd.__VERSION__)
3229

33-
from pandas.io.excel._base import ExcelFile
34-
# If filepath_or_buffer is a url, want to keep the data as bytes so
35-
# can't pass to get_filepath_or_buffer()
36-
if _is_url(filepath_or_buffer):
37-
filepath_or_buffer = urlopen(filepath_or_buffer)
38-
elif not isinstance(filepath_or_buffer, (ExcelFile, xlrd.Book)):
39-
filepath_or_buffer, _, _, _ = get_filepath_or_buffer(
40-
filepath_or_buffer)
41-
42-
if isinstance(filepath_or_buffer, xlrd.Book):
43-
self.book = filepath_or_buffer
44-
elif hasattr(filepath_or_buffer, "read"):
45-
# N.B. xlrd.Book has a read attribute too
46-
if hasattr(filepath_or_buffer, 'seek'):
47-
try:
48-
# GH 19779
49-
filepath_or_buffer.seek(0)
50-
except UnsupportedOperation:
51-
# HTTPResponse does not support seek()
52-
# GH 20434
53-
pass
30+
super().__init__(filepath_or_buffer)
31+
32+
@property
def _workbook_class(self):
    """Return the ``xlrd.Book`` class, importing xlrd only when accessed."""
    import xlrd

    return xlrd.Book
5436

37+
def load_workbook(self, filepath_or_buffer):
    """
    Open a workbook with xlrd.

    Objects exposing ``read`` are read in full and their contents handed
    to xlrd as ``file_contents``; anything else is passed to xlrd
    unchanged as its first argument.
    """
    import xlrd

    if not hasattr(filepath_or_buffer, "read"):
        return xlrd.open_workbook(filepath_or_buffer)

    contents = filepath_or_buffer.read()
    return xlrd.open_workbook(file_contents=contents)
6244

6345
@property
6446
def sheet_names(self):

0 commit comments

Comments
 (0)