forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_xlrd.py
126 lines (102 loc) · 3.86 KB
/
_xlrd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from datetime import time
from typing import List, Optional, Sequence
import numpy as np
from pandas._typing import Scalar, Union
from pandas.compat._optional import import_optional_dependency
from pandas.io.excel._base import _BaseExcelReader
class _XlrdReader(_BaseExcelReader):
    """
    Excel reader backed by the ``xlrd`` package.

    All ``xlrd`` imports are done lazily inside the methods so that the
    module can be imported even when ``xlrd`` is not installed.
    """

    def __init__(self, filepath_or_buffer):
        """
        Reader using xlrd engine.

        Parameters
        ----------
        filepath_or_buffer : string, path object or Workbook
            Object to be parsed.
        """
        err_msg = "Install xlrd >= 1.0.0 for Excel support"
        # Fail early, with an actionable message, when xlrd is missing.
        import_optional_dependency("xlrd", extra=err_msg)
        super().__init__(filepath_or_buffer)

    @property
    def _workbook_class(self):
        """Return the ``xlrd.Book`` class."""
        from xlrd import Book

        return Book

    def load_workbook(self, filepath_or_buffer):
        """
        Open a workbook with ``xlrd.open_workbook``.

        Parameters
        ----------
        filepath_or_buffer : path or object with a ``read`` method
            Objects exposing ``read`` are read fully and their contents are
            passed as ``file_contents``; anything else is passed to
            ``open_workbook`` as-is.

        Returns
        -------
        The object returned by ``xlrd.open_workbook``.
        """
        from xlrd import open_workbook

        if hasattr(filepath_or_buffer, "read"):
            data = filepath_or_buffer.read()
            return open_workbook(file_contents=data)
        else:
            return open_workbook(filepath_or_buffer)

    @property
    def sheet_names(self):
        """Return ``self.book.sheet_names()``."""
        return self.book.sheet_names()

    def get_sheet_by_name(self, name):
        """Return the sheet of ``self.book`` called ``name``."""
        return self.book.sheet_by_name(name)

    def get_sheet_by_index(self, index):
        """Return the sheet of ``self.book`` at position ``index``."""
        return self.book.sheet_by_index(index)

    def get_sheet_data(
        self,
        sheet,
        convert_float: bool,
        header: Optional[Union[int, Sequence[int]]],
        skiprows: Optional[Union[int, Sequence[int]]],
        nrows: Optional[int],
    ) -> List[List[Scalar]]:
        """
        Convert the cells of ``sheet`` into a list of rows of Python scalars.

        Parameters
        ----------
        sheet : xlrd sheet
            Must provide ``nrows``, ``row_values(i)`` and ``row_types(i)``.
        convert_float : bool
            If True, finite numeric cells whose value is integral are
            returned as ``int`` instead of ``float``.
        header, skiprows, nrows
            Forwarded unchanged to ``self.should_read_row`` together with the
            row index to decide whether a row is parsed, skipped, or whether
            reading stops.

        Returns
        -------
        list of list of scalar
            One inner list per visited row. Rows that ``should_read_row``
            asks to skip are represented by an empty list.
        """
        import math

        from xlrd import (
            xldate,
            XL_CELL_DATE,
            XL_CELL_ERROR,
            XL_CELL_BOOLEAN,
            XL_CELL_NUMBER,
        )

        # Truthy when the workbook uses the 1904 date system.
        epoch1904 = self.book.datemode

        def _parse_cell(cell_contents, cell_typ):
            """
            converts the contents of the cell into a pandas appropriate object
            """
            if cell_typ == XL_CELL_DATE:

                # Use the newer xlrd datetime handling.
                try:
                    cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904)
                except OverflowError:
                    # Value cannot be represented as a datetime; hand back
                    # the raw cell value instead of failing the whole read.
                    return cell_contents

                # Excel doesn't distinguish between dates and time,
                # so we treat dates on the epoch as times only.
                # Also, Excel supports 1900 and 1904 epochs.
                year_month_day = (cell_contents.timetuple())[0:3]
                if (not epoch1904 and year_month_day == (1899, 12, 31)) or (
                    epoch1904 and year_month_day == (1904, 1, 1)
                ):
                    cell_contents = time(
                        cell_contents.hour,
                        cell_contents.minute,
                        cell_contents.second,
                        cell_contents.microsecond,
                    )

            elif cell_typ == XL_CELL_ERROR:
                cell_contents = np.nan
            elif cell_typ == XL_CELL_BOOLEAN:
                cell_contents = bool(cell_contents)
            elif convert_float and cell_typ == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less surprising
                # int() raises ValueError on NaN and OverflowError on +/-inf,
                # so non-finite values are passed through as floats.
                if math.isfinite(cell_contents):
                    val = int(cell_contents)
                    if val == cell_contents:
                        cell_contents = val
            return cell_contents

        data: List[List[Scalar]] = []

        for i in range(sheet.nrows):
            should_continue, should_break = self.should_read_row(
                i, header, skiprows, nrows
            )
            if should_continue:
                # Skipped rows still get a placeholder entry
                # (presumably to keep row positions aligned for the caller
                # -- confirm against should_read_row's contract).
                data.append([])
                continue
            if should_break:
                break

            row = [
                _parse_cell(value, typ)
                for value, typ in zip(sheet.row_values(i), sheet.row_types(i))
            ]
            data.append(row)

        return data