|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
3 |
| -from io import BufferedReader, BytesIO |
4 |
| -from pathlib import PurePath |
5 |
| -from tempfile import NamedTemporaryFile |
6 |
| -from typing import Any, Type |
| 3 | +from datetime import date, datetime, time |
| 4 | +from typing import Union |
7 | 5 |
|
| 6 | +import pandas as pd |
| 7 | +from pandas._typing import FilePath, ReadBuffer, Scalar, StorageOptions |
| 8 | +from pandas.compat._optional import import_optional_dependency |
| 9 | +from pandas.core.shared_docs import _shared_docs |
8 | 10 | from pandas.io.excel import ExcelFile
|
9 |
| -from pandas.io.excel._base import BaseExcelReader, inspect_excel_format |
| 11 | +from pandas.io.excel._base import BaseExcelReader |
| 12 | +from pandas.util._decorators import doc |
10 | 13 |
|
11 |
| -from ._python_calamine import get_sheet_data, get_sheet_names |
12 |
| - |
13 |
| - |
14 |
| -class __calamine__: |
15 |
| - pass |
| 14 | +_ValueT = Union[int, float, str, bool, time, date, datetime] |
16 | 15 |
|
17 | 16 |
|
18 | 17 | class CalamineExcelReader(BaseExcelReader):
|
19 |
| - book: str |
20 | 18 | _sheet_names: list[str] | None = None
|
21 | 19 |
|
22 |
| - @property |
23 |
| - def _workbook_class(self) -> Type[__calamine__]: |
24 |
| - return __calamine__ |
| 20 | + @doc(storage_options=_shared_docs["storage_options"]) |
| 21 | + def __init__( |
| 22 | + self, |
| 23 | + filepath_or_buffer: FilePath | ReadBuffer[bytes], |
| 24 | + storage_options: StorageOptions = None, |
| 25 | + ) -> None: |
| 26 | + """ |
| 27 | + Reader using calamine engine (xlsx/xls/xlsb/ods). |
| 28 | +
|
| 29 | + Parameters |
| 30 | + ---------- |
| 31 | + filepath_or_buffer : str, path to be parsed or |
| 32 | + an open readable stream. |
| 33 | + {storage_options} |
| 34 | + """ |
| 35 | + import_optional_dependency("python_calamine") |
| 36 | + super().__init__(filepath_or_buffer, storage_options=storage_options) |
25 | 37 |
|
26 |
| - def load_workbook( |
27 |
| - self, filepath_or_buffer: str | PurePath | BufferedReader | BytesIO |
28 |
| - ) -> str: |
29 |
| - if isinstance(filepath_or_buffer, BufferedReader): |
30 |
| - filepath_or_buffer = filepath_or_buffer.name |
31 |
| - |
32 |
| - elif isinstance(filepath_or_buffer, BytesIO): |
33 |
| - ext = inspect_excel_format(filepath_or_buffer) |
34 |
| - with NamedTemporaryFile(suffix=f".{ext}", delete=False) as tmp_file: |
35 |
| - tmp_file.write(filepath_or_buffer.getvalue()) |
36 |
| - filepath_or_buffer = tmp_file.name |
| 38 | + @property |
| 39 | + def _workbook_class(self): |
| 40 | + from python_calamine import CalamineWorkbook |
37 | 41 |
|
38 |
| - elif isinstance(filepath_or_buffer, PurePath): |
39 |
| - filepath_or_buffer = filepath_or_buffer.as_posix() |
| 42 | + return CalamineWorkbook |
40 | 43 |
|
41 |
| - assert isinstance(filepath_or_buffer, str) |
| 44 | + def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): |
| 45 | + from python_calamine import CalamineWorkbook |
42 | 46 |
|
43 |
| - self._sheet_names = get_sheet_names(filepath_or_buffer) |
44 |
| - return filepath_or_buffer |
| 47 | + return CalamineWorkbook.from_pyobject(filepath_or_buffer) |
45 | 48 |
|
46 | 49 | @property
|
47 | 50 | def sheet_names(self) -> list[str]:
|
48 |
| - if self._sheet_names is None: |
49 |
| - self._sheet_names = get_sheet_names(self.book) |
50 |
| - return self._sheet_names |
| 51 | + return self.book.sheet_names # pyright: ignore |
51 | 52 |
|
52 |
| - def get_sheet_by_name(self, name: str) -> int: |
| 53 | + def get_sheet_by_name(self, name: str): |
53 | 54 | self.raise_if_bad_sheet_by_name(name)
|
54 |
| - return self.sheet_names.index(name) |
| 55 | + return self.book.get_sheet_by_name(name) # pyright: ignore |
55 | 56 |
|
56 |
| - def get_sheet_by_index(self, index: int) -> int: |
| 57 | + def get_sheet_by_index(self, index: int): |
57 | 58 | self.raise_if_bad_sheet_by_index(index)
|
58 |
| - return index |
| 59 | + return self.book.get_sheet_by_index(index) # pyright: ignore |
59 | 60 |
|
60 |
| - def get_sheet_data(self, sheet: int, convert_float: bool) -> list[list[Any]]: |
61 |
| - return get_sheet_data(self.book, sheet) |
| 61 | + def get_sheet_data( |
| 62 | + self, sheet, file_rows_needed: int | None = None |
| 63 | + ) -> list[list[Scalar]]: |
| 64 | + def _convert_cell(value: _ValueT) -> Scalar: |
| 65 | + if isinstance(value, float): |
| 66 | + val = int(value) |
| 67 | + if val == value: |
| 68 | + return val |
| 69 | + else: |
| 70 | + return value |
| 71 | + elif isinstance(value, date): |
| 72 | + return pd.Timestamp(value) |
| 73 | + elif isinstance(value, time): |
| 74 | + return value.isoformat() |
62 | 75 |
|
| 76 | + return value |
63 | 77 |
|
64 |
| -def pandas_monkeypatch(): |
| 78 | + rows: list[list[_ValueT]] = sheet.to_python(skip_empty_area=False) |
| 79 | + data: list[list[Scalar]] = [] |
| 80 | + |
| 81 | + for row in rows: |
| 82 | + data.append([_convert_cell(cell) for cell in row]) |
| 83 | + if file_rows_needed is not None and len(data) >= file_rows_needed: |
| 84 | + break |
| 85 | + |
| 86 | + return data |
65 | 87 |
|
| 88 | + |
| 89 | +def pandas_monkeypatch(): |
66 | 90 | ExcelFile._engines = {
|
67 | 91 | "calamine": CalamineExcelReader,
|
68 | 92 | **ExcelFile._engines,
|
|
0 commit comments