Skip to content

Commit 89ae49e

Browse files
authored
Merge pull request pandas-dev#5 from dimastbk/issue-50395
fix review: use CalamineReader/CalamineSheet
2 parents 500fa9f + 15874c3 commit 89ae49e

File tree

4 files changed: +34 -28 lines changed

ci/deps/actions-38-minimum_versions.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -61,4 +61,4 @@ dependencies:
6161

6262
- pip:
6363
- pyqt5==5.15.1
64-
- python-calamine==0.0.7
64+
- python-calamine==0.0.8

pandas/compat/_optional.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
"pyarrow": "6.0.0",
3535
"pyreadstat": "1.1.2",
3636
"pytest": "7.0.0",
37-
"python-calamine": "0.0.7",
37+
"python-calamine": "0.0.8",
3838
"pyxlsb": "1.0.8",
3939
"s3fs": "2021.08.0",
4040
"scipy": "1.7.1",

pandas/io/excel/_calaminereader.py

+31 -25
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,10 @@
66
time,
77
)
88
from tempfile import NamedTemporaryFile
9-
from typing import Union
9+
from typing import (
10+
Union,
11+
cast,
12+
)
1013

1114
from pandas._typing import (
1215
FilePath,
@@ -15,8 +18,10 @@
1518
StorageOptions,
1619
)
1720
from pandas.compat._optional import import_optional_dependency
21+
from pandas.util._decorators import doc
1822

1923
import pandas as pd
24+
from pandas.core.shared_docs import _shared_docs
2025

2126
from pandas.io.common import stringify_path
2227
from pandas.io.excel._base import (
@@ -27,28 +32,36 @@
2732
ValueT = Union[int, float, str, bool, time, date, datetime]
2833

2934

30-
class __calamine__:
31-
pass
32-
33-
3435
class CalamineExcelReader(BaseExcelReader):
35-
book: str
3636
_sheet_names: list[str] | None = None
3737

38+
@doc(storage_options=_shared_docs["storage_options"])
3839
def __init__(
3940
self,
4041
filepath_or_buffer: FilePath | ReadBuffer[bytes],
4142
storage_options: StorageOptions = None,
4243
) -> None:
44+
"""
45+
Reader using calamine engine (xlsx/xls/xlsb/ods).
46+
47+
Parameters
48+
----------
49+
filepath_or_buffer : str, path to be parsed or
50+
an open readable stream.
51+
{storage_options}
52+
"""
4353
import_optional_dependency("python_calamine")
4454
super().__init__(filepath_or_buffer, storage_options=storage_options)
4555

4656
@property
47-
def _workbook_class(self) -> type[__calamine__]:
48-
return __calamine__
57+
def _workbook_class(self):
58+
from python_calamine import CalamineReader
4959

50-
def load_workbook(self, filepath_or_buffer) -> str:
60+
return CalamineReader
61+
62+
def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]):
5163
if hasattr(filepath_or_buffer, "read") and hasattr(filepath_or_buffer, "seek"):
64+
filepath_or_buffer = cast(ReadBuffer, filepath_or_buffer)
5265
ext = inspect_excel_format(filepath_or_buffer)
5366
with NamedTemporaryFile(suffix=f".{ext}", delete=False) as tmp_file:
5467
filepath_or_buffer.seek(0)
@@ -59,29 +72,24 @@ def load_workbook(self, filepath_or_buffer) -> str:
5972

6073
assert isinstance(filepath_or_buffer, str)
6174

62-
from python_calamine import get_sheet_names
75+
from python_calamine import CalamineReader
6376

64-
self._sheet_names = get_sheet_names(filepath_or_buffer)
65-
return filepath_or_buffer
77+
return CalamineReader.from_path(filepath_or_buffer)
6678

6779
@property
6880
def sheet_names(self) -> list[str]:
69-
from python_calamine import get_sheet_names
70-
71-
if self._sheet_names is None:
72-
self._sheet_names = get_sheet_names(self.book)
73-
return self._sheet_names
81+
return self.book.sheet_names # pyright: ignore
7482

75-
def get_sheet_by_name(self, name: str) -> int:
83+
def get_sheet_by_name(self, name: str):
7684
self.raise_if_bad_sheet_by_name(name)
77-
return self.sheet_names.index(name)
85+
return self.book.get_sheet_by_name(name) # pyright: ignore
7886

79-
def get_sheet_by_index(self, index: int) -> int:
87+
def get_sheet_by_index(self, index: int):
8088
self.raise_if_bad_sheet_by_index(index)
81-
return index
89+
return self.book.get_sheet_by_index(index) # pyright: ignore
8290

8391
def get_sheet_data(
84-
self, sheet: int, file_rows_needed: int | None = None
92+
self, sheet, file_rows_needed: int | None = None
8593
) -> list[list[Scalar]]:
8694
def _convert_cell(value: ValueT) -> Scalar:
8795
if isinstance(value, float):
@@ -97,9 +105,7 @@ def _convert_cell(value: ValueT) -> Scalar:
97105

98106
return value
99107

100-
from python_calamine import get_sheet_data
101-
102-
rows = get_sheet_data(self.book, sheet, skip_empty_area=False)
108+
rows: list[list[ValueT]] = sheet.to_python(skip_empty_area=False)
103109
data: list[list[Scalar]] = []
104110

105111
for row in rows:

pyproject.toml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ all = ['beautifulsoup4>=4.9.3',
104104
'pytest>=7.0.0',
105105
'pytest-xdist>=2.2.0',
106106
'pytest-asyncio>=0.17.0',
107-
'python-calamine>=0.0.7',
107+
'python-calamine>=0.0.8',
108108
'python-snappy>=0.6.0',
109109
'pyxlsb>=1.0.8',
110110
'qtpy>=2.2.0',

0 commit comments

Comments
 (0)