Skip to content

Commit 1f91242

Browse files
committed
many improvements, refactoring:
* added reading from filelike objects (breaking change!) * supporting pathlike * remove get_sheet_data/get_sheet_names functions (breaking change!) * updated readme/tests * CalamineReader -> CalamineWorkbook (breaking change!)
1 parent 2fe1f26 commit 1f91242

File tree

11 files changed

+201
-189
lines changed

11 files changed

+201
-189
lines changed

Cargo.toml

+3-2
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@ crate-type = ["cdylib"]
1515

1616
[dependencies]
1717
calamine = {version = "0.19.1", features = ["dates", "chrono"]}
18-
pyo3 = {version = "0.18.0", features = ["extension-module", "chrono"]}
18+
pyo3 = {version = "0.18.1", features = ["extension-module", "chrono"]}
1919
chrono = {version = "0.4.23", features = ["serde"]}
20+
pyo3-file = {git = "https://github.com/omerbenamram/pyo3-file.git", rev = "2bc4c1a00551ce1942ebf20cb4ebee4884b3aad1"}
2021

2122
[build-dependencies]
22-
pyo3-build-config = "0.18.0"
23+
pyo3-build-config = "0.18.1"
2324

2425
[package.metadata.maturin]
2526
name = "python_calamine._python_calamine"

README.md

+14-14
Original file line numberDiff line numberDiff line change
@@ -14,30 +14,30 @@ pip install python-calamine
1414

1515
### Example
1616
```python
17-
from python_calamine import get_sheet_data, get_sheet_names
17+
from python_calamine import CalamineWorkbook
1818

19+
workbook = CalamineWorkbook.from_pyobject("file.xlsx")
20+
workbook.sheet_names
21+
# ["Sheet1", "Sheet2"]
1922

20-
get_sheet_names("file.xlsx")
21-
# ['Sheet1', 'Sheet2']
22-
23-
get_sheet_data("file.xlsx", "Sheet1")
23+
workbook.get_sheet_by_name("Sheet1").to_python()
2424
# [
25-
# ['1', '2', '3', '4', '5', '6', '7'],
26-
# ['1', '2', '3', '4', '5', '6', '7'],
27-
# ['1', '2', '3', '4', '5', '6', '7'],
25+
# ["1", "2", "3", "4", "5", "6", "7"],
26+
# ["1", "2", "3", "4", "5", "6", "7"],
27+
# ["1", "2", "3", "4", "5", "6", "7"],
2828
# ]
2929
```
3030

3131
By default, calamine skips empty rows/cols before data. To suppress this behaviour, set `skip_empty_area` to `False`.
3232
```python
33-
from python_calamine import get_sheet_data
33+
from python_calamine import CalamineWorkbook
3434

35-
get_sheet_data("file.xlsx", "Sheet1", skip_empty_area=False)
35+
workbook = CalamineWorkbook.from_pyobject("file.xlsx").get_sheet_by_name("Sheet1").to_python(skip_empty_area=False)
3636
# [
37-
# ['', '', '', '', '', '', ''],
38-
# ['1', '2', '3', '4', '5', '6', '7'],
39-
# ['1', '2', '3', '4', '5', '6', '7'],
40-
# ['1', '2', '3', '4', '5', '6', '7'],
37+
# ["", "", "", "", "", "", ""],
38+
# ["1", "2", "3", "4", "5", "6", "7"],
39+
# ["1", "2", "3", "4", "5", "6", "7"],
40+
# ["1", "2", "3", "4", "5", "6", "7"],
4141
# ]
4242
```
4343

python/python_calamine/__init__.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
from ._python_calamine import ( # noqa: F401
22
CalamineError,
3-
CalamineReader,
43
CalamineSheet,
5-
get_sheet_data,
6-
get_sheet_names,
4+
CalamineWorkbook,
75
)

python/python_calamine/_python_calamine.pyi

+10-7
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
from __future__ import annotations
22

33
from datetime import date, datetime, time
4+
from os import PathLike
5+
from typing import Protocol
46

57
ValueT = int | float | str | bool | time | date | datetime
68

9+
class ReadBuffer(Protocol):
    # Structural type for the file-like objects accepted by
    # CalamineWorkbook.from_pyobject: anything seekable and readable as bytes.
    # The leading double underscore marks the parameters as positional-only
    # (stub convention), so objects such as io.BytesIO match regardless of
    # how they name their arguments.
    def seek(self, __offset: int, __whence: int = ...) -> int: ...
    def read(self, __size: int = ...) -> bytes: ...
12+
713
class CalamineSheet:
814
name: str
915
@property
@@ -20,16 +26,13 @@ class CalamineSheet:
2026
def end(self) -> tuple[int, int] | None: ...
2127
def to_python(self, skip_empty_area: bool = True) -> list[list[ValueT]]: ...
2228

23-
class CalamineReader:
29+
class CalamineWorkbook:
2430
sheet_names: list[str]
2531
@classmethod
26-
def from_path(cls, path: str) -> "CalamineReader": ...
32+
def from_pyobject(
33+
cls, path_or_file_like: str | PathLike | ReadBuffer
34+
) -> "CalamineWorkbook": ...
2735
def get_sheet_by_name(self, name: str) -> CalamineSheet: ...
2836
def get_sheet_by_index(self, index: int) -> CalamineSheet: ...
2937

30-
def get_sheet_data(
31-
path: str, sheet: int, skip_empty_area: bool = True
32-
) -> list[list[ValueT]]: ...
33-
def get_sheet_names(path: str) -> list[str]: ...
34-
3538
class CalamineError(Exception): ...

src/lib.rs

+2-36
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,12 @@
1-
use calamine::{open_workbook_auto, Error, Reader, Sheets};
21
use pyo3::prelude::*;
3-
use pyo3::wrap_pyfunction;
42

53
mod types;
64
mod utils;
7-
use crate::types::{CalamineError, CalamineReader, CalamineSheet, CellValue};
8-
use crate::utils::convert_err_to_py;
9-
10-
#[pyfunction]
11-
#[pyo3(signature = (path, sheet, skip_empty_area=true))]
12-
fn get_sheet_data(
13-
path: &str,
14-
sheet: usize,
15-
skip_empty_area: bool,
16-
) -> PyResult<Vec<Vec<CellValue>>> {
17-
let mut excel: Sheets<_> = open_workbook_auto(path).map_err(convert_err_to_py)?;
18-
let readed_range = excel.worksheet_range_at(sheet);
19-
let mut range = readed_range
20-
.unwrap_or_else(|| Err(Error::Msg("Workbook is empty")))
21-
.map_err(convert_err_to_py)?;
22-
if !skip_empty_area {
23-
if let Some(end) = range.end() {
24-
range = range.range((0, 0), end)
25-
}
26-
}
27-
Ok(range
28-
.rows()
29-
.map(|row| row.iter().map(|x| x.into()).collect())
30-
.collect())
31-
}
32-
33-
#[pyfunction]
34-
fn get_sheet_names(path: &str) -> PyResult<Vec<String>> {
35-
let excel: Sheets<_> = open_workbook_auto(path).map_err(convert_err_to_py)?;
36-
Ok(excel.sheet_names().to_vec())
37-
}
5+
use crate::types::{CalamineError, CalamineSheet, CalamineWorkbook, CellValue};
386

397
#[pymodule]
408
fn _python_calamine(py: Python, m: &PyModule) -> PyResult<()> {
41-
m.add_function(wrap_pyfunction!(get_sheet_data, m)?)?;
42-
m.add_function(wrap_pyfunction!(get_sheet_names, m)?)?;
43-
m.add_class::<CalamineReader>()?;
9+
m.add_class::<CalamineWorkbook>()?;
4410
m.add_class::<CalamineSheet>()?;
4511
m.add("CalamineError", py.get_type::<CalamineError>())?;
4612
Ok(())

src/types/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ use pyo3::create_exception;
22
use pyo3::exceptions::PyException;
33

44
mod cell;
5-
mod reader;
65
mod sheet;
6+
mod workbook;
77
pub use cell::CellValue;
8-
pub use reader::CalamineReader;
98
pub use sheet::CalamineSheet;
9+
pub use workbook::CalamineWorkbook;
1010

1111
create_exception!(python_calamine, CalamineError, PyException);

src/types/reader.rs

-50
This file was deleted.

src/types/sheet.rs

+5
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,27 @@ impl CalamineSheet {
2222
fn height(&self) -> usize {
2323
self.range.height()
2424
}
25+
2526
#[getter]
2627
fn width(&self) -> usize {
2728
self.range.height()
2829
}
30+
2931
#[getter]
3032
fn total_height(&self) -> u32 {
3133
self.range.end().unwrap_or_default().0
3234
}
35+
3336
#[getter]
3437
fn total_width(&self) -> u32 {
3538
self.range.end().unwrap_or_default().1
3639
}
40+
3741
#[getter]
3842
fn start(&self) -> Option<(u32, u32)> {
3943
self.range.start()
4044
}
45+
4146
#[getter]
4247
fn end(&self) -> Option<(u32, u32)> {
4348
self.range.end()

src/types/workbook.rs

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
use std::fs::File;
2+
use std::io::{BufReader, Cursor, Read};
3+
4+
use calamine::{open_workbook_auto, open_workbook_auto_from_rs, Error, Reader, Sheets};
5+
use pyo3::prelude::*;
6+
use pyo3::types::PyType;
7+
8+
use crate::utils::{err_to_py, FileOrFileLike};
9+
use crate::{CalamineError, CalamineSheet};
10+
11+
enum SheetsEnum {
12+
File(Sheets<BufReader<File>>),
13+
FileLike(Sheets<Cursor<Vec<u8>>>),
14+
}
15+
16+
impl SheetsEnum {
17+
fn sheet_names(&self) -> &[String] {
18+
match self {
19+
SheetsEnum::File(f) => f.sheet_names(),
20+
SheetsEnum::FileLike(f) => f.sheet_names(),
21+
}
22+
}
23+
24+
fn worksheet_range(
25+
&mut self,
26+
name: &str,
27+
) -> Option<Result<calamine::Range<calamine::DataType>, Error>> {
28+
match self {
29+
SheetsEnum::File(f) => f.worksheet_range(name),
30+
SheetsEnum::FileLike(f) => f.worksheet_range(name),
31+
}
32+
}
33+
34+
fn worksheet_range_at(
35+
&mut self,
36+
index: usize,
37+
) -> Option<Result<calamine::Range<calamine::DataType>, Error>> {
38+
match self {
39+
SheetsEnum::File(f) => f.worksheet_range_at(index),
40+
SheetsEnum::FileLike(f) => f.worksheet_range_at(index),
41+
}
42+
}
43+
}
44+
45+
#[pyclass]
46+
pub struct CalamineWorkbook {
47+
sheets: SheetsEnum,
48+
#[pyo3(get)]
49+
sheet_names: Vec<String>,
50+
}
51+
52+
#[pymethods]
53+
impl CalamineWorkbook {
54+
#[classmethod]
55+
fn from_pyobject(_cls: &PyType, path_or_file_like: PyObject) -> PyResult<Self> {
56+
Python::with_gil(|_py| {
57+
let sheets: SheetsEnum = match FileOrFileLike::from_pyobject(path_or_file_like)? {
58+
FileOrFileLike::FileLike(mut f) => {
59+
let mut buf = vec![];
60+
f.read_to_end(&mut buf)?;
61+
let reader = Cursor::new(buf);
62+
SheetsEnum::FileLike(open_workbook_auto_from_rs(reader).map_err(err_to_py)?)
63+
}
64+
FileOrFileLike::Path(s) => {
65+
SheetsEnum::File(open_workbook_auto(s).map_err(err_to_py)?)
66+
}
67+
};
68+
69+
let sheet_names = sheets.sheet_names().to_owned();
70+
71+
Ok(Self {
72+
sheets,
73+
sheet_names,
74+
})
75+
})
76+
}
77+
78+
fn get_sheet_by_name(&mut self, name: &str) -> PyResult<CalamineSheet> {
79+
let range = self
80+
.sheets
81+
.worksheet_range(name)
82+
.unwrap_or_else(|| Err(Error::Msg("Workbook is empty")))
83+
.map_err(err_to_py)?;
84+
Ok(CalamineSheet::new(name.to_owned(), range))
85+
}
86+
87+
fn get_sheet_by_index(&mut self, index: usize) -> PyResult<CalamineSheet> {
88+
let name = self
89+
.sheet_names
90+
.get(index)
91+
.ok_or_else(|| CalamineError::new_err("Workbook is empty"))?
92+
.to_string();
93+
let range = self
94+
.sheets
95+
.worksheet_range_at(index)
96+
.unwrap_or_else(|| Err(Error::Msg("Workbook is empty")))
97+
.map_err(err_to_py)?;
98+
Ok(CalamineSheet::new(name, range))
99+
}
100+
}

src/utils.rs

+39-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,50 @@
1+
use std::path::PathBuf;
2+
13
use calamine::Error;
24
use pyo3::exceptions::PyIOError;
5+
use pyo3::prelude::*;
6+
use pyo3::types::PyString;
37
use pyo3::PyErr;
8+
use pyo3_file::PyFileLikeObject;
49

510
use crate::types::CalamineError;
611

7-
pub fn convert_err_to_py(e: Error) -> PyErr {
12+
pub fn err_to_py(e: Error) -> PyErr {
813
match e {
914
Error::Io(err) => PyIOError::new_err(err.to_string()),
1015
_ => CalamineError::new_err(e.to_string()),
1116
}
1217
}
18+
19+
// This enum uses code from example pyo3-file
20+
// MIT License, see authors under link
21+
// https://github.com/omerbenamram/pyo3-file/tree/2bc4c1a00551ce1942ebf20cb4ebee4884b3aad1#example
22+
#[derive(Debug)]
23+
pub enum FileOrFileLike {
24+
Path(String),
25+
FileLike(PyFileLikeObject),
26+
}
27+
28+
impl FileOrFileLike {
29+
pub fn from_pyobject(path_or_file_like: PyObject) -> PyResult<FileOrFileLike> {
30+
Python::with_gil(|py| {
31+
// is a string path
32+
if let Ok(string_ref) = path_or_file_like.downcast::<PyString>(py) {
33+
return Ok(FileOrFileLike::Path(
34+
string_ref.to_string_lossy().to_string(),
35+
));
36+
}
37+
38+
// is a pathlike
39+
if let Ok(string_ref) = path_or_file_like.extract::<PathBuf>(py) {
40+
return Ok(FileOrFileLike::Path(
41+
string_ref.to_string_lossy().to_string(),
42+
));
43+
}
44+
45+
// is a file-like
46+
PyFileLikeObject::with_requirements(path_or_file_like, true, false, true)
47+
.map(FileOrFileLike::FileLike)
48+
})
49+
}
50+
}

0 commit comments

Comments
 (0)