|
1 | 1 | from io import BytesIO
|
2 | 2 |
|
3 | 3 | import numpy as np
|
| 4 | +from odf.opendocument import OpenDocumentSpreadsheet |
| 5 | +from odf.table import Table, TableCell, TableRow |
| 6 | +from odf.text import P |
4 | 7 |
|
5 | 8 | from pandas import DataFrame, ExcelWriter, date_range, read_excel
|
6 | 9 | import pandas.util.testing as tm
|
7 | 10 |
|
8 | 11 |
|
9 |
| -class Excel: |
| 12 | +def _generate_dataframe(): |
| 13 | + N = 2000 |
| 14 | + C = 5 |
| 15 | + df = DataFrame( |
| 16 | + np.random.randn(N, C), |
| 17 | + columns=["float{}".format(i) for i in range(C)], |
| 18 | + index=date_range("20000101", periods=N, freq="H"), |
| 19 | + ) |
| 20 | + df["object"] = tm.makeStringIndex(N) |
| 21 | + return df |
| 22 | + |
| 23 | + |
class WriteExcel:
    """Benchmark writing a DataFrame to an in-memory Excel workbook.

    The benchmark is parametrized over the writer ``engine``.
    """

    params = ["openpyxl", "xlsxwriter", "xlwt"]
    param_names = ["engine"]

    def setup(self, engine):
        """Build the DataFrame that ``time_write_excel`` serialises."""
        self.df = _generate_dataframe()

    def time_write_excel(self, engine):
        """Write ``self.df`` to sheet "Sheet1" of a fresh ``BytesIO`` buffer.

        Parameters
        ----------
        engine : str
            Name of the ExcelWriter engine to use.
        """
        bio = BytesIO()
        # A new BytesIO already sits at position 0, so no seek is needed.
        # The context manager finalises the workbook on exit and also
        # closes the writer if to_excel raises.
        with ExcelWriter(bio, engine=engine) as writer:
            self.df.to_excel(writer, sheet_name="Sheet1")
| 38 | + |
| 39 | + |
class ReadExcel:
    """Benchmark ``read_excel`` for each reader ``engine``.

    ``setup_cache`` writes one ``.xlsx`` file and one ``.ods`` file; the
    timed method reads whichever of the two matches the engine under test.
    """

    params = ["xlrd", "openpyxl", "odf"]
    param_names = ["engine"]
    fname_excel = "spreadsheet.xlsx"
    fname_odf = "spreadsheet.ods"

    @staticmethod
    def _string_row(record):
        """Return a ``TableRow`` holding one string-typed cell per value."""
        odf_row = TableRow()
        for value in record:
            cell = TableCell(valuetype="string")
            cell.addElement(P(text=value))
            odf_row.addElement(cell)
        return odf_row

    def _create_odf(self):
        """Save the values of ``self.df`` to ``fname_odf`` as table "Table1".

        Only ``self.df.values`` is written, so the index and the column
        labels are not part of the resulting sheet.
        """
        document = OpenDocumentSpreadsheet()
        sheet = Table(name="Table1")
        for record in self.df.values:
            sheet.addElement(self._string_row(record))

        document.spreadsheet.addElement(sheet)
        document.save(self.fname_odf)

    def setup_cache(self):
        """Generate the benchmark frame and write both spreadsheet files."""
        self.df = _generate_dataframe()

        self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
        self._create_odf()

    def time_read_excel(self, engine):
        """Read the file matching ``engine`` with ``read_excel``."""
        if engine == "odf":
            fname = self.fname_odf
        else:
            fname = self.fname_excel
        read_excel(fname, engine=engine)
38 | 70 |
|
39 | 71 |
|
40 | 72 | from ..pandas_vb_common import setup # noqa: F401 isort:skip
|
0 commit comments