Skip to content

Commit 792b05d

Browse files
jonashaag authored and noatamir committed
SAS7BDAT parser: Speed up RLE/RDC decompression (pandas-dev#47405)
* Speed up RLE/RDC decompression
* Update tests
* ssize_t -> size_t
* Update sas.pyx
* Don't use null byte as except value
* Nit
* Simplify condition
* Review feedback
* Docstring -> comment
* Revert "Simplify condition"

  This reverts commit 263aea6.
* Lint
* Speed up some Cython `except`
* Typo
1 parent c769b99 commit 792b05d

File tree

3 files changed

+150
-123
lines changed

3 files changed

+150
-123
lines changed

asv_bench/benchmarks/io/sas.py

+15 -22
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,23 @@
1-
import os
1+
from pathlib import Path
22

33
from pandas import read_sas
44

5+
ROOT = Path(__file__).parents[3] / "pandas" / "tests" / "io" / "sas" / "data"
6+
57

68
class SAS:
9+
def time_read_sas7bdat(self):
10+
read_sas(ROOT / "test1.sas7bdat")
711

8-
params = ["sas7bdat", "xport"]
9-
param_names = ["format"]
12+
def time_read_xpt(self):
13+
read_sas(ROOT / "paxraw_d_short.xpt")
1014

11-
def setup(self, format):
12-
# Read files that are located in 'pandas/tests/io/sas/data'
13-
files = {"sas7bdat": "test1.sas7bdat", "xport": "paxraw_d_short.xpt"}
14-
file = files[format]
15-
paths = [
16-
os.path.dirname(__file__),
17-
"..",
18-
"..",
19-
"..",
20-
"pandas",
21-
"tests",
22-
"io",
23-
"sas",
24-
"data",
25-
file,
26-
]
27-
self.f = os.path.join(*paths)
15+
def time_read_sas7bdat_2(self):
16+
next(read_sas(ROOT / "0x00controlbyte.sas7bdat.bz2", chunksize=11000))
2817

29-
def time_read_sas(self, format):
30-
read_sas(self.f, format=format)
18+
def time_read_sas7bdat_2_chunked(self):
19+
for i, _ in enumerate(
20+
read_sas(ROOT / "0x00controlbyte.sas7bdat.bz2", chunksize=1000)
21+
):
22+
if i == 10:
23+
break

0 commit comments

Comments
 (0)