@@ -24,25 +24,24 @@ def __init__(self):
24
24
# we need to import on first use
25
25
26
26
try :
27
- import pyarrow # noqa
27
+ import pyarrow
28
+ from pyarrow import parquet
28
29
except ImportError :
29
30
raise ImportError ("pyarrow is required for parquet support\n \n "
30
31
"you can install via conda\n "
31
32
"conda install pyarrow -c conda-forge\n "
32
33
"\n or via pip\n "
33
34
"pip install pyarrow\n " )
35
+ self .api = pyarrow
36
+ self .parquet = parquet
34
37
35
38
def write(self, df, path, compression=None, **kwargs):
    """Write ``df`` to ``path`` through the engine handles cached on ``self``.

    Parameters
    ----------
    df : object
        Passed to ``self.api.Table.from_pandas``.
    path : object
        Destination handed unchanged to ``self.parquet.write_table``.
    compression : object, optional
        Forwarded as the ``compression`` keyword (default ``None``).
    **kwargs
        Extra keyword arguments forwarded to ``self.parquet.write_table``.
    """
    # Use the modules stored on the instance instead of re-importing
    # pyarrow on every call.
    table = self.api.Table.from_pandas(df)
    self.parquet.write_table(
        table, path, compression=compression, **kwargs)
42
42
43
43
def read(self, path):
    """Read ``path`` and return the result of ``to_pandas()``.

    Parameters
    ----------
    path : object
        Source handed unchanged to ``self.parquet.read_table``.

    Returns
    -------
    object
        Whatever ``self.parquet.read_table(path).to_pandas()`` returns.
    """
    # Go through the cached ``self.parquet`` handle so the call does not
    # depend on attribute access on a freshly imported top-level package.
    return self.parquet.read_table(path).to_pandas()
46
45
47
46
48
47
class FastParquetImpl (object ):
@@ -52,28 +51,25 @@ def __init__(self):
52
51
# we need to import on first use
53
52
54
53
try :
55
- import fastparquet # noqa
54
+ import fastparquet
56
55
except ImportError :
57
56
raise ImportError ("fastparquet is required for parquet support\n \n "
58
57
"you can install via conda\n "
59
58
"conda install fastparquet -c conda-forge\n "
60
59
"\n or via pip\n "
61
60
"pip install fastparquet" )
61
+ self .api = fastparquet
62
62
63
63
def write(self, df, path, compression=None, **kwargs):
    """Write ``df`` to ``path`` via ``self.api.write``.

    Parameters
    ----------
    df : object
        Data handed to ``self.api.write`` as its second positional argument.
    path : object
        Destination handed to ``self.api.write`` as its first positional
        argument.
    compression : object, optional
        Forwarded as the ``compression`` keyword (default ``None``).
    **kwargs
        Extra keyword arguments forwarded to ``self.api.write``.
    """
    # thriftpy/protocol/compact.py:339:
    # DeprecationWarning: tostring() is deprecated.
    # Use tobytes() instead.
    # Warnings raised during the write are recorded and then discarded.
    with catch_warnings(record=True):
        self.api.write(path, df,
                       compression=compression, **kwargs)
72
70
73
71
def read(self, path):
    """Open ``path`` with ``self.api.ParquetFile`` and return ``to_pandas()``.

    Parameters
    ----------
    path : object
        Source handed unchanged to ``self.api.ParquetFile``.

    Returns
    -------
    object
        Whatever ``self.api.ParquetFile(path).to_pandas()`` returns.
    """
    return self.api.ParquetFile(path).to_pandas()
77
73
78
74
79
75
def to_parquet (df , path , engine , compression = None , ** kwargs ):
0 commit comments