@@ -86,9 +86,12 @@ def __init__(self):
                 "\n or via pip\n "
                 "pip install -U pyarrow\n "
             )
+
+        self._pyarrow_lt_060 = (
+            LooseVersion(pyarrow.__version__) < LooseVersion('0.6.0'))
         self._pyarrow_lt_070 = (
-            LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0')
-        )
+            LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'))
+
         self.api = pyarrow

     def write(self, df, path, compression='snappy',
@@ -99,17 +102,23 @@ def write(self, df, path, compression='snappy',
             df, path, compression, coerce_timestamps, **kwargs
         )
         path, _, _ = get_filepath_or_buffer(path)
-        table = self.api.Table.from_pandas(df)
-        self.api.parquet.write_table(
-            table, path, compression=compression,
-            coerce_timestamps=coerce_timestamps, **kwargs)
+
+        if self._pyarrow_lt_060:
+            table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
+            self.api.parquet.write_table(
+                table, path, compression=compression, **kwargs)
+
+        else:
+            table = self.api.Table.from_pandas(df)
+            self.api.parquet.write_table(
+                table, path, compression=compression,
+                coerce_timestamps=coerce_timestamps, **kwargs)

     def read(self, path, columns=None, **kwargs):
         path, _, _ = get_filepath_or_buffer(path)
         parquet_file = self.api.parquet.ParquetFile(path)
         if self._pyarrow_lt_070:
-            parquet_file.path = path
-            return self._read_lt_070(parquet_file, columns, **kwargs)
+            return self._read_lt_070(path, parquet_file, columns, **kwargs)
         kwargs['use_pandas_metadata'] = True
         return parquet_file.read(columns=columns, **kwargs).to_pandas()

@@ -143,17 +152,17 @@ def _validate_write_lt_070(self, df, path, compression='snappy',
                 "on a default index"
             )

-    def _read_lt_070(self, parquet_file, columns, **kwargs):
+    def _read_lt_070(self, path, parquet_file, columns, **kwargs):
         # Compatibility shim for pyarrow < 0.7.0
         # TODO: Remove in pandas 0.22.0
         from itertools import chain
         import json
         if columns is not None:
-            metadata = json.loads(parquet_file.metadata.metadata[b'pandas'])
+            metadata = json.loads(
+                parquet_file.metadata.metadata[b'pandas'].decode('utf-8'))
             columns = set(chain(columns, metadata['index_columns']))
         kwargs['columns'] = columns
-        kwargs['path'] = parquet_file.path
-        return self.api.parquet.read_table(**kwargs).to_pandas()
+        return self.api.parquet.read_table(path, **kwargs).to_pandas()


 class FastParquetImpl(BaseImpl):
0 commit comments