@@ -98,9 +98,7 @@ def write(self, df, path, compression='snappy',
               coerce_timestamps='ms', **kwargs):
         self.validate_dataframe(df)
         if self._pyarrow_lt_070:
-            self._validate_write_lt_070(
-                df, path, compression, coerce_timestamps, **kwargs
-            )
+            self._validate_write_lt_070(df)
         path, _, _ = get_filepath_or_buffer(path)
 
         if self._pyarrow_lt_060:
@@ -116,48 +114,46 @@ def write(self, df, path, compression='snappy',
 
     def read(self, path, columns=None, **kwargs):
         path, _, _ = get_filepath_or_buffer(path)
-        parquet_file = self.api.parquet.ParquetFile(path)
         if self._pyarrow_lt_070:
-            return self._read_lt_070(path, parquet_file, columns, **kwargs)
+            return self.api.parquet.read_pandas(path, columns=columns,
+                                                **kwargs).to_pandas()
         kwargs['use_pandas_metadata'] = True
-        return parquet_file.read(columns=columns, **kwargs).to_pandas()
+        return self.api.parquet.read_table(path, columns=columns,
+                                           **kwargs).to_pandas()
 
-    def _validate_write_lt_070(self, df, path, compression='snappy',
-                               coerce_timestamps='ms', **kwargs):
+    def _validate_write_lt_070(self, df):
         # Compatibility shim for pyarrow < 0.7.0
         # TODO: Remove in pandas 0.22.0
         from pandas.core.indexes.multi import MultiIndex
         if isinstance(df.index, MultiIndex):
             msg = (
-                "Mulit-index DataFrames are only supported "
+                "Multi-index DataFrames are only supported "
                 "with pyarrow >= 0.7.0"
             )
             raise ValueError(msg)
         # Validate index
         if not isinstance(df.index, Int64Index):
             msg = (
-                "parquet does not support serializing {} for the index;"
-                "you can .reset_index() to make the index into column(s)"
+                "pyarrow < 0.7.0 does not support serializing {} for the "
+                "index; you can .reset_index() to make the index into "
+                "column(s), or install the latest version of pyarrow or "
+                "fastparquet."
             )
             raise ValueError(msg.format(type(df.index)))
         if not df.index.equals(RangeIndex(len(df))):
             raise ValueError(
-                "parquet does not support serializing a non-default index "
-                "for the index; you can .reset_index() to make the index "
-                "into column(s)"
+                "pyarrow < 0.7.0 does not support serializing a non-default "
+                "index; you can .reset_index() to make the index into "
+                "column(s), or install the latest version of pyarrow or "
+                "fastparquet."
             )
         if df.index.name is not None:
             raise ValueError(
-                "parquet does not serialize index meta-data "
-                "on a default index"
+                "pyarrow < 0.7.0 does not serialize indexes with a name; you "
+                "can set the index.name to None or install the latest version "
+                "of pyarrow or fastparquet."
             )
 
-    def _read_lt_070(self, path, parquet_file, columns, **kwargs):
-        # Compatibility shim for pyarrow < 0.7.0
-        # TODO: Remove in pandas 0.22.0
-        kwargs['columns'] = columns
-        return self.api.parquet.read_pandas(path, **kwargs).to_pandas()
-
 
 
 class FastParquetImpl(BaseImpl):
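
For context, a minimal usage sketch (not part of the patch) of how the pyarrow write/read paths touched above are reached through the public pandas API. The file name and sample data are illustrative, and it assumes a pandas build with this change plus pyarrow installed:

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})

# DataFrame.to_parquet dispatches to PyArrowImpl.write when engine='pyarrow'
df.to_parquet('example.parquet', engine='pyarrow')

# pd.read_parquet dispatches to PyArrowImpl.read; the column selection is
# forwarded to pyarrow's read_table/read_pandas
result = pd.read_parquet('example.parquet', engine='pyarrow', columns=['a'])
print(result)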