Skip to content

Commit 79de86b

Browse files
further clean-up
1 parent 67dffde commit 79de86b

File tree

2 files changed

+22
-27
lines changed

2 files changed

+22
-27
lines changed

pandas/io/parquet.py

+18-22
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,7 @@ def write(self, df, path, compression='snappy',
9898
coerce_timestamps='ms', **kwargs):
9999
self.validate_dataframe(df)
100100
if self._pyarrow_lt_070:
101-
self._validate_write_lt_070(
102-
df, path, compression, coerce_timestamps, **kwargs
103-
)
101+
self._validate_write_lt_070(df)
104102
path, _, _ = get_filepath_or_buffer(path)
105103

106104
if self._pyarrow_lt_060:
@@ -116,48 +114,46 @@ def write(self, df, path, compression='snappy',
116114

117115
def read(self, path, columns=None, **kwargs):
118116
path, _, _ = get_filepath_or_buffer(path)
119-
parquet_file = self.api.parquet.ParquetFile(path)
120117
if self._pyarrow_lt_070:
121-
return self._read_lt_070(path, parquet_file, columns, **kwargs)
118+
return self.api.parquet.read_pandas(path, columns=columns,
119+
**kwargs).to_pandas()
122120
kwargs['use_pandas_metadata'] = True
123-
return parquet_file.read(columns=columns, **kwargs).to_pandas()
121+
return self.api.parquet.read_table(path, columns=columns,
122+
**kwargs).to_pandas()
124123

125-
def _validate_write_lt_070(self, df, path, compression='snappy',
126-
coerce_timestamps='ms', **kwargs):
124+
def _validate_write_lt_070(self, df):
127125
# Compatibility shim for pyarrow < 0.7.0
128126
# TODO: Remove in pandas 0.22.0
129127
from pandas.core.indexes.multi import MultiIndex
130128
if isinstance(df.index, MultiIndex):
131129
msg = (
132-
"Mulit-index DataFrames are only supported "
130+
"Multi-index DataFrames are only supported "
133131
"with pyarrow >= 0.7.0"
134132
)
135133
raise ValueError(msg)
136134
# Validate index
137135
if not isinstance(df.index, Int64Index):
138136
msg = (
139-
"parquet does not support serializing {} for the index;"
140-
"you can .reset_index() to make the index into column(s)"
137+
"pyarrow < 0.7.0 does not support serializing {} for the "
138+
"index; you can .reset_index() to make the index into "
139+
"column(s), or install the latest version of pyarrow or "
140+
"fastparquet."
141141
)
142142
raise ValueError(msg.format(type(df.index)))
143143
if not df.index.equals(RangeIndex(len(df))):
144144
raise ValueError(
145-
"parquet does not support serializing a non-default index "
146-
"for the index; you can .reset_index() to make the index "
147-
"into column(s)"
145+
"pyarrow < 0.7.0 does not support serializing a non-default "
146+
"index; you can .reset_index() to make the index into "
147+
"column(s), or install the latest version of pyarrow or "
148+
"fastparquet."
148149
)
149150
if df.index.name is not None:
150151
raise ValueError(
151-
"parquet does not serialize index meta-data "
152-
"on a default index"
152+
"pyarrow < 0.7.0 does not serialize indexes with a name; you "
153+
"can set the index.name to None or install the latest version "
154+
"of pyarrow or fastparquet."
153155
)
154156

155-
def _read_lt_070(self, path, parquet_file, columns, **kwargs):
156-
# Compatibility shim for pyarrow < 0.7.0
157-
# TODO: Remove in pandas 0.22.0
158-
kwargs['columns'] = columns
159-
return self.api.parquet.read_pandas(path, **kwargs).to_pandas()
160-
161157

162158
class FastParquetImpl(BaseImpl):
163159

pandas/tests/io/test_parquet.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from warnings import catch_warnings
77

88
import numpy as np
9-
from numpy.random import randn
109
import pandas as pd
1110
from pandas.compat import PY3, is_platform_windows
1211
from pandas.io.parquet import (to_parquet, read_parquet, get_engine,
@@ -324,7 +323,7 @@ def test_write_index(self, engine):
324323
# index with meta-data
325324
df.index = [0, 1, 2]
326325
df.index.name = 'foo'
327-
self.check_round_trip( df, engine, write_kwargs={'compression': None})
326+
self.check_round_trip(df, engine, write_kwargs={'compression': None})
328327

329328
def test_write_multiindex(self, pa_ge_070):
330329
# Not supported in fastparquet as of 0.1.3 or older pyarrow version
@@ -345,11 +344,11 @@ def test_multiindex_with_columns(self, pa_ge_070):
345344

346345
engine = pa_ge_070
347346
dates = pd.date_range('01-Jan-2018', '01-Dec-2018', freq='MS')
348-
df = pd.DataFrame(randn(2 * len(dates), 3), columns=list('ABC'))
347+
df = pd.DataFrame(np.random.randn(2 * len(dates), 3),
348+
columns=list('ABC'))
349349
index1 = pd.MultiIndex.from_product(
350350
[['Level1', 'Level2'], dates],
351-
names=['level', 'date']
352-
)
351+
names=['level', 'date'])
353352
index2 = index1.copy(names=None)
354353
for index in [index1, index2]:
355354
df.index = index

0 commit comments

Comments
 (0)