-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
restrict columns to read for pandas.read_parquet #18155
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8c247c2
d00d222
c1449f5
22663e8
f31e6a2
f91f5f8
21c5f5e
ef30f39
54fc1c9
e5336b6
d6baa9d
7f6e7f6
4b22c88
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,9 +76,9 @@ def write(self, df, path, compression='snappy', | |
table, path, compression=compression, | ||
coerce_timestamps=coerce_timestamps, **kwargs) | ||
|
||
def read(self, path): | ||
def read(self, path, columns=None): | ||
path, _, _ = get_filepath_or_buffer(path) | ||
return self.api.parquet.read_table(path).to_pandas() | ||
return self.api.parquet.read_table(path, columns=columns).to_pandas() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I’d like to pass through kwargs as well; these won’t be specifically named args, they are just passed through to the engine to validate. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, I think it is good to pass explicit options like columns, which are supported by both backends, and also pass the kwargs to be able to provide additional engine-specific kwargs. I have to look at the test case. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, that’s fine. |
||
|
||
|
||
class FastParquetImpl(object): | ||
|
@@ -115,9 +115,9 @@ def write(self, df, path, compression='snappy', **kwargs): | |
self.api.write(path, df, | ||
compression=compression, **kwargs) | ||
|
||
def read(self, path): | ||
def read(self, path, columns=None): | ||
path, _, _ = get_filepath_or_buffer(path) | ||
return self.api.ParquetFile(path).to_pandas() | ||
return self.api.ParquetFile(path).to_pandas(columns=columns) | ||
|
||
|
||
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs): | ||
|
@@ -178,7 +178,7 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs): | |
return impl.write(df, path, compression=compression) | ||
|
||
|
||
def read_parquet(path, engine='auto', **kwargs): | ||
def read_parquet(path, engine='auto', columns=None, **kwargs): | ||
""" | ||
Load a parquet object from the file path, returning a DataFrame. | ||
|
||
|
@@ -188,6 +188,10 @@ def read_parquet(path, engine='auto', **kwargs): | |
---------- | ||
path : string | ||
File path | ||
columns : list, default None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a versionadded tag. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
If not None, only these columns will be read from the file. | ||
|
||
.. versionadded:: 0.21.1
engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' | ||
Parquet reader library to use. If 'auto', then the option | ||
'io.parquet.engine' is used. If that option is also 'auto', then the first | ||
|
@@ -201,4 +205,4 @@ def read_parquet(path, engine='auto', **kwargs): | |
""" | ||
|
||
impl = get_engine(engine) | ||
return impl.read(path) | ||
return impl.read(path, columns=columns) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the next PR, can you add a versionadded tag here (for 0.21.1)?