pandas-dev · jreback · Nov 8, 2017 · Nov 7, 2017 · Nov 7, 2017 · Nov 7, 2017
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -109,7 +109,7 @@ I/O
 ^^^
 
 - :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`)
--
+- :func:`read_parquet` now allows specifying the columns to read from a parquet file via the new ``columns`` argument (:issue:`18154`)
 -
 
 Plotting

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
@@ -76,9 +76,9 @@ def write(self, df, path, compression='snappy',
                 table, path, compression=compression,
                 coerce_timestamps=coerce_timestamps, **kwargs)
 
-    def read(self, path):
+    def read(self, path, columns=None):
         path, _, _ = get_filepath_or_buffer(path)
-        return self.api.parquet.read_table(path).to_pandas()
+        return self.api.parquet.read_table(path, columns).to_pandas()
 
 
 class FastParquetImpl(object):
@@ -115,9 +115,9 @@ def write(self, df, path, compression='snappy', **kwargs):
             self.api.write(path, df,
                            compression=compression, **kwargs)
 
-    def read(self, path):
+    def read(self, path, columns=None):
         path, _, _ = get_filepath_or_buffer(path)
-        return self.api.ParquetFile(path).to_pandas()
+        return self.api.ParquetFile(path).to_pandas(columns)
 
 
 def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
@@ -178,7 +178,7 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
     return impl.write(df, path, compression=compression)
 
 
-def read_parquet(path, engine='auto', **kwargs):
+def read_parquet(path, engine='auto', columns=None, **kwargs):
     """
     Load a parquet object from the file path, returning a DataFrame.
 
@@ -188,6 +188,8 @@ def read_parquet(path, engine='auto', **kwargs):
     ----------
     path : string
         File path
+    columns : list, default None
+        If not None, only these columns will be read from the file.
     engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
         Parquet reader library to use. If 'auto', then the option
         'io.parquet.engine' is used. If 'auto', then the first
@@ -201,4 +203,4 @@ def read_parquet(path, engine='auto', **kwargs):
     """
 
     impl = get_engine(engine)
-    return impl.read(path)
+    return impl.read(path, columns)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -282,6 +282,18 @@ def test_compression(self, engine, compression):
         df = pd.DataFrame({'A': [1, 2, 3]})
         self.check_round_trip(df, engine, compression=compression)
 
+    def test_read_columns(self, engine, fp):
+        # GH18154
+        df = pd.DataFrame({'string': list('abc'),
+                           'int': list(range(1, 4))})
+
+        with tm.ensure_clean() as path:
+            df.to_parquet(path, engine, compression=None)
+            result = read_parquet(path, engine, columns=["string"])
+
+            expected = pd.DataFrame({'string': list('abc')})
+            tm.assert_frame_equal(result, expected)
+
 
 class TestParquetPyArrow(Base):
-Original file line number
+Diff line change
@@ Expand Up / @@ -109,7 +109,7 @@ I/O @@
     ^^^
     - :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`)
-    -
+    - :func:`read_parquet` now allows specifying the columns to read from a parquet file via the new ``columns`` argument (:issue:`18154`)
     -
     Plotting
@@ Expand Down @@