Skip to content

Commit 72980fb

Browse files
nixphixTomAugspurger
authored and committed
ENH: Add columns argument to read_feather() (#24025) (#24034)
1 parent 00f5aba commit 72980fb

File tree

3 files changed

+25
-5
lines changed

3 files changed

+25
-5
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ New features
2525
dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
2626
- :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`)
2727
- :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`)
28+
- :func:`read_feather` now accepts ``columns`` as an argument, allowing the user to specify which columns should be read. (:issue:`24025`)
2829

2930
.. _whatsnew_0240.values_api:
3031

pandas/io/feather_format.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def to_feather(df, path):
8484

8585

8686
@deprecate_kwarg(old_arg_name='nthreads', new_arg_name='use_threads')
87-
def read_feather(path, use_threads=True):
87+
def read_feather(path, columns=None, use_threads=True):
8888
"""
8989
Load a feather-format object from the file path
9090
@@ -93,6 +93,10 @@ def read_feather(path, use_threads=True):
9393
Parameters
9494
----------
9595
path : string file path, or file-like object
96+
columns : sequence, default None
97+
If not provided, all columns are read
98+
99+
.. versionadded:: 0.24.0
96100
nthreads : int, default 1
97101
Number of CPU threads to use when reading to pandas.DataFrame
98102
@@ -116,6 +120,8 @@ def read_feather(path, use_threads=True):
116120
int_use_threads = int(use_threads)
117121
if int_use_threads < 1:
118122
int_use_threads = 1
119-
return feather.read_feather(path, nthreads=int_use_threads)
123+
return feather.read_feather(path, columns=columns,
124+
nthreads=int_use_threads)
120125

121-
return feather.read_feather(path, use_threads=bool(use_threads))
126+
return feather.read_feather(path, columns=columns,
127+
use_threads=bool(use_threads))

pandas/tests/io/test_feather.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,16 @@ def check_error_on_write(self, df, exc):
2626
with ensure_clean() as path:
2727
to_feather(df, path)
2828

29-
def check_round_trip(self, df, **kwargs):
29+
def check_round_trip(self, df, expected=None, **kwargs):
30+
31+
if expected is None:
32+
expected = df
3033

3134
with ensure_clean() as path:
3235
to_feather(df, path)
3336

3437
result = read_feather(path, **kwargs)
35-
assert_frame_equal(result, df)
38+
assert_frame_equal(result, expected)
3639

3740
def test_error(self):
3841

@@ -74,6 +77,16 @@ def test_stringify_columns(self):
7477
df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy()
7578
self.check_error_on_write(df, ValueError)
7679

80+
def test_read_columns(self):
81+
# GH 24025
82+
df = pd.DataFrame({'col1': list('abc'),
83+
'col2': list(range(1, 4)),
84+
'col3': list('xyz'),
85+
'col4': list(range(4, 7))})
86+
columns = ['col1', 'col3']
87+
self.check_round_trip(df, expected=df[columns],
88+
columns=columns)
89+
7790
def test_unsupported_other(self):
7891

7992
# period

0 commit comments

Comments
 (0)