diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 7617ad5b428a2..3fb7b925ceb6b 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -25,6 +25,7 @@ New features dataframe's indexes from the resulting Parquet file. (:issue:`20768`) - :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`) - :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`) +- :func:`read_feather` now accepts ``columns`` as an argument, allowing the user to specify which columns should be read. (:issue:`24025`) .. _whatsnew_0240.values_api: diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 96ebca16d1892..5c8ab37c7c917 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -84,7 +84,7 @@ def to_feather(df, path): @deprecate_kwarg(old_arg_name='nthreads', new_arg_name='use_threads') -def read_feather(path, use_threads=True): +def read_feather(path, columns=None, use_threads=True): """ Load a feather-format object from the file path @@ -93,6 +93,10 @@ def read_feather(path, use_threads=True): Parameters ---------- path : string file path, or file-like object + columns : sequence, default None + If not provided, all columns are read + + .. 
versionadded:: 0.24.0 nthreads : int, default 1 Number of CPU threads to use when reading to pandas.DataFrame @@ -116,6 +120,8 @@ def read_feather(path, use_threads=True): int_use_threads = int(use_threads) if int_use_threads < 1: int_use_threads = 1 - return feather.read_feather(path, nthreads=int_use_threads) + return feather.read_feather(path, columns=columns, + nthreads=int_use_threads) - return feather.read_feather(path, use_threads=bool(use_threads)) + return feather.read_feather(path, columns=columns, + use_threads=bool(use_threads)) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 16b59526c8233..19ecb378b6378 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -26,13 +26,16 @@ def check_error_on_write(self, df, exc): with ensure_clean() as path: to_feather(df, path) - def check_round_trip(self, df, **kwargs): + def check_round_trip(self, df, expected=None, **kwargs): + + if expected is None: + expected = df with ensure_clean() as path: to_feather(df, path) result = read_feather(path, **kwargs) - assert_frame_equal(result, df) + assert_frame_equal(result, expected) def test_error(self): @@ -74,6 +77,16 @@ def test_stringify_columns(self): df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy() self.check_error_on_write(df, ValueError) + def test_read_columns(self): + # GH 24025 + df = pd.DataFrame({'col1': list('abc'), + 'col2': list(range(1, 4)), + 'col3': list('xyz'), + 'col4': list(range(4, 7))}) + columns = ['col1', 'col3'] + self.check_round_trip(df, expected=df[columns], + columns=columns) + def test_unsupported_other(self): # period