diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index f216418c3a8b0..4c8bf62490d82 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -991,6 +991,7 @@ I/O
 - Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
 - Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with difference dtypes when reading data using an iterator. (:issue:`31544`)
 - :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)
+- `pandas.io.parquet.PyArrowImpl` now infers `filesystem` using the provided `path` if `filesystem` is not provided via `kwargs`. (:issue:`34841`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index de9a14c82b3cb..530bb6e326e94 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -104,6 +104,9 @@ def write(
             from_pandas_kwargs["preserve_index"] = index
 
         table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
+
+        fs = kwargs.pop("filesystem", get_fs_for_path(path))
+
         # write_to_dataset does not support a file-like object when
         # a directory path is used, so just pass the path string.
         if partition_cols is not None:
@@ -111,12 +114,13 @@ def write(
                 table,
                 path,
                 compression=compression,
+                filesystem=fs,
                 partition_cols=partition_cols,
                 **kwargs,
             )
         else:
             self.api.parquet.write_table(
-                table, file_obj_or_path, compression=compression, **kwargs
+                table, file_obj_or_path, compression=compression, **kwargs,
             )
         if should_close:
             file_obj_or_path.close()
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index efd34c58d7d19..b6ed8e8b3652a 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -568,6 +568,24 @@ def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
             repeat=1,
         )
 
+    @td.skip_if_no("s3fs")
+    @pytest.mark.parametrize("partition_col", [["A"], []])
+    def test_s3_roundtrip_for_dir_infer_fs(
+        self, df_compat, s3_resource, pa, partition_col
+    ):
+        expected_df = df_compat.copy()
+        if partition_col:
+            expected_df[partition_col] = expected_df[partition_col].astype("category")
+        check_round_trip(
+            df_compat,
+            pa,
+            expected=expected_df,
+            path="s3://pandas-test/parquet_dir",
+            write_kwargs={"partition_cols": partition_col, "compression": None},
+            check_like=True,
+            repeat=1,
+        )
+
     @tm.network
     @td.skip_if_no("pyarrow")
     def test_parquet_read_from_url(self, df_compat):
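
For context, a minimal sketch of the user-facing effect of this patch, assuming pyarrow and s3fs are installed and AWS credentials are configured; the bucket name and DataFrame here are hypothetical, not from the patch itself:

```python
import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 3]})

# Partitioned write to a hypothetical bucket. With this change,
# PyArrowImpl.write() falls back to get_fs_for_path(path), which resolves
# an s3fs filesystem for "s3://" paths, so write_to_dataset receives a
# usable filesystem without the caller passing one explicitly.
df.to_parquet(
    "s3://my-bucket/parquet_dir",  # hypothetical bucket; needs s3fs + credentials
    partition_cols=["A"],
    compression=None,
)

# An explicit filesystem still takes precedence: kwargs.pop("filesystem", ...)
# returns the caller's value when present, and only the inferred filesystem
# is used when it is absent.
import s3fs

df.to_parquet(
    "s3://my-bucket/parquet_dir",
    partition_cols=["A"],
    filesystem=s3fs.S3FileSystem(anon=False),
)
```

Note that in the non-partitioned branch the popped `filesystem` is not forwarded to `write_table`: single-file writes go through the `file_obj_or_path` already resolved by `get_filepath_or_buffer`, so only the directory-path `write_to_dataset` branch needs the inferred filesystem.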