From b83ddc4146548390bd030b155db5e07b587e4139 Mon Sep 17 00:00:00 2001 From: Chris Stadler Date: Thu, 11 Jul 2019 12:24:00 -0400 Subject: [PATCH 1/2] BUG: Accept empty dataframes in DataFrame.to_parquet --- pandas/io/parquet.py | 2 +- pandas/tests/io/test_parquet.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 3db05b94e5dce..a2502df45169f 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -55,7 +55,7 @@ def validate_dataframe(df): raise ValueError("to_parquet only supports IO with DataFrames") # must have value column names (strings only) - if df.columns.inferred_type not in {"string", "unicode"}: + if df.columns.inferred_type not in {"string", "unicode", "empty"}: raise ValueError("parquet must have string column names") # index level names must be strings diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 6ac2e9cd65a27..b2f55a43047cc 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -471,6 +471,10 @@ def test_partition_cols_supported(self, pa, df_full): assert len(dataset.partitions.partition_names) == 2 assert dataset.partitions.partition_names == set(partition_cols) + def test_empty_dataframe(self, pa): + df = pd.DataFrame() + check_round_trip(df, pa) + class TestParquetFastParquet(Base): @td.skip_if_no("fastparquet", min_version="0.2.1") @@ -566,3 +570,9 @@ def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full): partition_on=partition_cols, partition_cols=partition_cols, ) + + def test_empty_dataframe(self, fp): + df = pd.DataFrame() + expected = df.copy() + expected.index.name = "index" + check_round_trip(df, fp, expected=expected) From ae8e024f5c4470eb2e9524d14bf61a46991f5715 Mon Sep 17 00:00:00 2001 From: Chris Stadler Date: Thu, 11 Jul 2019 14:24:54 -0400 Subject: [PATCH 2/2] Update whatsnew and add issue number to tests --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/tests/io/test_parquet.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index ebe8b4770f6aa..f85449bf206e4 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -1101,6 +1101,7 @@ I/O - Fixed bug in :func:`DataFrame.to_excel()` where custom objects (i.e. `PeriodIndex`) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`) - Bug in :meth:`read_hdf` where reading a timezone aware :class:`DatetimeIndex` would raise a ``TypeError`` (:issue:`11926`) - Bug in :meth:`to_msgpack` and :meth:`read_msgpack` which would raise a ``ValueError`` rather than a ``FileNotFoundError`` for an invalid path (:issue:`27160`) +- Fixed bug in :meth:`DataFrame.to_parquet` which would raise a ``ValueError`` when the dataframe had no columns (:issue:`27339`) Plotting ^^^^^^^^ diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b2f55a43047cc..a04fb9fd50257 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -472,6 +472,7 @@ def test_partition_cols_supported(self, pa, df_full): assert dataset.partitions.partition_names == set(partition_cols) def test_empty_dataframe(self, pa): + # GH #27339 df = pd.DataFrame() check_round_trip(df, pa) @@ -572,6 +573,7 @@ def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full): ) def test_empty_dataframe(self, fp): + # GH #27339 df = pd.DataFrame() expected = df.copy() expected.index.name = "index"