From 40be9ac0b5b4525b6b244ac494dc56ea50a0bb23 Mon Sep 17 00:00:00 2001
From: Blake Hawkins
Date: Sat, 9 Nov 2019 23:01:56 -0600
Subject: [PATCH 1/7] Fixed docs error

---
 doc/example.feather | Bin 0 -> 1120 bytes
 doc/source/conf.py  |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 doc/example.feather

diff --git a/doc/example.feather b/doc/example.feather
new file mode 100644
index 0000000000000000000000000000000000000000..38dbf087a1590606ae40aaab16fd2d592b771b0e
GIT binary patch
literal 1120
zcmZuxyGlbr5S^R5Nz{a}NW>HtK_sM7>|!IaNGdxCSXd;{_yCQdR#tw3l~`I>OA5ci
zQbZ6$tgQTs#B+9c!YinJCM9>)^273K{XQhooJB2<^&GtHH$(bNO86J_M!J
zyGzt-fVTu_y`vB(l0%(5PA9RI8omTJPF`^*F6IUYC9RG~2~6L$#r{j48ZpBzkgR*?
zeK=XyxVoIL>-SgCJxtfJc!iX{qHf5i{ID};{XtI7TL*ORLEuwa*BRfN-m;sO{Zgj$
ztzwsCRRGO(Kw^-p-}PKGd}}=UlwnHzzIVB^>*KSHVAFV==PAS|XXz*6tt_YQ$5f~C
oX+L~3>;6heD7rV}T*>^s5KCq8YfagILHpBwHUBLPZTxTf15K=C=l}o!

literal 0
HcmV?d00001

diff --git a/doc/source/conf.py b/doc/source/conf.py
index b4f719b6e64b2..845e4d5f5fde1 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -194,7 +194,7 @@
 
 # The theme to use for HTML and HTML Help pages. Major themes that come with
 # Sphinx are currently 'default' and 'sphinxdoc'.
-html_theme = "pandas_sphinx_theme"
+html_theme = "default"
 
 # The style sheet to use for HTML and HTML Help pages. A file of that name
 # must exist either in Sphinx' static/ path, or in one of the custom paths

From 398e75b6f0dbce2bc9bc404708788942ae7bc806 Mon Sep 17 00:00:00 2001
From: Blake Hawkins
Date: Wed, 11 Dec 2019 00:41:08 -0600
Subject: [PATCH 2/7] Added check for string in parquet.py and tests in test_parquet.py

---
 pandas/io/parquet.py            |  4 +++-
 pandas/tests/io/test_parquet.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index a044cfcdf6a01..51eac8d481231 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -234,7 +234,7 @@ def to_parquet(
 
         .. versionadded:: 0.24.0
 
-    partition_cols : list, optional, default None
+    partition_cols : str or list, optional, default None
         Column names by which to partition the dataset
         Columns are partitioned in the order they are given
 
@@ -243,6 +243,8 @@ def to_parquet(
     kwargs
         Additional keyword arguments passed to the engine
     """
+    if isinstance(partition_cols, str):
+        partition_cols = [partition_cols]
     impl = get_engine(engine)
     return impl.write(
         df,
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index a98c93c250070..ed26f1f1773ca 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -499,6 +499,18 @@ def test_partition_cols_supported(self, pa, df_full):
         assert len(dataset.partitions.partition_names) == 2
         assert dataset.partitions.partition_names == set(partition_cols)
 
+    def test_partition_cols_string(self, pa, df_full):
+        partition_cols = "bool"
+        partition_cols_list = [partition_cols]
+        df = df_full
+        with tm.ensure_clean_dir() as path:
+            df.to_parquet(path, partition_cols=partition_cols, compression=None)
+            import pyarrow.parquet as pq
+
+            dataset = pq.ParquetDataset(path, validate_schema=False)
+            assert len(dataset.partitions.partition_names) == 1
+            assert dataset.partitions.partition_names == set(partition_cols_list)
+
     def test_empty_dataframe(self, pa):
         # GH #27339
         df = pd.DataFrame()
@@ -595,6 +607,22 @@ def test_partition_cols_supported(self, fp, df_full):
         actual_partition_cols = fastparquet.ParquetFile(path, False).cats
         assert len(actual_partition_cols) == 2
 
+    def test_to_parquet_partition_cols_string(self, fp, df_full):
+        partition_cols = "bool"
+        df = df_full
+        with tm.ensure_clean_dir() as path:
+            df.to_parquet(
+                path,
+                engine="fastparquet",
+                partition_cols=partition_cols,
+                compression=None,
+            )
+            assert os.path.exists(path)
+            import fastparquet
+
+            actual_partition_cols = fastparquet.ParquetFile(path, False).cats
+            assert len(actual_partition_cols) == 1
+
     def test_partition_on_supported(self, fp, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]

From 3ad976c8df4e21e4dcc10ee13d9db301ba64a82b Mon Sep 17 00:00:00 2001
From: Blake Hawkins
Date: Wed, 11 Dec 2019 10:46:10 -0600
Subject: [PATCH 3/7] Added whatsnew entry and fixed conf.py

---
 doc/source/conf.py             | 2 +-
 doc/source/whatsnew/v1.0.0.rst | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/conf.py b/doc/source/conf.py
index 6d5f4d31d59dd..096f1a63eddf8 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -194,7 +194,7 @@
 
 # The theme to use for HTML and HTML Help pages. Major themes that come with
 # Sphinx are currently 'default' and 'sphinxdoc'.
-html_theme = "default"
+html_theme = "pandas_sphinx_theme"
 
 # The style sheet to use for HTML and HTML Help pages. A file of that name
 # must exist either in Sphinx' static/ path, or in one of the custom paths
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 3c2e06cb9cf9a..ae194a8f55c51 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -204,6 +204,7 @@ Other enhancements
 - Roundtripping DataFrames with nullable integer or string data types to parquet
   (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
   now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
+- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
 
 Build Changes
 ^^^^^^^^^^^^^

From e67fe24e92ecab2035f91193fecac70e887a5bdd Mon Sep 17 00:00:00 2001
From: Blake Hawkins
Date: Wed, 11 Dec 2019 10:48:40 -0600
Subject: [PATCH 4/7] Removed example.feather and applying black formatting

---
 doc/example.feather             | Bin 1120 -> 0 bytes
 pandas/tests/io/test_parquet.py |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 delete mode 100644 doc/example.feather

diff --git a/doc/example.feather b/doc/example.feather
deleted file mode 100644
index 38dbf087a1590606ae40aaab16fd2d592b771b0e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1120
zcmZuxyGlbr5S^R5Nz{a}NW>HtK_sM7>|!IaNGdxCSXd;{_yCQdR#tw3l~`I>OA5ci
zQbZ6$tgQTs#B+9c!YinJCM9>)^273K{XQhooJB2<^&GtHH$(bNO86J_M!J
zyGzt-fVTu_y`vB(l0%(5PA9RI8omTJPF`^*F6IUYC9RG~2~6L$#r{j48ZpBzkgR*?
zeK=XyxVoIL>-SgCJxtfJc!iX{qHf5i{ID};{XtI7TL*ORLEuwa*BRfN-m;sO{Zgj$
ztzwsCRRGO(Kw^-p-}PKGd}}=UlwnHzzIVB^>*KSHVAFV==PAS|XXz*6tt_YQ$5f~C
oX+L~3>;6heD7rV}T*>^s5KCq8YfagILHpBwHUBLPZTxTf15K=C=l}o!

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index ed26f1f1773ca..212ba00ff5ea2 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -619,7 +619,7 @@ def test_to_parquet_partition_cols_string(self, fp, df_full):
             )
             assert os.path.exists(path)
             import fastparquet
-
+
             actual_partition_cols = fastparquet.ParquetFile(path, False).cats
             assert len(actual_partition_cols) == 1
 

From a83fd9950c6f6cee6e6fef15828c1078fa1588de Mon Sep 17 00:00:00 2001
From: Blake Hawkins
Date: Wed, 11 Dec 2019 11:04:48 -0600
Subject: [PATCH 5/7] Update test_parquet.py

---
 pandas/tests/io/test_parquet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 212ba00ff5ea2..919854936c4a0 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -618,7 +618,7 @@ def test_to_parquet_partition_cols_string(self, fp, df_full):
                 compression=None,
             )
             assert os.path.exists(path)
-            import fastparquet
+            import fastparquet  # noqa: F811
 
             actual_partition_cols = fastparquet.ParquetFile(path, False).cats
             assert len(actual_partition_cols) == 1

From 535bb75b18bc6803a442f601d5a2b10a38b9f68f Mon Sep 17 00:00:00 2001
From: Blake Hawkins
Date: Wed, 11 Dec 2019 11:05:19 -0600
Subject: [PATCH 6/7] Update test_parquet.py

---
 pandas/tests/io/test_parquet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 919854936c4a0..9ab78b8e000b8 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -618,7 +618,7 @@ def test_to_parquet_partition_cols_string(self, fp, df_full):
                 compression=None,
             )
             assert os.path.exists(path)
-            import fastparquet # noqa: F811
+            import fastparquet  # noqa: F811
 
             actual_partition_cols = fastparquet.ParquetFile(path, False).cats
             assert len(actual_partition_cols) == 1

From 6f5922f6c226677b24d9978927d090196e3c0bce Mon Sep 17 00:00:00 2001
From: Blake Hawkins
Date: Thu, 12 Dec 2019 17:40:56 -0600
Subject: [PATCH 7/7] Added issue reference and updated test name

---
 pandas/tests/io/test_parquet.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 9ab78b8e000b8..251548e7caaab 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -500,6 +500,7 @@ def test_partition_cols_supported(self, pa, df_full):
         assert dataset.partitions.partition_names == set(partition_cols)
 
     def test_partition_cols_string(self, pa, df_full):
+        # GH #27117
         partition_cols = "bool"
         partition_cols_list = [partition_cols]
         df = df_full
@@ -607,7 +608,8 @@ def test_partition_cols_supported(self, fp, df_full):
         actual_partition_cols = fastparquet.ParquetFile(path, False).cats
         assert len(actual_partition_cols) == 2
 
-    def test_to_parquet_partition_cols_string(self, fp, df_full):
+    def test_partition_cols_string(self, fp, df_full):
+        # GH #27117
         partition_cols = "bool"
         df = df_full
         with tm.ensure_clean_dir() as path:
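
For reference, a minimal usage sketch of the behavior this series adds: passing a single column name as a string to ``partition_cols``. It assumes a pandas build that contains the change above and an installed pyarrow; the DataFrame contents, the "region" and "sales" column names, and the "sales_by_region" output directory are illustrative only and do not come from the patches.

import pandas as pd

# Illustrative data; any DataFrame with a low-cardinality column will do.
df = pd.DataFrame({"region": ["east", "west", "east"], "sales": [10, 20, 30]})

# Previously partition_cols had to be a list, e.g. partition_cols=["region"].
# With the isinstance(partition_cols, str) check added in pandas/io/parquet.py,
# a bare string is wrapped into a one-element list before reaching the engine.
df.to_parquet("sales_by_region", engine="pyarrow", partition_cols="region")

# Read the partitioned dataset back; the partition column is restored.
print(pd.read_parquet("sales_by_region", engine="pyarrow"))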