From 65ecd54283eb797b949526f1facdc2d2f46373d1 Mon Sep 17 00:00:00 2001 From: prajval Date: Fri, 16 Aug 2019 20:35:22 -0500 Subject: [PATCH 1/5] read_parquet now has path_to_directory accept info --- .clang-format | 0 pandas/io/parquet.py | 4 ++++ 2 files changed, 4 insertions(+) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 6fc70e9f4a737..86196b50c4726 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -269,6 +269,10 @@ def read_parquet(path, engine="auto", columns=None, **kwargs): expected. A local file could be: ``file://localhost/path/to/table.parquet``. + A file path can also be a directory name that contains multiple (partitioned) + parquet files (in addition to a single file path). A directory path could be: + ``file://localhost/path/to/tables``. + If you want to pass in a path object, pandas accepts any ``os.PathLike``. From f8550d1d592ad1c30f125cd76bff73cbfc7066a5 Mon Sep 17 00:00:00 2001 From: prajval Date: Sat, 17 Aug 2019 11:59:49 -0500 Subject: [PATCH 2/5] added example for scatter plot with best line fit --- doc/source/user_guide/visualization.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst index fdceaa5868cec..ba0053d0fd508 100644 --- a/doc/source/user_guide/visualization.rst +++ b/doc/source/user_guide/visualization.rst @@ -1628,3 +1628,19 @@ when plotting a large number of points. :suppress: plt.close('all') + +Examples +~~~~~~~~ + +In order to understand how two variables are correlated, the best fit line +is a good way. You can use ``seaborn.lmplot()`` method that combines ``regplot()`` +and ``FacetGrid`` to plot data and regression model fits across a FacetGrid. + +.. 
ipython:: python + :suppress: + + import seaborn as sns + df4 = pd.DataFrame({'a': np.random.randn(100) + 1, 'b': np.random.randn(100) , + 'c': np.random.randn(100) - 1 }, columns=['a', 'b', 'c']) + + sns.lmplot(x="a", y="b", data = df4) \ No newline at end of file From 037bc1d4d8e8fcd4061fbc5e630f4bedcfa3cb36 Mon Sep 17 00:00:00 2001 From: prajval Date: Sat, 17 Aug 2019 12:51:38 -0500 Subject: [PATCH 3/5] added example for scatter plot with best line fit --- doc/source/user_guide/visualization.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst index ba0053d0fd508..d313e82fefce6 100644 --- a/doc/source/user_guide/visualization.rst +++ b/doc/source/user_guide/visualization.rst @@ -1640,7 +1640,8 @@ and ``FacetGrid`` to plot data and regression model fits across a FacetGrid. :suppress: import seaborn as sns - df4 = pd.DataFrame({'a': np.random.randn(100) + 1, 'b': np.random.randn(100) , - 'c': np.random.randn(100) - 1 }, columns=['a', 'b', 'c']) + df4 = pd.DataFrame({'a': np.random.randn(100) + 1, 'b': np.random.randn(100), + 'c': np.random.randn(100) - 1}, columns=['a', 'b', 'c']) - sns.lmplot(x="a", y="b", data = df4) \ No newline at end of file + sns.lmplot(x="a", y="b", data=df4) + \ No newline at end of file From 71bfe1551baa38787c8049d6cd76aabf2748a3e6 Mon Sep 17 00:00:00 2001 From: prajval Date: Sat, 17 Aug 2019 13:33:42 -0500 Subject: [PATCH 4/5] added example for scatter plot with best line fit --- doc/source/user_guide/visualization.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst index d313e82fefce6..9e1b08879b098 100644 --- a/doc/source/user_guide/visualization.rst +++ b/doc/source/user_guide/visualization.rst @@ -1637,11 +1637,10 @@ is a good way. 
You can use ``seaborn.lmplot()`` method that combines ``regplot() and ``FacetGrid`` to plot data and regression model fits across a FacetGrid. .. ipython:: python - :suppress: + :suppress: - import seaborn as sns - df4 = pd.DataFrame({'a': np.random.randn(100) + 1, 'b': np.random.randn(100), + import seaborn as sns + df4 = pd.DataFrame({'a': np.random.randn(100) + 1, 'b': np.random.randn(100), 'c': np.random.randn(100) - 1}, columns=['a', 'b', 'c']) - sns.lmplot(x="a", y="b", data=df4) - \ No newline at end of file + sns.lmplot(x="a", y="b", data=df4) From b929efd65964c2bf51b6132af6aac07c8833daa0 Mon Sep 17 00:00:00 2001 From: prajval Date: Sat, 17 Aug 2019 15:44:23 -0500 Subject: [PATCH 5/5] Series.rename now handles values by Series constructor --- pandas/core/series.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c891298d6e499..6ea8f6db60e92 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4165,12 +4165,10 @@ def rename(self, index=None, **kwargs): """ kwargs["inplace"] = validate_bool_kwarg(kwargs.get("inplace", False), "inplace") - non_mapping = is_scalar(index) or ( - is_list_like(index) and not is_dict_like(index) - ) - if non_mapping: + if callable(index) or is_dict_like(index): + return super().rename(index=index, **kwargs) + else: return self._set_name(index, inplace=kwargs.get("inplace")) - return super().rename(index=index, **kwargs) @Substitution(**_shared_doc_kwargs) @Appender(generic.NDFrame.reindex.__doc__)