From f9b2c7f42ad5a51267262aaaa456fad717a134a8 Mon Sep 17 00:00:00 2001 From: bwozniak Date: Mon, 28 Mar 2022 16:17:39 -0400 Subject: [PATCH 01/14] modified environment file to run --- Dockerfile | 2 +- environment.yml | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 650ba14271092..93f0b85dfb04f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM quay.io/condaforge/miniforge3 # if you forked pandas, you can pass in your own GitHub username to use your fork # i.e. gh_username=myname -ARG gh_username=pandas-dev +ARG gh_username=bwozniak27 ARG pandas_home="/home/pandas" # Avoid warnings by switching to noninteractive diff --git a/environment.yml b/environment.yml index ac8921b12f4a3..590f1a3e2241a 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - pytz # benchmarks - - asv < 0.5.0 # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI + - asv<=0.5.0 # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI # building # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms. 
@@ -118,6 +118,7 @@ dependencies: - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray<0.19 # DataFrame.to_xarray - cftime # Needed for downstream xarray.CFTimeIndex test - - pyreadstat # pandas.read_spss + + # pyreadstat # pandas.read_spss - tabulate>=0.8.3 # DataFrame.to_markdown - natsort # DataFrame.sort_values From 5d0e4d18d58cc6e5a81814d6a34d47855b90f41f Mon Sep 17 00:00:00 2001 From: bwozniak Date: Mon, 28 Mar 2022 16:19:29 -0400 Subject: [PATCH 02/14] docker switch branches --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 93f0b85dfb04f..5c47e7c79e5f9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,7 +33,8 @@ RUN mkdir "$pandas_home" \ && git clone "https://github.com/$gh_username/pandas.git" "$pandas_home" \ && cd "$pandas_home" \ && git remote add upstream "https://github.com/pandas-dev/pandas.git" \ - && git pull upstream main + && git pull upstream main \ + && git checkout join-valueError # Because it is surprisingly difficult to activate a conda environment inside a DockerFile # (from personal experience and per https://github.com/ContinuumIO/docker-images/issues/89), From dd1963b2df9e057d0142bcb44d31ef148218c66b Mon Sep 17 00:00:00 2001 From: bwozniak Date: Mon, 28 Mar 2022 16:23:19 -0400 Subject: [PATCH 03/14] removed asv under 0.5.0 req --- environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 590f1a3e2241a..fd0cc092b4612 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - pytz # benchmarks - - asv<=0.5.0 # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI + - asv # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI # building # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms. 
@@ -118,7 +118,7 @@ dependencies: - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray<0.19 # DataFrame.to_xarray - cftime # Needed for downstream xarray.CFTimeIndex test - + # pyreadstat # pandas.read_spss - tabulate>=0.8.3 # DataFrame.to_markdown - natsort # DataFrame.sort_values From 896f01d7154921edcf82620f8cfde311161b9aa5 Mon Sep 17 00:00:00 2001 From: bwozniak Date: Mon, 28 Mar 2022 16:35:09 -0400 Subject: [PATCH 04/14] added back --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index fd0cc092b4612..b67f849fa38ff 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - pytz # benchmarks - - asv # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI + - asv<0.5.0 # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI # building # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms. From 7c4ec3a2f6aedd73f406a5a7875d523467d0de07 Mon Sep 17 00:00:00 2001 From: bwozniak Date: Mon, 28 Mar 2022 16:40:35 -0400 Subject: [PATCH 05/14] its back --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index b67f849fa38ff..fd0cc092b4612 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - pytz # benchmarks - - asv<0.5.0 # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI + - asv # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI # building # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms. 
From 3af28580c538ed846ebd178787354aafb6f6a3b9 Mon Sep 17 00:00:00 2001 From: bwozniak Date: Mon, 28 Mar 2022 17:08:52 -0400 Subject: [PATCH 06/14] removed asv --- Dockerfile | 2 +- environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5c47e7c79e5f9..f53e7a47e413d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM quay.io/condaforge/miniforge3 # if you forked pandas, you can pass in your own GitHub username to use your fork # i.e. gh_username=myname ARG gh_username=bwozniak27 -ARG pandas_home="/home/pandas" +ARG pandas_home="/home/pandas_container" # Avoid warnings by switching to noninteractive ENV DEBIAN_FRONTEND=noninteractive diff --git a/environment.yml b/environment.yml index fd0cc092b4612..b54707bdd43f2 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - pytz # benchmarks - - asv # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI + # asv # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI # building # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms. From 4781c300b52b95fcf2df681953519df577a97914 Mon Sep 17 00:00:00 2001 From: bwozniak Date: Thu, 31 Mar 2022 12:49:25 -0400 Subject: [PATCH 07/14] original files --- Dockerfile | 7 +++---- environment.yml | 13 ++++++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index f53e7a47e413d..c6217277e35c7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM quay.io/condaforge/miniforge3 # if you forked pandas, you can pass in your own GitHub username to use your fork # i.e. 
gh_username=myname ARG gh_username=bwozniak27 -ARG pandas_home="/home/pandas_container" +ARG pandas_home="/home/pandas" # Avoid warnings by switching to noninteractive ENV DEBIAN_FRONTEND=noninteractive @@ -33,8 +33,7 @@ RUN mkdir "$pandas_home" \ && git clone "https://github.com/$gh_username/pandas.git" "$pandas_home" \ && cd "$pandas_home" \ && git remote add upstream "https://github.com/pandas-dev/pandas.git" \ - && git pull upstream main \ - && git checkout join-valueError + && git pull upstream main # Because it is surprisingly difficult to activate a conda environment inside a DockerFile # (from personal experience and per https://github.com/ContinuumIO/docker-images/issues/89), @@ -51,4 +50,4 @@ RUN . /opt/conda/etc/profile.d/conda.sh \ && cd "$pandas_home" \ && export \ && python setup.py build_ext -j 4 \ - && python -m pip install --no-build-isolation -e . + && python -m pip install --no-build-isolation -e . \ No newline at end of file diff --git a/environment.yml b/environment.yml index b54707bdd43f2..eecd35210e0d9 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - pytz # benchmarks - # asv # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI + - asv < 0.5.0 # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI # building # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms. 
@@ -18,7 +18,7 @@ dependencies: - cython>=0.29.24 # code checks - - black=22.1.0 + - black=22.3.0 - cpplint - flake8=4.0.1 - flake8-bugbear=21.3.2 # used by flake8, find likely bugs @@ -33,7 +33,7 @@ dependencies: - gitpython # obtain contributors from git for whatsnew - gitdb - numpydoc - - pandas-dev-flaker=0.4.0 + - pandas-dev-flaker=0.5.0 - pydata-sphinx-theme - pytest-cython - sphinx @@ -69,7 +69,7 @@ dependencies: - pytest>=6.0 - pytest-cov - pytest-xdist>=1.31 - - pytest-asyncio + - pytest-asyncio>=0.17 - pytest-instafail # downstream tests @@ -118,7 +118,6 @@ dependencies: - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray<0.19 # DataFrame.to_xarray - cftime # Needed for downstream xarray.CFTimeIndex test - - # pyreadstat # pandas.read_spss + - pyreadstat # pandas.read_spss - tabulate>=0.8.3 # DataFrame.to_markdown - - natsort # DataFrame.sort_values + - natsort # DataFrame.sort_values \ No newline at end of file From ee1a87254d644a5e55068a22be0405794c6a2e8a Mon Sep 17 00:00:00 2001 From: bwozniak Date: Sun, 3 Apr 2022 16:49:51 -0400 Subject: [PATCH 08/14] added explicit error and tests --- pandas/core/frame.py | 5 +++++ pandas/tests/frame/methods/test_join.py | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0013ddf73cddc..32625c614168d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9489,6 +9489,11 @@ def _join_compat( "Joining multiple DataFrames only supported for joining on index" ) + if rsuffix or lsuffix: + raise ValueError( + "Suffixes not supported when joining multiple DataFrames" + ) + frames = [self] + list(other) can_concat = all(df.index.is_unique for df in frames) diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index c6bfd94b84908..1ee86c1a2a731 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -82,6 +82,26 @@ def test_join(left, right, how, sort, 
expected): tm.assert_frame_equal(result, expected) +def test_suffix_on_list_join(): + first = DataFrame({"key": [1, 2, 3, 4, 5]}) + second = DataFrame({"key": [1, 8, 3, 2, 5], "v1": [1, 2, 3, 4, 5]}) + third = DataFrame({"keys": [5, 2, 3, 4, 1], "v2": [1, 2, 3, 4, 5]}) + + msg = "Suffixes not supported when joining multiple DataFrames" + with pytest.raises(ValueError, match=msg): + first.join([second], lsuffix="y") + with pytest.raises(ValueError, match=msg): + first.join([second, third], rsuffix="x") + with pytest.raises(ValueError, match=msg): + first.join([second, third], lsuffix="y", rsuffix="x") + with pytest.raises(ValueError, match="Indexes have overlapping values"): + first.join([second, third]) + + arr_joined = first.join([third]) + norm_joined = first.join(third) + tm.assert_frame_equal(arr_joined, norm_joined) + + def test_join_index(float_frame): # left / right From 7f616cd7774e5256cf3d07b20e60a7b771d9c968 Mon Sep 17 00:00:00 2001 From: bwozniak Date: Sun, 3 Apr 2022 17:01:25 -0400 Subject: [PATCH 09/14] remove changes to environment.yml and Dockerfile --- Dockerfile | 2 +- environment.yml | 2 +- pandas/tests/frame/methods/test_join.py | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index c6217277e35c7..12a8727a835cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM quay.io/condaforge/miniforge3 # if you forked pandas, you can pass in your own GitHub username to use your fork # i.e. 
gh_username=myname -ARG gh_username=bwozniak27 +ARG gh_username=pandas-dev ARG pandas_home="/home/pandas" # Avoid warnings by switching to noninteractive diff --git a/environment.yml b/environment.yml index eecd35210e0d9..af78c99e0cbe4 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - pytz # benchmarks - - asv < 0.5.0 # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI + - asv # building # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms. diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 1ee86c1a2a731..36f3d04a7b6ac 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -87,6 +87,7 @@ def test_suffix_on_list_join(): second = DataFrame({"key": [1, 8, 3, 2, 5], "v1": [1, 2, 3, 4, 5]}) third = DataFrame({"keys": [5, 2, 3, 4, 1], "v2": [1, 2, 3, 4, 5]}) + # check proper errors are raised msg = "Suffixes not supported when joining multiple DataFrames" with pytest.raises(ValueError, match=msg): first.join([second], lsuffix="y") @@ -97,6 +98,7 @@ def test_suffix_on_list_join(): with pytest.raises(ValueError, match="Indexes have overlapping values"): first.join([second, third]) + # no errors should be raised arr_joined = first.join([third]) norm_joined = first.join(third) tm.assert_frame_equal(arr_joined, norm_joined) From b1417f2f729ea75def535231d8a018a892818e01 Mon Sep 17 00:00:00 2001 From: bwozniak Date: Sun, 3 Apr 2022 17:13:05 -0400 Subject: [PATCH 10/14] Reverted contents of Dockerfile and environment.yml --- Dockerfile | 2 +- environment.yml | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 12a8727a835cc..650ba14271092 100644 --- a/Dockerfile +++ b/Dockerfile @@ -50,4 +50,4 @@ RUN . 
/opt/conda/etc/profile.d/conda.sh \ && cd "$pandas_home" \ && export \ && python setup.py build_ext -j 4 \ - && python -m pip install --no-build-isolation -e . \ No newline at end of file + && python -m pip install --no-build-isolation -e . diff --git a/environment.yml b/environment.yml index af78c99e0cbe4..ac8921b12f4a3 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - pytz # benchmarks - - asv + - asv < 0.5.0 # 2022-02-08: v0.5.0 > leads to ASV checks running > 3 hours on CI # building # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms. @@ -18,7 +18,7 @@ dependencies: - cython>=0.29.24 # code checks - - black=22.3.0 + - black=22.1.0 - cpplint - flake8=4.0.1 - flake8-bugbear=21.3.2 # used by flake8, find likely bugs @@ -33,7 +33,7 @@ dependencies: - gitpython # obtain contributors from git for whatsnew - gitdb - numpydoc - - pandas-dev-flaker=0.5.0 + - pandas-dev-flaker=0.4.0 - pydata-sphinx-theme - pytest-cython - sphinx @@ -69,7 +69,7 @@ dependencies: - pytest>=6.0 - pytest-cov - pytest-xdist>=1.31 - - pytest-asyncio>=0.17 + - pytest-asyncio - pytest-instafail # downstream tests @@ -120,4 +120,4 @@ dependencies: - cftime # Needed for downstream xarray.CFTimeIndex test - pyreadstat # pandas.read_spss - tabulate>=0.8.3 # DataFrame.to_markdown - - natsort # DataFrame.sort_values \ No newline at end of file + - natsort # DataFrame.sort_values From 25e077fc49a13ddcf199cb1049289f6e663d5627 Mon Sep 17 00:00:00 2001 From: bwozniak Date: Mon, 4 Apr 2022 16:51:43 -0400 Subject: [PATCH 11/14] added to docs --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8c02785647861..7aada91df69c9 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -555,6 +555,7 @@ Styler Other ^^^^^ +- Bug in `DataFrame.join` with a list when using suffixes to 
join DataFrames with duplicate column names (:issue:`46396`) .. ***DO NOT USE THIS SECTION*** From 92c93afb72a64731b836c3b1e54e107781b476bd Mon Sep 17 00:00:00 2001 From: bwozniak Date: Mon, 4 Apr 2022 17:04:04 -0400 Subject: [PATCH 12/14] correct format for rst file --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7aada91df69c9..593197c5f0ac2 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -555,7 +555,7 @@ Styler Other ^^^^^ -- Bug in `DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) +- Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) .. ***DO NOT USE THIS SECTION*** From 9d9f7df0668cb8da605112671c4095e8531edea0 Mon Sep 17 00:00:00 2001 From: bwozniak Date: Wed, 6 Apr 2022 11:41:22 -0400 Subject: [PATCH 13/14] moved documentation to reshaping --- doc/source/whatsnew/v1.5.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 593197c5f0ac2..375b2543a0f01 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -536,7 +536,8 @@ Reshaping - Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`) - Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`) - Bug in concanenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`) -- +- Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) +- Sparse ^^^^^^ @@ -555,7 +556,6 @@ Styler Other ^^^^^ -- Bug in 
:meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) .. ***DO NOT USE THIS SECTION*** From 656051b9b9930c9f1e38ddf61dbc26b59fc2e5ec Mon Sep 17 00:00:00 2001 From: bwozniak Date: Wed, 6 Apr 2022 11:57:13 -0400 Subject: [PATCH 14/14] removed whitespace --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 1523c0a561f34..ebbdb99bf81d2 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -602,7 +602,7 @@ Reshaping - Bug in concanenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`) - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) -- +- Sparse ^^^^^^