From bb520d105f0a2ade434dde713bad6feaab50e485 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Mon, 15 Jan 2024 12:07:54 -0800 Subject: [PATCH 01/13] add warning --- pandas/__init__.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/__init__.py b/pandas/__init__.py index 7fab662ed2de4..e11ae0f13a6e3 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -205,6 +205,25 @@ # Don't allow users to use pandas.os or pandas.warnings del os, warnings +# DeprecationWarning for missing pyarrow +from pandas.compat.pyarrow import pa_version_under10p1 + +if not pa_version_under10p1: + import warnings + from pandas.compat._optional import VERSIONS + + warnings.warn( + "Pyarrow will become a future required dependency of pandas," + "but was not found to be installed on your system." + f"(or was too old - pyarrow {VERSIONS['pyarrow']} is the current" + f"minimum supported version as of this release)" + "If this would cause problems for you, " + "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466", + DeprecationWarning, + stacklevel=2, + ) +del pa_version_under10p1 + # module level doc-string __doc__ = """ pandas - a powerful data analysis and manipulation library for Python From a3b3dc3c30d279f3f35b160e5bc64613cc991e55 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Mon, 15 Jan 2024 13:26:09 -0800 Subject: [PATCH 02/13] DEPS: Add warning if pyarrow is not installed --- pandas/__init__.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index e11ae0f13a6e3..251e7b6732bf8 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -208,17 +208,19 @@ # DeprecationWarning for missing pyarrow from pandas.compat.pyarrow import pa_version_under10p1 -if not pa_version_under10p1: +if pa_version_under10p1: import warnings from pandas.compat._optional import VERSIONS warnings.warn( - "Pyarrow will become a future required dependency of pandas," - "but was not found to be installed on your system." - f"(or was too old - pyarrow {VERSIONS['pyarrow']} is the current" - f"minimum supported version as of this release)" - "If this would cause problems for you, " - "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466", + f""" +Pyarrow will become a future required dependency of pandas, +but was not found to be installed on your system. +(or was too old-pyarrow {VERSIONS['pyarrow']} +is the current minimum supported version as of this release) +If this would cause problems for you, +please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466 + """, DeprecationWarning, stacklevel=2, ) From 944c2887c5c860428157ec02215cf98aefd18c75 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Mon, 15 Jan 2024 13:29:05 -0800 Subject: [PATCH 03/13] formatting --- pandas/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 251e7b6732bf8..ede56920756d6 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -216,7 +216,7 @@ f""" Pyarrow will become a future required dependency of pandas, but was not found to be installed on your system. -(or was too old-pyarrow {VERSIONS['pyarrow']} +(or was too old - pyarrow {VERSIONS['pyarrow']} is the current minimum supported version as of this release) If this would cause problems for you, please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466 From 613eac784f37528c11e4b89698c418ab93f5bb53 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Mon, 15 Jan 2024 20:09:42 -0800 Subject: [PATCH 04/13] Update __init__.py --- pandas/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index ede56920756d6..70f5dcb3d1088 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -202,8 +202,8 @@ FutureWarning, stacklevel=2, ) -# Don't allow users to use pandas.os or pandas.warnings -del os, warnings +# Don't allow users to use pandas.os +del os # DeprecationWarning for missing pyarrow from pandas.compat.pyarrow import pa_version_under10p1 @@ -224,7 +224,7 @@ DeprecationWarning, stacklevel=2, ) -del pa_version_under10p1 +del pa_version_under10p1, warnings # module level doc-string __doc__ = """ From 21509c5021377861bf017430689eac04c8ca1958 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Tue, 16 Jan 2024 16:41:24 -0800 Subject: [PATCH 05/13] adjustments --- pandas/__init__.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 70f5dcb3d1088..321a8f2b7fd76 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -209,21 +209,31 @@ from pandas.compat.pyarrow import pa_version_under10p1 if pa_version_under10p1: - import warnings + # Check if old pyarrow is installed from pandas.compat._optional import VERSIONS + try: + import pyarrow # noqa: F401 + + pa_msg = ( + f"was too old on your system - pyarrow {VERSIONS['pyarrow']} " + "is the current minimum supported version as of this release." + ) + except ImportError: + pa_msg = "was not found to be installed on your system." + warnings.warn( f""" Pyarrow will become a future required dependency of pandas, -but was not found to be installed on your system. -(or was too old - pyarrow {VERSIONS['pyarrow']} -is the current minimum supported version as of this release) +(to allow more performant data types and better interoperability with other libraries) +but {pa_msg} If this would cause problems for you, please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466 """, DeprecationWarning, stacklevel=2, ) + del VERSIONS del pa_version_under10p1, warnings # module level doc-string From c6ae03d009b60c1f1dec961abe2d35e4e8557878 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Tue, 16 Jan 2024 16:42:42 -0800 Subject: [PATCH 06/13] adjustments --- pandas/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 321a8f2b7fd76..9d3056e20d58d 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -224,7 +224,7 @@ warnings.warn( f""" -Pyarrow will become a future required dependency of pandas, +Pyarrow will become a required dependency of pandas in the next major release of pandas, (to allow more performant data types and better interoperability with other libraries) but {pa_msg} If this would cause problems for you, From 7b48501623859807088c5820df884ed341086866 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 17 Jan 2024 07:42:11 -0800 Subject: [PATCH 07/13] updates --- pandas/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 9d3056e20d58d..2c3ccb7e65e41 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -202,8 +202,6 @@ FutureWarning, stacklevel=2, ) -# Don't allow users to use pandas.os -del os # DeprecationWarning for missing pyarrow from pandas.compat.pyarrow import pa_version_under10p1 @@ -213,12 +211,13 @@ from pandas.compat._optional import VERSIONS try: - import pyarrow # noqa: F401 + import pyarrow pa_msg = ( f"was too old on your system - pyarrow {VERSIONS['pyarrow']} " "is the current minimum supported version as of this release." ) + del pyarrow except ImportError: pa_msg = "was not found to be installed on your system." @@ -234,7 +233,9 @@ stacklevel=2, ) del VERSIONS -del pa_version_under10p1, warnings + +# Delete all unnecessary imported modules +del pa_version_under10p1, warnings, os # module level doc-string __doc__ = """ From 199fb7c7dd65fb6a903e00618fdd4b60a34f62e0 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 17 Jan 2024 10:18:42 -0800 Subject: [PATCH 08/13] address code review --- pandas/__init__.py | 21 +++++++++------------ pandas/compat/pyarrow.py | 2 ++ pandas/tests/test_common.py | 23 +++++++++++++++++++++++ 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 2c3ccb7e65e41..33612a894d038 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -204,38 +204,35 @@ ) # DeprecationWarning for missing pyarrow -from pandas.compat.pyarrow import pa_version_under10p1 +from pandas.compat.pyarrow import pa_version_under10p1, pa_not_found if pa_version_under10p1: - # Check if old pyarrow is installed + # pyarrow is either too old or nonexistent, warn from pandas.compat._optional import VERSIONS - try: - import pyarrow - + if pa_not_found: + pa_msg = "was not found to be installed on your system." + else: pa_msg = ( f"was too old on your system - pyarrow {VERSIONS['pyarrow']} " "is the current minimum supported version as of this release." ) - del pyarrow - except ImportError: - pa_msg = "was not found to be installed on your system." warnings.warn( f""" -Pyarrow will become a required dependency of pandas in the next major release of pandas, -(to allow more performant data types and better interoperability with other libraries) +Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), +(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) but {pa_msg} If this would cause problems for you, please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466 - """, + """, # noqa: E501 DeprecationWarning, stacklevel=2, ) del VERSIONS # Delete all unnecessary imported modules -del pa_version_under10p1, warnings, os +del pa_version_under10p1, pa_not_found, warnings, os # module level doc-string __doc__ = """ diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index beb4814914101..2e151123ef2c9 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -8,6 +8,7 @@ import pyarrow as pa _palv = Version(Version(pa.__version__).base_version) + pa_not_found = False pa_version_under10p1 = _palv < Version("10.0.1") pa_version_under11p0 = _palv < Version("11.0.0") pa_version_under12p0 = _palv < Version("12.0.0") @@ -16,6 +17,7 @@ pa_version_under14p1 = _palv < Version("14.0.1") pa_version_under15p0 = _palv < Version("15.0.0") except ImportError: + pa_not_found = True pa_version_under10p1 = True pa_version_under11p0 = True pa_version_under12p0 = True diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index e8a1c961c8cb6..00dca96799b05 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -265,3 +265,26 @@ def test_bz2_missing_import(): code = textwrap.dedent(code) call = [sys.executable, "-c", code] subprocess.check_output(call) + + +import pandas.util._test_decorators as td + + +@td.skip_if_installed("pyarrow") +@pytest.mark.parametrize("module", ["pandas", "pandas.arrays"]) +def test_pyarrow_missing_warn(module): + # GH56896 + response = subprocess.run( + [sys.executable, "-c", f"import {module}"], + capture_output=True, + check=True, + ) + msg = """ +Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), +(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) +but was not found to be installed on your system. +If this would cause problems for you, +please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466 +""" # noqa: E501 + stderr_msg = response.stderr.decode("utf-8") + assert msg in stderr_msg, stderr_msg From 4637a96afcb582dec8d6ac7cdb3405d6efe0365a Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 17 Jan 2024 11:28:38 -0800 Subject: [PATCH 09/13] Update __init__.py --- pandas/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 33612a894d038..ed524c2bb3619 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -229,7 +229,7 @@ DeprecationWarning, stacklevel=2, ) - del VERSIONS + del VERSIONS, pa_msg # Delete all unnecessary imported modules del pa_version_under10p1, pa_not_found, warnings, os From aa48e193f438e762d8f40fc9b3e4b6e471c8af0c Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 18 Jan 2024 16:21:49 -0800 Subject: [PATCH 10/13] add pyarrow to npdev build --- ci/deps/actions-311-numpydev.yaml | 1 + pandas/tests/test_common.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/deps/actions-311-numpydev.yaml b/ci/deps/actions-311-numpydev.yaml index b62e8630f2059..d6451cf2577fd 100644 --- a/ci/deps/actions-311-numpydev.yaml +++ b/ci/deps/actions-311-numpydev.yaml @@ -20,6 +20,7 @@ dependencies: - hypothesis>=6.46.1 # pandas dependencies + - pyarrow>=10.0.1 - python-dateutil - pytz - pip diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 00dca96799b05..fe24755e8cc23 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -8,6 +8,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import Series import pandas._testing as tm @@ -267,9 +269,6 @@ def test_bz2_missing_import(): subprocess.check_output(call) -import pandas.util._test_decorators as td - - @td.skip_if_installed("pyarrow") @pytest.mark.parametrize("module", ["pandas", "pandas.arrays"]) def test_pyarrow_missing_warn(module): From 3aa3ba95123852d3f7980ef567e556c23547103c Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 18 Jan 2024 20:53:55 -0800 Subject: [PATCH 11/13] ignore non numpy related deprecationwarnings/futurewarning --- .github/workflows/unit-tests.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index dd5d090e098b0..8ade9f689db2d 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -92,7 +92,10 @@ jobs: - name: "Numpy Dev" env_file: actions-311-numpydev.yaml pattern: "not slow and not network and not single_cpu" - test_args: "-W error::DeprecationWarning -W error::FutureWarning" + # Currently restricted the warnings that error to Deprecation Warnings from numpy + # + # TODO: + test_args: "-W error::DeprecationWarning:numpy -W error::FutureWarning:numpy" - name: "Pyarrow Nightly" env_file: actions-311-pyarrownightly.yaml pattern: "not slow and not network and not single_cpu" From 0074b031c6571abe2e6e43f12e1e0af3fec6c166 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 18 Jan 2024 20:54:56 -0800 Subject: [PATCH 12/13] ignore non numpy related deprecationwarnings/futurewarning --- .github/workflows/unit-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 8ade9f689db2d..a3cffb4b03b93 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -93,8 +93,8 @@ jobs: env_file: actions-311-numpydev.yaml pattern: "not slow and not network and not single_cpu" # Currently restricted the warnings that error to Deprecation Warnings from numpy - # - # TODO: + # done since pyarrow isn't compatible with numpydev always + # TODO: work with pyarrow to revert this? test_args: "-W error::DeprecationWarning:numpy -W error::FutureWarning:numpy" - name: "Pyarrow Nightly" env_file: actions-311-pyarrownightly.yaml From cc2debf3e484ce7d553794435da39e972c8b3723 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Fri, 19 Jan 2024 08:00:35 -0800 Subject: [PATCH 13/13] Update actions-311-numpydev.yaml --- ci/deps/actions-311-numpydev.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/deps/actions-311-numpydev.yaml b/ci/deps/actions-311-numpydev.yaml index d6451cf2577fd..b62e8630f2059 100644 --- a/ci/deps/actions-311-numpydev.yaml +++ b/ci/deps/actions-311-numpydev.yaml @@ -20,7 +20,6 @@ dependencies: - hypothesis>=6.46.1 # pandas dependencies - - pyarrow>=10.0.1 - python-dateutil - pytz - pip