From d140f375e7dfe16f01d802a9571eb4c01b99ee83 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 10 Sep 2021 11:19:29 +0200 Subject: [PATCH 01/10] ENH Remove import time warning for missing lzma --- pandas/_testing/_io.py | 5 +---- pandas/compat/__init__.py | 25 ++++--------------------- pandas/io/common.py | 4 +--- pandas/tests/io/test_compression.py | 13 +++++++++++++ pandas/tests/io/test_pickle.py | 5 +---- pandas/tests/io/test_stata.py | 2 +- pandas/tests/io/xml/test_xml.py | 4 ++-- 7 files changed, 23 insertions(+), 35 deletions(-) diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py index a0b6963cfac97..5a84e8b892093 100644 --- a/pandas/_testing/_io.py +++ b/pandas/_testing/_io.py @@ -15,7 +15,6 @@ ) from pandas.compat import ( get_lzma_file, - import_lzma, ) import pandas as pd @@ -26,8 +25,6 @@ _RAISE_NETWORK_ERROR_DEFAULT = False -lzma = import_lzma() - # skip tests on exceptions with these messages _network_error_messages = ( # 'urlopen error timed out', @@ -387,7 +384,7 @@ def write_to_compressed(compression, path, data, dest="test"): elif compression == "bz2": compress_method = bz2.BZ2File elif compression == "xz": - compress_method = get_lzma_file(lzma) + compress_method = get_lzma_file() else: raise ValueError(f"Unrecognized compression type: {compression}") diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 3233de8e3b6d1..85beeb702e1ff 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -102,27 +102,7 @@ def is_platform_arm() -> bool: return platform.machine() in ("arm64", "aarch64") -def import_lzma(): - """ - Importing the `lzma` module. - - Warns - ----- - When the `lzma` module is not available. - """ - try: - import lzma - - return lzma - except ImportError: - msg = ( - "Could not import the lzma module. Your installed Python is incomplete. " - "Attempting to use lzma compression will result in a RuntimeError." - ) - warnings.warn(msg) - - -def get_lzma_file(lzma): +def get_lzma_file(): """ Importing the `LZMAFile` class from the `lzma` module. @@ -137,6 +117,9 @@ def get_lzma_file(lzma): If the `lzma` module was not imported correctly, or didn't exist. """ if lzma is None: + try: + import lzma + except ImportError: raise RuntimeError( "lzma module not available. " "A Python re-install with the proper dependencies, " diff --git a/pandas/io/common.py b/pandas/io/common.py index 1058e98445284..1fc7872b3fb62 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -51,8 +51,6 @@ from pandas.core.dtypes.common import is_file_like -lzma = import_lzma() - _VALID_URLS = set(uses_relative + uses_netloc + uses_params) _VALID_URLS.discard("") @@ -685,7 +683,7 @@ def get_handle( # XZ Compression elif compression == "xz": - handle = get_lzma_file(lzma)(handle, ioargs.mode) + handle = get_lzma_file()(handle, ioargs.mode) # Unrecognized Compression else: diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 6c90830639061..cda3df37406c3 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -191,6 +191,19 @@ def test_with_missing_lzma(): subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE) +def test_lzma_not_imported(): + """Check that lzma is not imported by default""" + # https://github.com/pandas-dev/pandas/issues/43461 + code = textwrap.dedent( + """\ + import sys + import pandas + assert "lzma" not in sys.modules + """ + ) + subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE) + + def test_with_missing_lzma_runtime(): """Tests if RuntimeError is hit when calling lzma without having the module available. diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 9253e5ae700c7..f2f9f403dbbf1 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -33,7 +33,6 @@ from pandas.compat import ( get_lzma_file, - import_lzma, is_platform_little_endian, ) import pandas.util._test_decorators as td @@ -51,8 +50,6 @@ MonthEnd, ) -lzma = import_lzma() - # TODO(ArrayManager) pickling pytestmark = [ @@ -313,7 +310,7 @@ def compress_file(self, src_path, dest_path, compression): with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f: f.write(src_path, os.path.basename(src_path)) elif compression == "xz": - f = get_lzma_file(lzma)(dest_path, "w") + f = get_lzma_file()(dest_path, "w") else: msg = f"Unrecognized compression type: {compression}" raise ValueError(msg) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 02cf478c61583..9f7f8b7333eb9 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -3,7 +3,6 @@ from datetime import datetime import gzip import io -import lzma import os import struct import warnings @@ -1904,6 +1903,7 @@ def test_compression(compression, version, use_dict, infer): with bz2.open(path, "rb") as comp: fp = io.BytesIO(comp.read()) elif compression == "xz": + lzma = pytest.importorskip('lzma') with lzma.open(path, "rb") as comp: fp = io.BytesIO(comp.read()) elif compression is None: diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 7e9a03c2a59a8..8c465fbc4d6b4 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1044,12 +1044,12 @@ def test_wrong_compression_gz(parser, comp): @pytest.mark.parametrize("comp", ["bz2", "gzip", "zip"]) def test_wrong_compression_xz(parser, comp): - from lzma import LZMAError + lzma = pytest.imortorskip('lzma') with tm.ensure_clean() as path: geom_df.to_xml(path, parser=parser, compression=comp) - with pytest.raises(LZMAError, match="Input format not supported by decoder"): + with pytest.raises(lzma.LZMAError, match="Input format not supported by decoder"): read_xml(path, parser=parser, compression="xz") From 73416452bd01b029120c8c95aeffadb1eff1029b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 10 Sep 2021 11:36:53 +0200 Subject: [PATCH 02/10] Lint --- pandas/compat/__init__.py | 2 -- pandas/tests/io/xml/test_xml.py | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 85beeb702e1ff..a6a344418c67b 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -9,7 +9,6 @@ """ import platform import sys -import warnings from pandas._typing import F from pandas.compat.numpy import ( @@ -116,7 +115,6 @@ def get_lzma_file(): RuntimeError If the `lzma` module was not imported correctly, or didn't exist. """ - if lzma is None: try: import lzma except ImportError: diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 8c465fbc4d6b4..4692b5535bfa7 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1044,12 +1044,14 @@ def test_wrong_compression_gz(parser, comp): @pytest.mark.parametrize("comp", ["bz2", "gzip", "zip"]) def test_wrong_compression_xz(parser, comp): - lzma = pytest.imortorskip('lzma') + lzma = pytest.importorskip("lzma") with tm.ensure_clean() as path: geom_df.to_xml(path, parser=parser, compression=comp) - with pytest.raises(lzma.LZMAError, match="Input format not supported by decoder"): + with pytest.raises( + lzma.LZMAError, match="Input format not supported by decoder" + ): read_xml(path, parser=parser, compression="xz") From a4b65e127d1b432428f47d006322cc54d361ad20 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 10 Sep 2021 11:46:59 +0200 Subject: [PATCH 03/10] More fixes --- pandas/io/common.py | 5 +---- pandas/tests/io/test_stata.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 1fc7872b3fb62..a70007d77a81a 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -43,10 +43,7 @@ FilePathOrBuffer, StorageOptions, ) -from pandas.compat import ( - get_lzma_file, - import_lzma, -) +from pandas.compat import get_lzma_file from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import is_file_like diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 9f7f8b7333eb9..35ba2731d091f 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1903,7 +1903,7 @@ def test_compression(compression, version, use_dict, infer): with bz2.open(path, "rb") as comp: fp = io.BytesIO(comp.read()) elif compression == "xz": - lzma = pytest.importorskip('lzma') + lzma = pytest.importorskip("lzma") with lzma.open(path, "rb") as comp: fp = io.BytesIO(comp.read()) elif compression is None: From 4885afb0c614af58ba6872d4cff5d04189dcf10b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 8 Oct 2021 13:30:37 +0200 Subject: [PATCH 04/10] Address review comments --- ci/code_checks.sh | 2 +- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/tests/io/test_compression.py | 13 ------------- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7e4b5775af317..b4a7c86ce3551 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -47,7 +47,7 @@ import pandas blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis', 'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', - 'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'} + 'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt', 'lzma'} # GH#28227 for some of these check for top-level modules, while others are # more specific (e.g. urllib.request) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 5d7bf4fc36e9e..59aea60373120 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -390,6 +390,7 @@ I/O - Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`) - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`) - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`) +- Bug when Python was built without lzma module: a warning was raised at the pandas import time, even if the lzma module wasn't used. (:issue:`43495`) Period ^^^^^^ diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index cda3df37406c3..6c90830639061 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -191,19 +191,6 @@ def test_with_missing_lzma(): subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE) -def test_lzma_not_imported(): - """Check that lzma is not imported by default""" - # https://github.com/pandas-dev/pandas/issues/43461 - code = textwrap.dedent( - """\ - import sys - import pandas - assert "lzma" not in sys.modules - """ - ) - subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE) - - def test_with_missing_lzma_runtime(): """Tests if RuntimeError is hit when calling lzma without having the module available. From 10bc68591ae238e802933d2b5fef8d9cd702c413 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 8 Oct 2021 13:44:43 +0200 Subject: [PATCH 05/10] Mention it in install.rst --- doc/source/getting_started/install.rst | 1 + pandas/_testing/_io.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 20ae37c85a9d9..83a50d0f84a7c 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -362,6 +362,7 @@ zlib Compression for HDF5 fastparquet 0.4.0 Parquet reading / writing pyarrow 0.17.0 Parquet, ORC, and feather reading / writing pyreadstat SPSS files (.sav) reading +Python with lzma module None Reading files with .lzma or .xz compression ========================= ================== ============================================================= .. _install.warn_orc: diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py index ec59db7d28870..0df4507964078 100644 --- a/pandas/_testing/_io.py +++ b/pandas/_testing/_io.py @@ -11,9 +11,7 @@ import zipfile from pandas._typing import FilePathOrBuffer -from pandas.compat import ( - get_lzma_file, -) +from pandas.compat import get_lzma_file import pandas as pd from pandas._testing._random import rands From 3155c3e8f4424b3acfae7073048e234ccace69d5 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 8 Oct 2021 13:49:06 +0200 Subject: [PATCH 06/10] Improve wording --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index f7e27f493034f..a999c74abd88b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -474,7 +474,7 @@ I/O - Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`) - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`) - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) -- Bug when Python was built without lzma module: a warning was raised at the pandas import time, even if the lzma module wasn't used. (:issue:`43495`) +- Bug when Python is built without lzma module: a warning was raised at the pandas import time, even if the lzma capability isn't used. (:issue:`43495`) - Period From 887b519211c0337e13dd1a969cfde5a9c759471d Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 8 Oct 2021 14:08:54 +0200 Subject: [PATCH 07/10] More lint fixes --- pandas/tests/io/test_pickle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index f2f9f403dbbf1..2c292323de16e 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -50,7 +50,6 @@ MonthEnd, ) - # TODO(ArrayManager) pickling pytestmark = [ td.skip_array_manager_not_yet_implemented, From 4bf231e12f35f86ed2dc9cf1d33da492b20513da Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 6 Nov 2021 12:00:04 +0100 Subject: [PATCH 08/10] Address review comment --- doc/source/getting_started/install.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 8daa961fae48b..da70549687594 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -362,7 +362,6 @@ zlib Compression for HDF5 fastparquet 0.4.0 Parquet reading / writing pyarrow 1.0.1 Parquet, ORC, and feather reading / writing pyreadstat SPSS files (.sav) reading -Python with lzma module None Reading files with .lzma or .xz compression ========================= ================== ============================================================= .. _install.warn_orc: From 986a549732e9bd16284b8d919f30824a49665424 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sun, 28 Nov 2021 22:35:56 +0100 Subject: [PATCH 09/10] Remove lzma from the blocklist --- ci/code_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 4fae11cc96008..503120e486f5a 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -47,7 +47,7 @@ import pandas blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis', 'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', - 'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt', 'lzma'} + 'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'} # GH#28227 for some of these check for top-level modules, while others are # more specific (e.g. urllib.request) From 02d6e5de4678851402d1102eb5a7e2cfe58db1f5 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sun, 28 Nov 2021 22:37:26 +0100 Subject: [PATCH 10/10] Fix merge conflict --- doc/source/whatsnew/v1.4.0.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index b50e3dfc46605..b4502912b72e5 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -667,11 +667,8 @@ I/O - Bug in :func:`read_csv` used second row to guess implicit index if ``header`` was set to ``None`` for ``engine="python"`` (:issue:`22144`) - Bug in :func:`read_csv` not recognizing bad lines when ``names`` were given for ``engine="c"`` (:issue:`22144`) - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) -<<<<<<< HEAD - Bug when Python is built without lzma module: a warning was raised at the pandas import time, even if the lzma capability isn't used. (:issue:`43495`) -======= - Bug in :func:`read_csv` not applying dtype for ``index_col`` (:issue:`9435`) ->>>>>>> upstream/master - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`) - Bug in :func:`read_csv` raising ``ValueError`` when names was longer than header but equal to data rows for ``engine="python"`` (:issue:`38453`) - Bug in :class:`ExcelWriter`, where ``engine_kwargs`` were not passed through to all engines (:issue:`43442`)