From c1a53267188892c3f987e7ea2eeb8e25ad2b7066 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 12 Aug 2019 16:55:25 -0400 Subject: [PATCH 01/19] Importing lzma when Python has been compiled without its support will raise a warning. Substituted import lzma for helper function. --- pandas/_libs/parsers.pyx | 4 +++- pandas/compat/__init__.py | 13 +++++++++++++ pandas/io/common.py | 4 +++- pandas/util/testing.py | 7 ++++--- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index cafc31dad3568..3853496299145 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -2,7 +2,6 @@ # See LICENSE for the license import bz2 import gzip -import lzma import os import sys import time @@ -59,9 +58,12 @@ from pandas.core.arrays import Categorical from pandas.core.dtypes.concat import union_categoricals import pandas.io.common as icom +from pandas.compat import import_lzma from pandas.errors import (ParserError, DtypeWarning, EmptyDataError, ParserWarning) +lzma = import_lzma() + # Import CParserError as alias of ParserError for backwards compatibility. # Ultimately, we want to remove this import. See gh-12665 and gh-14479. CParserError = ParserError diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 5ecd641fc68be..6db620708b182 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -65,3 +65,16 @@ def is_platform_mac(): def is_platform_32bit(): return struct.calcsize("P") * 8 < 64 + + +def import_lzma(): + import warnings + try: + import lzma + return lzma + except ImportError: + msg = ( + "Could not import the lzma module. Your installed Python is incomplete. " + "Attempting to use `lzma` compression will result in a RuntimeError." + ) + warnings.warn(msg) diff --git a/pandas/io/common.py b/pandas/io/common.py index e01e473047b88..b67b9d79306ae 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -6,7 +6,6 @@ import gzip from http.client import HTTPException # noqa from io import BytesIO -import lzma import mmap import os import pathlib @@ -31,10 +30,13 @@ ParserWarning, ) +from pandas.compat import import_lzma from pandas.core.dtypes.common import is_file_like from pandas._typing import FilePathOrBuffer +lzma = import_lzma() + # gh-12665: Alias for now and remove later. CParserError = ParserError diff --git a/pandas/util/testing.py b/pandas/util/testing.py index cf8452cdd0c59..805adf8a29b49 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -5,7 +5,6 @@ from functools import wraps import gzip import http.client -import lzma import os import re from shutil import rmtree @@ -26,7 +25,7 @@ ) import pandas._libs.testing as _testing -from pandas.compat import raise_with_traceback +from pandas.compat import raise_with_traceback, import_lzma from pandas.core.dtypes.common import ( is_bool, @@ -70,6 +69,8 @@ from pandas.io.common import urlopen from pandas.io.formats.printing import pprint_thing +lzma = import_lzma() + N = 30 K = 4 _RAISE_NETWORK_ERROR_DEFAULT = False @@ -264,7 +265,7 @@ def write_to_compressed(compression, path, data, dest="test"): compress_method = bz2.BZ2File elif compression == "xz": - import lzma + lzma = import_lzma() compress_method = lzma.LZMAFile else: From f9b420bb686373fd9f371b5759767ed6110af680 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 12 Aug 2019 17:12:30 -0400 Subject: [PATCH 02/19] Substituted import lzma to call to import_lzma --- pandas/tests/io/test_pickle.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 076d0c9f947c7..dff07f8c8ddc6 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -13,7 +13,6 @@ import bz2 import glob import gzip -import lzma import os import pickle import shutil @@ -22,7 +21,7 @@ import pytest -from pandas.compat import is_platform_little_endian +from pandas.compat import is_platform_little_endian, import_lzma import pandas as pd from pandas import Index @@ -30,6 +29,8 @@ from pandas.tseries.offsets import Day, MonthEnd +lzma = import_lzma() + @pytest.fixture(scope="module") def current_pickle_data(): From 998b5aad7402688cd186d2e5b7072197f8e09f02 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 12 Aug 2019 17:24:01 -0400 Subject: [PATCH 03/19] Calls to lzma module will raise a RuntimeError. --- pandas/io/common.py | 5 ++++- pandas/tests/io/test_pickle.py | 5 ++++- pandas/util/testing.py | 11 ++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index b67b9d79306ae..a6add81bb5b7b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -397,7 +397,10 @@ def _get_handle( # XZ Compression elif compression == "xz": - f = lzma.LZMAFile(path_or_buf, mode) + if lzma is None: + raise RuntimeError("lzma module not available.") + else: + f = lzma.LZMAFile(path_or_buf, mode) # Unrecognized Compression else: diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index dff07f8c8ddc6..f8eace47b44aa 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -271,7 +271,10 @@ def compress_file(self, src_path, dest_path, compression): with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f: f.write(src_path, os.path.basename(src_path)) elif compression == "xz": - f = lzma.LZMAFile(dest_path, "w") + if lzma is None: + raise RuntimeError("lzma module not available.") + else: + f = lzma.LZMAFile(dest_path, "w") else: msg = "Unrecognized compression type: {}".format(compression) raise ValueError(msg) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 805adf8a29b49..c40201a7f95b0 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -212,7 +212,10 @@ def decompress_file(path, compression): elif compression == "bz2": f = bz2.BZ2File(path, "rb") elif compression == "xz": - f = lzma.LZMAFile(path, "rb") + if lzma is None: + raise RuntimeError("lzma module not available.") + else: + f = lzma.LZMAFile(path, "rb") elif compression == "zip": zip_file = zipfile.ZipFile(path) zip_names = zip_file.namelist() @@ -266,8 +269,10 @@ def write_to_compressed(compression, path, data, dest="test"): compress_method = bz2.BZ2File elif compression == "xz": lzma = import_lzma() - - compress_method = lzma.LZMAFile + if lzma is None: + raise RuntimeError("lmza module not available.") + else: + compress_method = lzma.LZMAFile else: msg = "Unrecognized compression type: {}".format(compression) raise ValueError(msg) From 6c53d80ede70858e488f8f187cc78c871b2c9fb2 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 12 Aug 2019 17:29:14 -0400 Subject: [PATCH 04/19] Formatted with black. --- pandas/compat/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 6db620708b182..36b90a2c081ea 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -69,8 +69,10 @@ def is_platform_32bit(): def import_lzma(): import warnings + try: import lzma + return lzma except ImportError: msg = ( From 2415e1f784f791f85c05bee4856aaa342d293659 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 12 Aug 2019 17:36:01 -0400 Subject: [PATCH 05/19] Raise RuntimeError when calling a method of lzma when lzma is not available. --- pandas/_libs/parsers.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 3853496299145..ca3ec43ac8130 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -646,6 +646,7 @@ cdef class TextReader: raise ValueError('Multiple files found in compressed ' 'zip file %s', str(zip_names)) elif self.compression == 'xz': + if lzma is None: raise RuntimeError("lzma module not available.") if isinstance(source, str): source = lzma.LZMAFile(source, 'rb') else: From 52cb8ed3ed29d448f1f42c1a4ddfbe12201e0182 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 12 Aug 2019 17:51:55 -0400 Subject: [PATCH 06/19] Release not explaining solution to #27575. --- doc/source/whatsnew/v0.25.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 637ac5c9c8bd1..19bc424a78f37 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -103,7 +103,7 @@ MultiIndex I/O ^^^ - +- Bug: importing `lzma` can fail when Python is not properly installed. For example, on MacOS, installing Python with `pyenv` when `xz` is not available does not raise any issues during installation. However, trying to `import pandas` afterwards will fail, since the module `lzma` will not be available. Usually, this can be solved by installing the appropriate Python dependencies, and then resintalling Python. This fix warns the user that Python was not properly installed during a `import pandas`. In this case, Pandas can still be used (not possible before this fix), but any call to the `lzma` module will raise a `RuntimeError`. (:issue: `27575`) - - - From 3401cd58368425052d782ec296ff69dd58f9e658 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 12 Aug 2019 17:53:38 -0400 Subject: [PATCH 07/19] Moved import warnings to top. --- pandas/compat/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 36b90a2c081ea..6549402bd290b 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -10,6 +10,8 @@ import platform import struct import sys +import warnings + PY35 = sys.version_info[:2] == (3, 5) PY36 = sys.version_info >= (3, 6) @@ -68,8 +70,6 @@ def is_platform_32bit(): def import_lzma(): - import warnings - try: import lzma From 10fecae89a5fde859b4fc7033a451cce62524615 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 12 Aug 2019 18:13:59 -0400 Subject: [PATCH 08/19] Added test for import lzma. Test passes when lzma is not available and you try importing it. --- pandas/tests/io/test_compression.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index ce459ab24afe0..31866b075d5ca 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -1,5 +1,7 @@ import contextlib import os +import subprocess +import textwrap import warnings import pytest @@ -125,3 +127,15 @@ def test_compression_warning(compression_only): with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): with f: df.to_csv(f, compression=compression_only) + + +def test_with_missing_lzma(): + # https://github.com/pandas-dev/pandas/issues/27575 + code = textwrap.dedent( + """\ + import sys + sys.modules['lzma'] = None + import pandas + """ + ) + subprocess.check_output(["python", "-c", code]) From c79e31b47acdec000e5f324a3b1da43a483361c8 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 12 Aug 2019 18:24:27 -0400 Subject: [PATCH 09/19] Improved explanation of solution to #27575. --- doc/source/whatsnew/v0.25.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 19bc424a78f37..5054e40b527d2 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -103,7 +103,7 @@ MultiIndex I/O ^^^ -- Bug: importing `lzma` can fail when Python is not properly installed. For example, on MacOS, installing Python with `pyenv` when `xz` is not available does not raise any issues during installation. However, trying to `import pandas` afterwards will fail, since the module `lzma` will not be available. Usually, this can be solved by installing the appropriate Python dependencies, and then resintalling Python. This fix warns the user that Python was not properly installed during a `import pandas`. In this case, Pandas can still be used (not possible before this fix), but any call to the `lzma` module will raise a `RuntimeError`. (:issue: `27575`) +- Bug: importing `lzma` can fail when Python is not properly installed. For example, on MacOS, installing Python with `pyenv` when `xz` is not available does not raise any issues during installation. However, trying to `import pandas` afterwards will fail, since the module `lzma` will not be available. Usually, this can be solved by installing the appropriate Python dependencies, and then resintalling Python. This fix warns the user that Python was not properly installed during a `import pandas`. In this case, Pandas can still be used (not possible before this fix), but any call to the `lzma` module will raise a `RuntimeError`. The solution was to check for an `ImportError` when importing `lzma`. If we get an `ImportError`, then we set `lzma` to `None`. When we actually need to call some method of `lzma`, we first check `lzma is None` and raise a `RuntimeError` if true, otherwise `lzma` was imported and the code can run. (:issue: `27575`) - - - From 68de9de5877e60c0a53a92b92175f6a227a6a8ab Mon Sep 17 00:00:00 2001 From: Guilherme Date: Tue, 13 Aug 2019 12:23:51 -0400 Subject: [PATCH 10/19] Fixed isort. --- pandas/compat/__init__.py | 1 - pandas/io/common.py | 2 +- pandas/tests/io/test_pickle.py | 2 +- pandas/util/testing.py | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 6549402bd290b..71f072e4cd9fe 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -12,7 +12,6 @@ import sys import warnings - PY35 = sys.version_info[:2] == (3, 5) PY36 = sys.version_info >= (3, 6) PY37 = sys.version_info >= (3, 7) diff --git a/pandas/io/common.py b/pandas/io/common.py index a6add81bb5b7b..3c18d140b2cab 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -22,6 +22,7 @@ from urllib.request import pathname2url, urlopen import zipfile +from pandas.compat import import_lzma from pandas.errors import ( # noqa AbstractMethodError, DtypeWarning, @@ -30,7 +31,6 @@ ParserWarning, ) -from pandas.compat import import_lzma from pandas.core.dtypes.common import is_file_like from pandas._typing import FilePathOrBuffer diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index f8eace47b44aa..e702d27b856e0 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -21,7 +21,7 @@ import pytest -from pandas.compat import is_platform_little_endian, import_lzma +from pandas.compat import import_lzma, is_platform_little_endian import pandas as pd from pandas import Index diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c40201a7f95b0..494a131816b06 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -25,7 +25,7 @@ ) import pandas._libs.testing as _testing -from pandas.compat import raise_with_traceback, import_lzma +from pandas.compat import import_lzma, raise_with_traceback from pandas.core.dtypes.common import ( is_bool, From 7656b344ffac933e542ade564cec8dfe61885f1c Mon Sep 17 00:00:00 2001 From: Guilherme Date: Tue, 13 Aug 2019 12:26:58 -0400 Subject: [PATCH 11/19] Removed remains from a merge. --- doc/source/whatsnew/v0.25.1.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index ce9d81c0d2e3f..25bea4b28794d 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -106,7 +106,6 @@ I/O - Bug: importing `lzma` can fail when Python is not properly installed. For example, on MacOS, installing Python with `pyenv` when `xz` is not available does not raise any issues during installation. However, trying to `import pandas` afterwards will fail, since the module `lzma` will not be available. Usually, this can be solved by installing the appropriate Python dependencies, and then resintalling Python. This fix warns the user that Python was not properly installed during a `import pandas`. In this case, Pandas can still be used (not possible before this fix), but any call to the `lzma` module will raise a `RuntimeError`. The solution was to check for an `ImportError` when importing `lzma`. If we get an `ImportError`, then we set `lzma` to `None`. When we actually need to call some method of `lzma`, we first check `lzma is None` and raise a `RuntimeError` if true, otherwise `lzma` was imported and the code can run. (:issue: `27575`) - - Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`) ->>>>>>> 35821a5794e434117b14797f010602c8e412b36c - - From e74dc3013be53efa6594908fdedc6def08db11a5 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Tue, 13 Aug 2019 12:44:36 -0400 Subject: [PATCH 12/19] Moved I/O and LZMA bug fix to a separate section. --- doc/source/whatsnew/v0.25.1.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 25bea4b28794d..fd83222111330 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -103,8 +103,6 @@ MultiIndex I/O ^^^ -- Bug: importing `lzma` can fail when Python is not properly installed. For example, on MacOS, installing Python with `pyenv` when `xz` is not available does not raise any issues during installation. However, trying to `import pandas` afterwards will fail, since the module `lzma` will not be available. Usually, this can be solved by installing the appropriate Python dependencies, and then resintalling Python. This fix warns the user that Python was not properly installed during a `import pandas`. In this case, Pandas can still be used (not possible before this fix), but any call to the `lzma` module will raise a `RuntimeError`. The solution was to check for an `ImportError` when importing `lzma`. If we get an `ImportError`, then we set `lzma` to `None`. When we actually need to call some method of `lzma`, we first check `lzma is None` and raise a `RuntimeError` if true, otherwise `lzma` was imported and the code can run. (:issue: `27575`) -- - Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`) - - @@ -161,6 +159,15 @@ Other - - +I/O and LZMA +~~~~~~~~~~~~ + +- Issue: Some users may unknowingly have an incomplete Python installation, which lacks the `lzma` module from the standard library. In this case, `import pandas` fails due to an `ImportError`; (:issue: `27575`) +- Change: Pandas will now warn, rather than raising an `ImportError` if the `lzma` module is not present. Any subsequent attempt to use `lzma` methods will raise a `RuntimeError`; +- Possible Fix: Ensure you have the necessary libraries and reinstall Python; +- Example: On MacOS, installing Python with `pyenv` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like `xz`). Compilation will succeed, but Python might fail at run time. + + .. _whatsnew_0.251.contributors: Contributors From 881fc12e7fa9c82c62a1e2a7649f33a207ef330f Mon Sep 17 00:00:00 2001 From: Guilherme Date: Tue, 13 Aug 2019 12:49:25 -0400 Subject: [PATCH 13/19] Unecessary import. --- pandas/util/testing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 494a131816b06..b25a1339377b8 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -268,7 +268,6 @@ def write_to_compressed(compression, path, data, dest="test"): compress_method = bz2.BZ2File elif compression == "xz": - lzma = import_lzma() if lzma is None: raise RuntimeError("lmza module not available.") else: From 2e4e4225f08ff3d549992fcc50c73065e5d40c09 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Tue, 13 Aug 2019 13:04:22 -0400 Subject: [PATCH 14/19] Updated RuntimeError message to alert user that a re-install might be required. --- pandas/_libs/parsers.pyx | 6 +++++- pandas/io/common.py | 7 ++++++- pandas/tests/io/test_pickle.py | 7 ++++++- pandas/util/testing.py | 14 ++++++++++++-- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index ca3ec43ac8130..dca925ac99179 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -646,7 +646,11 @@ cdef class TextReader: raise ValueError('Multiple files found in compressed ' 'zip file %s', str(zip_names)) elif self.compression == 'xz': - if lzma is None: raise RuntimeError("lzma module not available.") + if lzma is None: + raise RuntimeError("lzma module not available. " + "A Python re-install with the proper " + "dependencies might be required to " + "solve this issue.") if isinstance(source, str): source = lzma.LZMAFile(source, 'rb') else: diff --git a/pandas/io/common.py b/pandas/io/common.py index 3c18d140b2cab..5bb47a45dea5e 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -398,7 +398,12 @@ def _get_handle( # XZ Compression elif compression == "xz": if lzma is None: - raise RuntimeError("lzma module not available.") + raise RuntimeError( + "lzma module not available. " + "A Python re-install with the proper " + "dependencies might be required to " + "solve this issue." + ) else: f = lzma.LZMAFile(path_or_buf, mode) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index e702d27b856e0..aefd550489d4d 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -272,7 +272,12 @@ def compress_file(self, src_path, dest_path, compression): f.write(src_path, os.path.basename(src_path)) elif compression == "xz": if lzma is None: - raise RuntimeError("lzma module not available.") + raise RuntimeError( + "lzma module not available. " + "A Python re-install with the proper " + "dependencies might be required to " + "solve this issue." + ) else: f = lzma.LZMAFile(dest_path, "w") else: diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b25a1339377b8..131471fcd61e3 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -213,7 +213,12 @@ def decompress_file(path, compression): f = bz2.BZ2File(path, "rb") elif compression == "xz": if lzma is None: - raise RuntimeError("lzma module not available.") + raise RuntimeError( + "lzma module not available. " + "A Python re-install with the proper " + "dependencies might be required to " + "solve this issue." + ) else: f = lzma.LZMAFile(path, "rb") elif compression == "zip": @@ -269,7 +274,12 @@ def write_to_compressed(compression, path, data, dest="test"): compress_method = bz2.BZ2File elif compression == "xz": if lzma is None: - raise RuntimeError("lmza module not available.") + raise RuntimeError( + "lzma module not available. " + "A Python re-install with the proper " + "dependencies might be required to " + "solve this issue." + ) else: compress_method = lzma.LZMAFile else: From 351d8e45f7153779b828dd54fc1b9661d439ed96 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Tue, 13 Aug 2019 14:54:55 -0400 Subject: [PATCH 15/19] Moved the check `lzma is None` to a function, which also raises RuntimeError if the check fails. Updated remainder of the code to use this function. --- pandas/_libs/parsers.pyx | 13 ++++--------- pandas/compat/__init__.py | 17 ++++++++++++++--- pandas/io/common.py | 14 +++----------- pandas/tests/io/test_pickle.py | 14 +++----------- pandas/util/testing.py | 24 ++++-------------------- 5 files changed, 28 insertions(+), 54 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index dca925ac99179..6cc9dd22ce7c9 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -58,11 +58,11 @@ from pandas.core.arrays import Categorical from pandas.core.dtypes.concat import union_categoricals import pandas.io.common as icom -from pandas.compat import import_lzma +from pandas.compat import _import_lzma, _get_lzma_file from pandas.errors import (ParserError, DtypeWarning, EmptyDataError, ParserWarning) -lzma = import_lzma() +lzma = _import_lzma() # Import CParserError as alias of ParserError for backwards compatibility. # Ultimately, we want to remove this import. See gh-12665 and gh-14479. @@ -646,15 +646,10 @@ cdef class TextReader: raise ValueError('Multiple files found in compressed ' 'zip file %s', str(zip_names)) elif self.compression == 'xz': - if lzma is None: - raise RuntimeError("lzma module not available. " - "A Python re-install with the proper " - "dependencies might be required to " - "solve this issue.") if isinstance(source, str): - source = lzma.LZMAFile(source, 'rb') + source = _get_lzma_file(lzma)(source, 'rb') else: - source = lzma.LZMAFile(filename=source) + source = _get_lzma_file(lzma)(filename=source) else: raise ValueError('Unrecognized compression type: %s' % self.compression) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 71f072e4cd9fe..1fa0e1b148860 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -68,14 +68,25 @@ def is_platform_32bit(): return struct.calcsize("P") * 8 < 64 -def import_lzma(): +def _import_lzma(): try: import lzma return lzma except ImportError: msg = ( - "Could not import the lzma module. Your installed Python is incomplete. " - "Attempting to use `lzma` compression will result in a RuntimeError." + "Could not import the lzma module. " + "Your installed Python is incomplete. " + "Attempting to use lzma compression will result in a RuntimeError." ) warnings.warn(msg) + + +def _get_lzma_file(lzma): + if lzma is None: + raise RuntimeError( + "lzma module not available. " + "A Python re-install with the proper " + "dependencies might be required to solve this issue." + ) + return lzma.LZMAFile diff --git a/pandas/io/common.py b/pandas/io/common.py index 5bb47a45dea5e..ac57cef372399 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -22,7 +22,7 @@ from urllib.request import pathname2url, urlopen import zipfile -from pandas.compat import import_lzma +from pandas.compat import _get_lzma_file, _import_lzma from pandas.errors import ( # noqa AbstractMethodError, DtypeWarning, @@ -35,7 +35,7 @@ from pandas._typing import FilePathOrBuffer -lzma = import_lzma() +lzma = _import_lzma() # gh-12665: Alias for now and remove later. CParserError = ParserError @@ -397,15 +397,7 @@ def _get_handle( # XZ Compression elif compression == "xz": - if lzma is None: - raise RuntimeError( - "lzma module not available. " - "A Python re-install with the proper " - "dependencies might be required to " - "solve this issue." - ) - else: - f = lzma.LZMAFile(path_or_buf, mode) + f = _get_lzma_file(lzma)(path_or_buf, mode) # Unrecognized Compression else: diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index aefd550489d4d..30555508f0998 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -21,7 +21,7 @@ import pytest -from pandas.compat import import_lzma, is_platform_little_endian +from pandas.compat import _get_lzma_file, _import_lzma, is_platform_little_endian import pandas as pd from pandas import Index @@ -29,7 +29,7 @@ from pandas.tseries.offsets import Day, MonthEnd -lzma = import_lzma() +lzma = _import_lzma() @pytest.fixture(scope="module") @@ -271,15 +271,7 @@ def compress_file(self, src_path, dest_path, compression): with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f: f.write(src_path, os.path.basename(src_path)) elif compression == "xz": - if lzma is None: - raise RuntimeError( - "lzma module not available. " - "A Python re-install with the proper " - "dependencies might be required to " - "solve this issue." - ) - else: - f = lzma.LZMAFile(dest_path, "w") + f = _get_lzma_file(lzma)(dest_path, "w") else: msg = "Unrecognized compression type: {}".format(compression) raise ValueError(msg) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 131471fcd61e3..a8f0d0da52e1f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -25,7 +25,7 @@ ) import pandas._libs.testing as _testing -from pandas.compat import import_lzma, raise_with_traceback +from pandas.compat import _get_lzma_file, _import_lzma, raise_with_traceback from pandas.core.dtypes.common import ( is_bool, @@ -69,7 +69,7 @@ from pandas.io.common import urlopen from pandas.io.formats.printing import pprint_thing -lzma = import_lzma() +lzma = _import_lzma() N = 30 K = 4 @@ -212,15 +212,7 @@ def decompress_file(path, compression): elif compression == "bz2": f = bz2.BZ2File(path, "rb") elif compression == "xz": - if lzma is None: - raise RuntimeError( - "lzma module not available. " - "A Python re-install with the proper " - "dependencies might be required to " - "solve this issue." - ) - else: - f = lzma.LZMAFile(path, "rb") + f = _get_lzma_file(lzma)(path, "rb") elif compression == "zip": zip_file = zipfile.ZipFile(path) zip_names = zip_file.namelist() @@ -273,15 +265,7 @@ def write_to_compressed(compression, path, data, dest="test"): compress_method = bz2.BZ2File elif compression == "xz": - if lzma is None: - raise RuntimeError( - "lzma module not available. " - "A Python re-install with the proper " - "dependencies might be required to " - "solve this issue." - ) - else: - compress_method = lzma.LZMAFile + compress_method = _get_lzma_file(lzma) else: msg = "Unrecognized compression type: {}".format(compression) raise ValueError(msg) From ea0fd694fa8e8f642c948c99620d3caac4edb604 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Tue, 13 Aug 2019 14:55:54 -0400 Subject: [PATCH 16/19] Removed bulletpoints. --- doc/source/whatsnew/v0.25.1.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index fd83222111330..b8475b41a4950 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -162,11 +162,10 @@ Other I/O and LZMA ~~~~~~~~~~~~ -- Issue: Some users may unknowingly have an incomplete Python installation, which lacks the `lzma` module from the standard library. In this case, `import pandas` fails due to an `ImportError`; (:issue: `27575`) -- Change: Pandas will now warn, rather than raising an `ImportError` if the `lzma` module is not present. Any subsequent attempt to use `lzma` methods will raise a `RuntimeError`; -- Possible Fix: Ensure you have the necessary libraries and reinstall Python; -- Example: On MacOS, installing Python with `pyenv` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like `xz`). Compilation will succeed, but Python might fail at run time. - +Some users may unknowingly have an incomplete Python installation, which lacks the `lzma` module from the standard library. In this case, `import pandas` failed due to an `ImportError` (:issue: `27575`). +Pandas will now warn, rather than raising an `ImportError` if the `lzma` module is not present. Any subsequent attempt to use `lzma` methods will raise a `RuntimeError`. +A possible fix for the lack of the `lzma` module is to ensure you have the necessary libraries and then re-install Python. +For example, on MacOS installing Python with `pyenv` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like `xz`). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python. .. _whatsnew_0.251.contributors: From d65110d4e5974b1a56bd149e19eeca92442bb8dc Mon Sep 17 00:00:00 2001 From: Guilherme Date: Tue, 13 Aug 2019 14:56:07 -0400 Subject: [PATCH 17/19] Added test that fails at Runtime when lzma module not available. Test succeeds if lzma module is available. --- pandas/tests/io/test_compression.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 31866b075d5ca..0422e06e5902c 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -130,6 +130,7 @@ def test_compression_warning(compression_only): def test_with_missing_lzma(): + """Tests if import pandas fails when lzma is not present.""" # https://github.com/pandas-dev/pandas/issues/27575 code = textwrap.dedent( """\ @@ -139,3 +140,16 @@ def test_with_missing_lzma(): """ ) subprocess.check_output(["python", "-c", code]) + + +def test_with_missing_lzma_runtime(): + """Tests if RuntimeError is hit when calling lzma without + having the module available.""" + code = textwrap.dedent( + """ + import sys + from pandas.compat import _import_lzma, _get_lzma_file + lzma = _import_lzma() + _get_lzma_file(lzma)""" + ) + subprocess.check_output(["python", "-c", code]) From 18405a660d420531002d5dac641f9aa0f7eab838 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Thu, 15 Aug 2019 09:41:15 -0400 Subject: [PATCH 18/19] Modified runtime test to make direct call to pandas function. --- pandas/tests/io/test_compression.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 0422e06e5902c..16ca1109f266c 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -130,7 +130,7 @@ def test_compression_warning(compression_only): def test_with_missing_lzma(): - """Tests if import pandas fails when lzma is not present.""" + """Tests if import pandas works when lzma is not present.""" # https://github.com/pandas-dev/pandas/issues/27575 code = textwrap.dedent( """\ @@ -148,8 +148,12 @@ def test_with_missing_lzma_runtime(): code = textwrap.dedent( """ import sys - from pandas.compat import _import_lzma, _get_lzma_file - lzma = _import_lzma() - _get_lzma_file(lzma)""" + import pytest + sys.modules['lzma'] = None + import pandas + df = pandas.DataFrame() + with pytest.raises(RuntimeError, match='lzma module'): + df.to_csv('foo.csv', compression='xz') + """ ) subprocess.check_output(["python", "-c", code]) From af64991191ec77cc511464735bb199ba0253c7b3 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Thu, 15 Aug 2019 09:46:08 -0400 Subject: [PATCH 19/19] Added docstring. --- pandas/compat/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 1fa0e1b148860..b32da8da3a1fb 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -69,6 +69,8 @@ def is_platform_32bit(): def _import_lzma(): + """Attempts to import lzma, warning the user when lzma is not available. + """ try: import lzma @@ -83,6 +85,9 @@ def _import_lzma(): def _get_lzma_file(lzma): + """Returns the lzma method LZMAFile when the module was correctly imported. + Otherwise, raises a RuntimeError. + """ if lzma is None: raise RuntimeError( "lzma module not available. "