pandas-dev · jreback · Nov 28, 2021 · Sep 10, 2021 · Sep 10, 2021 · Sep 10, 2021
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -47,7 +47,7 @@ import pandas
 
 blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
              'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
-             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
+             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt', 'lzma'}
 
 # GH#28227 for some of these check for top-level modules, while others are
 #  more specific (e.g. urllib.request)

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
@@ -362,6 +362,7 @@ zlib                                         Compression for HDF5
 fastparquet               0.4.0              Parquet reading / writing
 pyarrow                   0.17.0             Parquet, ORC, and feather reading / writing
 pyreadstat                                   SPSS files (.sav) reading
+Python with lzma module   None               Reading / writing files with .lzma or .xz compression
 ========================= ================== =============================================================
 
 .. _install.warn_orc:

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -474,6 +474,7 @@ I/O
 - Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
 - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
 - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
+- Bug where a warning was raised at pandas import time when Python was built without the ``lzma`` module, even if lzma compression was never used (:issue:`43495`)
 -
 
 Period

diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py
@@ -11,10 +11,7 @@
 import zipfile
 
 from pandas._typing import FilePathOrBuffer
-from pandas.compat import (
-    get_lzma_file,
-    import_lzma,
-)
+from pandas.compat import get_lzma_file
 
 import pandas as pd
 from pandas._testing._random import rands
@@ -30,8 +27,6 @@
 
 _RAISE_NETWORK_ERROR_DEFAULT = False
 
-lzma = import_lzma()
-
 # skip tests on exceptions with these messages
 _network_error_messages = (
     # 'urlopen error timed out',
@@ -394,7 +389,7 @@ def write_to_compressed(compression, path, data, dest="test"):
     elif compression == "bz2":
         compress_method = bz2.BZ2File
     elif compression == "xz":
-        compress_method = get_lzma_file(lzma)
+        compress_method = get_lzma_file()
     else:
         raise ValueError(f"Unrecognized compression type: {compression}")
 

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -9,7 +9,6 @@
 """
 import platform
 import sys
-import warnings
 
 from pandas._typing import F
 from pandas.compat.numpy import (
@@ -102,27 +101,7 @@ def is_platform_arm() -> bool:
     return platform.machine() in ("arm64", "aarch64")
 
 
-def import_lzma():
-    """
-    Importing the `lzma` module.
-
-    Warns
-    -----
-    When the `lzma` module is not available.
-    """
-    try:
-        import lzma
-
-        return lzma
-    except ImportError:
-        msg = (
-            "Could not import the lzma module. Your installed Python is incomplete. "
-            "Attempting to use lzma compression will result in a RuntimeError."
-        )
-        warnings.warn(msg)
-
-
-def get_lzma_file(lzma):
+def get_lzma_file():
     """
     Importing the `LZMAFile` class from the `lzma` module.
 
@@ -136,7 +115,9 @@ def get_lzma_file(lzma):
     RuntimeError
         If the `lzma` module was not imported correctly, or didn't exist.
     """
-    if lzma is None:
+    try:
+        import lzma
+    except ImportError:
         raise RuntimeError(
             "lzma module not available. "
             "A Python re-install with the proper dependencies, "

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -44,16 +44,11 @@
     FilePathOrBuffer,
     StorageOptions,
 )
-from pandas.compat import (
-    get_lzma_file,
-    import_lzma,
-)
+from pandas.compat import get_lzma_file
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.core.dtypes.common import is_file_like
 
-lzma = import_lzma()
-
 _VALID_URLS = set(uses_relative + uses_netloc + uses_params)
 _VALID_URLS.discard("")
 
@@ -687,7 +682,7 @@ def get_handle(
 
         # XZ Compression
         elif compression == "xz":
-            handle = get_lzma_file(lzma)(handle, ioargs.mode)
+            handle = get_lzma_file()(handle, ioargs.mode)
 
         # Unrecognized Compression
         else:

diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
@@ -33,7 +33,6 @@
 
 from pandas.compat import (
     get_lzma_file,
-    import_lzma,
     is_platform_little_endian,
 )
 import pandas.util._test_decorators as td
@@ -51,9 +50,6 @@
     MonthEnd,
 )
 
-lzma = import_lzma()
-
-
 # TODO(ArrayManager) pickling
 pytestmark = [
     td.skip_array_manager_not_yet_implemented,
@@ -313,7 +309,7 @@ def compress_file(self, src_path, dest_path, compression):
             with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f:
                 f.write(src_path, os.path.basename(src_path))
         elif compression == "xz":
-            f = get_lzma_file(lzma)(dest_path, "w")
+            f = get_lzma_file()(dest_path, "w")
         else:
             msg = f"Unrecognized compression type: {compression}"
             raise ValueError(msg)

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -3,7 +3,6 @@
 from datetime import datetime
 import gzip
 import io
-import lzma
 import os
 import struct
 import warnings
@@ -1904,6 +1903,7 @@ def test_compression(compression, version, use_dict, infer):
             with bz2.open(path, "rb") as comp:
                 fp = io.BytesIO(comp.read())
         elif compression == "xz":
+            lzma = pytest.importorskip("lzma")
             with lzma.open(path, "rb") as comp:
                 fp = io.BytesIO(comp.read())
         elif compression is None:

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
@@ -1044,12 +1044,14 @@ def test_wrong_compression_gz(parser, comp):
 
 @pytest.mark.parametrize("comp", ["bz2", "gzip", "zip"])
 def test_wrong_compression_xz(parser, comp):
-    from lzma import LZMAError
+    lzma = pytest.importorskip("lzma")
 
     with tm.ensure_clean() as path:
         geom_df.to_xml(path, parser=parser, compression=comp)
 
-        with pytest.raises(LZMAError, match="Input format not supported by decoder"):
+        with pytest.raises(
+            lzma.LZMAError, match="Input format not supported by decoder"
+        ):
             read_xml(path, parser=parser, compression="xz")