Skip to content

Commit ba94f9b

Browse files
guilherme-salomeTomAugspurger
authored andcommitted
BUG: lzma is a required part of python, make it optional (#27882)
* Importing pandas when Python has been compiled without lzma support now emits a warning instead of failing. Replaced the direct `import lzma` with a helper function.
1 parent 35c44ce commit ba94f9b

File tree

7 files changed

+88
-15
lines changed

7 files changed

+88
-15
lines changed

doc/source/whatsnew/v0.25.1.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,6 @@ MultiIndex
102102

103103
I/O
104104
^^^
105-
106105
- Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
107106
- Better error message when a negative header is passed in :func:`pandas.read_csv` (:issue:`27779`)
108107
-
@@ -159,6 +158,14 @@ Other
159158
-
160159
-
161160

161+
I/O and LZMA
162+
~~~~~~~~~~~~
163+
164+
Some users may unknowingly have an incomplete Python installation, which lacks the ``lzma`` module from the standard library. In this case, ``import pandas`` failed due to an ``ImportError`` (:issue:`27575`).
165+
Pandas will now warn, rather than raise an ``ImportError``, if the ``lzma`` module is not present. Any subsequent attempt to use ``lzma`` methods will raise a ``RuntimeError``.
166+
A possible fix for the lack of the ``lzma`` module is to ensure you have the necessary libraries and then re-install Python.
167+
For example, on macOS, installing Python with ``pyenv`` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like ``xz``). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python.
168+
162169
.. _whatsnew_0.251.contributors:
163170

164171
Contributors

pandas/_libs/parsers.pyx

+5-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# See LICENSE for the license
33
import bz2
44
import gzip
5-
import lzma
65
import os
76
import sys
87
import time
@@ -59,9 +58,12 @@ from pandas.core.arrays import Categorical
5958
from pandas.core.dtypes.concat import union_categoricals
6059
import pandas.io.common as icom
6160

61+
from pandas.compat import _import_lzma, _get_lzma_file
6262
from pandas.errors import (ParserError, DtypeWarning,
6363
EmptyDataError, ParserWarning)
6464

65+
lzma = _import_lzma()
66+
6567
# Import CParserError as alias of ParserError for backwards compatibility.
6668
# Ultimately, we want to remove this import. See gh-12665 and gh-14479.
6769
CParserError = ParserError
@@ -645,9 +647,9 @@ cdef class TextReader:
645647
'zip file %s', str(zip_names))
646648
elif self.compression == 'xz':
647649
if isinstance(source, str):
648-
source = lzma.LZMAFile(source, 'rb')
650+
source = _get_lzma_file(lzma)(source, 'rb')
649651
else:
650-
source = lzma.LZMAFile(filename=source)
652+
source = _get_lzma_file(lzma)(filename=source)
651653
else:
652654
raise ValueError('Unrecognized compression type: %s' %
653655
self.compression)

pandas/compat/__init__.py

+30
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import platform
1111
import struct
1212
import sys
13+
import warnings
1314

1415
PY35 = sys.version_info[:2] == (3, 5)
1516
PY36 = sys.version_info >= (3, 6)
@@ -65,3 +66,32 @@ def is_platform_mac():
6566

6667
def is_platform_32bit():
    """Report whether a native pointer is narrower than 64 bits.

    Returns
    -------
    bool
        True when ``struct.calcsize("P") * 8`` is below 64.
    """
    pointer_bits = struct.calcsize("P") * 8
    return pointer_bits < 64
69+
70+
71+
def _import_lzma():
    """Import and return the ``lzma`` module, warning if it is unavailable.

    Returns
    -------
    module or None
        The standard-library ``lzma`` module, or ``None`` when importing it
        raises ``ImportError``.

    Warns
    -----
    UserWarning
        If ``lzma`` cannot be imported (the default category of
        ``warnings.warn``).
    """
    # Keep the try body limited to the one statement that can raise.
    try:
        import lzma
    except ImportError:
        msg = (
            "Could not import the lzma module. "
            "Your installed Python is incomplete. "
            "Attempting to use lzma compression will result in a RuntimeError."
        )
        warnings.warn(msg)
        # Returned explicitly: ``_get_lzma_file`` tests for this ``None``.
        return None
    return lzma
85+
86+
87+
def _get_lzma_file(lzma):
    """Return ``lzma.LZMAFile`` from the given module object.

    Parameters
    ----------
    lzma : module or None
        The imported ``lzma`` module, or ``None`` if it could not be imported.

    Returns
    -------
    The ``LZMAFile`` attribute of ``lzma``.

    Raises
    ------
    RuntimeError
        If ``lzma`` is ``None``.
    """
    if lzma is not None:
        return lzma.LZMAFile

    message = (
        "lzma module not available. "
        "A Python re-install with the proper "
        "dependencies might be required to solve this issue."
    )
    raise RuntimeError(message)

pandas/io/common.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import gzip
77
from http.client import HTTPException # noqa
88
from io import BytesIO
9-
import lzma
109
import mmap
1110
import os
1211
import pathlib
@@ -23,6 +22,7 @@
2322
from urllib.request import pathname2url, urlopen
2423
import zipfile
2524

25+
from pandas.compat import _get_lzma_file, _import_lzma
2626
from pandas.errors import ( # noqa
2727
AbstractMethodError,
2828
DtypeWarning,
@@ -35,6 +35,8 @@
3535

3636
from pandas._typing import FilePathOrBuffer
3737

38+
lzma = _import_lzma()
39+
3840
# gh-12665: Alias for now and remove later.
3941
CParserError = ParserError
4042

@@ -395,7 +397,7 @@ def _get_handle(
395397

396398
# XZ Compression
397399
elif compression == "xz":
398-
f = lzma.LZMAFile(path_or_buf, mode)
400+
f = _get_lzma_file(lzma)(path_or_buf, mode)
399401

400402
# Unrecognized Compression
401403
else:

pandas/tests/io/test_compression.py

+32
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import contextlib
22
import os
3+
import subprocess
4+
import textwrap
35
import warnings
46

57
import pytest
@@ -125,3 +127,33 @@ def test_compression_warning(compression_only):
125127
with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False):
126128
with f:
127129
df.to_csv(f, compression=compression_only)
130+
131+
132+
def test_with_missing_lzma():
    """Tests if import pandas works when lzma is not present."""
    # https://github.com/pandas-dev/pandas/issues/27575
    import sys

    # Setting the sys.modules entry to None makes ``import lzma`` raise
    # ImportError inside the child process.
    code = textwrap.dedent(
        """\
        import sys
        sys.modules['lzma'] = None
        import pandas
        """
    )
    # Use sys.executable rather than a bare "python": the latter is resolved
    # through PATH and may be a different interpreter, or absent entirely.
    # check_output raises CalledProcessError if the child exits non-zero.
    subprocess.check_output([sys.executable, "-c", code])
143+
144+
145+
def test_with_missing_lzma_runtime():
    """Tests if RuntimeError is hit when calling lzma without
    having the module available."""
    import sys

    # The child process hides lzma, imports pandas, and then asserts that
    # requesting xz compression raises the expected RuntimeError.
    code = textwrap.dedent(
        """
        import sys
        import pytest
        sys.modules['lzma'] = None
        import pandas
        df = pandas.DataFrame()
        with pytest.raises(RuntimeError, match='lzma module'):
            df.to_csv('foo.csv', compression='xz')
        """
    )
    # Use sys.executable rather than a bare "python": the latter is resolved
    # through PATH and may be a different interpreter (one without pandas or
    # pytest installed), or absent entirely.
    subprocess.check_output([sys.executable, "-c", code])

pandas/tests/io/test_pickle.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import bz2
1414
import glob
1515
import gzip
16-
import lzma
1716
import os
1817
import pickle
1918
import shutil
@@ -22,14 +21,16 @@
2221

2322
import pytest
2423

25-
from pandas.compat import is_platform_little_endian
24+
from pandas.compat import _get_lzma_file, _import_lzma, is_platform_little_endian
2625

2726
import pandas as pd
2827
from pandas import Index
2928
import pandas.util.testing as tm
3029

3130
from pandas.tseries.offsets import Day, MonthEnd
3231

32+
lzma = _import_lzma()
33+
3334

3435
@pytest.fixture(scope="module")
3536
def current_pickle_data():
@@ -270,7 +271,7 @@ def compress_file(self, src_path, dest_path, compression):
270271
with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f:
271272
f.write(src_path, os.path.basename(src_path))
272273
elif compression == "xz":
273-
f = lzma.LZMAFile(dest_path, "w")
274+
f = _get_lzma_file(lzma)(dest_path, "w")
274275
else:
275276
msg = "Unrecognized compression type: {}".format(compression)
276277
raise ValueError(msg)

pandas/util/testing.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from functools import wraps
66
import gzip
77
import http.client
8-
import lzma
98
import os
109
import re
1110
from shutil import rmtree
@@ -26,7 +25,7 @@
2625
)
2726

2827
import pandas._libs.testing as _testing
29-
from pandas.compat import raise_with_traceback
28+
from pandas.compat import _get_lzma_file, _import_lzma, raise_with_traceback
3029

3130
from pandas.core.dtypes.common import (
3231
is_bool,
@@ -70,6 +69,8 @@
7069
from pandas.io.common import urlopen
7170
from pandas.io.formats.printing import pprint_thing
7271

72+
lzma = _import_lzma()
73+
7374
N = 30
7475
K = 4
7576
_RAISE_NETWORK_ERROR_DEFAULT = False
@@ -211,7 +212,7 @@ def decompress_file(path, compression):
211212
elif compression == "bz2":
212213
f = bz2.BZ2File(path, "rb")
213214
elif compression == "xz":
214-
f = lzma.LZMAFile(path, "rb")
215+
f = _get_lzma_file(lzma)(path, "rb")
215216
elif compression == "zip":
216217
zip_file = zipfile.ZipFile(path)
217218
zip_names = zip_file.namelist()
@@ -264,9 +265,7 @@ def write_to_compressed(compression, path, data, dest="test"):
264265

265266
compress_method = bz2.BZ2File
266267
elif compression == "xz":
267-
import lzma
268-
269-
compress_method = lzma.LZMAFile
268+
compress_method = _get_lzma_file(lzma)
270269
else:
271270
msg = "Unrecognized compression type: {}".format(compression)
272271
raise ValueError(msg)

0 commit comments

Comments
 (0)