Skip to content

BUG: lzma is a required part of python, make it optional #27882

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Aug 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c1a5326
Importing lzma when Python has been compiled without its support will
guilherme-salome Aug 12, 2019
f9b420b
Substituted import lzma to call to import_lzma
guilherme-salome Aug 12, 2019
998b5aa
Calls to lzma module will raise a RuntimeError.
guilherme-salome Aug 12, 2019
6c53d80
Formatted with black.
guilherme-salome Aug 12, 2019
2415e1f
Raise RuntimeError when calling a method of lzma when lzma is not ava…
guilherme-salome Aug 12, 2019
52cb8ed
Release note explaining solution to #27575.
guilherme-salome Aug 12, 2019
3401cd5
Moved import warnings to top.
guilherme-salome Aug 12, 2019
10fecae
Added test for import lzma. Test passes when lzma is not available and
guilherme-salome Aug 12, 2019
c79e31b
Improved explanation of solution to #27575.
guilherme-salome Aug 12, 2019
f22bf4f
Merge branch 'master' of https://github.com/pandas-dev/pandas into lz…
guilherme-salome Aug 12, 2019
68de9de
Fixed isort.
guilherme-salome Aug 13, 2019
7656b34
Removed remains from a merge.
guilherme-salome Aug 13, 2019
e74dc30
Moved I/O and LZMA bug fix to a separate section.
guilherme-salome Aug 13, 2019
881fc12
Unnecessary import.
guilherme-salome Aug 13, 2019
2e4e422
Updated RuntimeError message to alert user that a re-install might be…
guilherme-salome Aug 13, 2019
351d8e4
Moved the check `lzma is None` to a function, which also raises
guilherme-salome Aug 13, 2019
ea0fd69
Removed bulletpoints.
guilherme-salome Aug 13, 2019
d65110d
Added test that fails at Runtime when lzma module not available. Test
guilherme-salome Aug 13, 2019
18405a6
Modified runtime test to make direct call to pandas function.
guilherme-salome Aug 15, 2019
af64991
Added docstring.
guilherme-salome Aug 15, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion doc/source/whatsnew/v0.25.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ MultiIndex

I/O
^^^

- Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
-
-
Expand Down Expand Up @@ -160,6 +159,14 @@ Other
-
-

I/O and LZMA
~~~~~~~~~~~~

Some users may unknowingly have an incomplete Python installation, which lacks the ``lzma`` module from the standard library. In this case, ``import pandas`` failed due to an ``ImportError`` (:issue:`27575`).
Pandas will now warn, rather than raising an ``ImportError``, if the ``lzma`` module is not present. Any subsequent attempt to use ``lzma`` methods will raise a ``RuntimeError``.
A possible fix for the lack of the ``lzma`` module is to ensure you have the necessary libraries and then re-install Python.
For example, on macOS installing Python with ``pyenv`` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like ``xz``). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python.

.. _whatsnew_0.251.contributors:

Contributors
Expand Down
8 changes: 5 additions & 3 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# See LICENSE for the license
import bz2
import gzip
import lzma
import os
import sys
import time
Expand Down Expand Up @@ -59,9 +58,12 @@ from pandas.core.arrays import Categorical
from pandas.core.dtypes.concat import union_categoricals
import pandas.io.common as icom

from pandas.compat import _import_lzma, _get_lzma_file
from pandas.errors import (ParserError, DtypeWarning,
EmptyDataError, ParserWarning)

lzma = _import_lzma()

# Import CParserError as alias of ParserError for backwards compatibility.
# Ultimately, we want to remove this import. See gh-12665 and gh-14479.
CParserError = ParserError
Expand Down Expand Up @@ -645,9 +647,9 @@ cdef class TextReader:
'zip file %s', str(zip_names))
elif self.compression == 'xz':
if isinstance(source, str):
source = lzma.LZMAFile(source, 'rb')
source = _get_lzma_file(lzma)(source, 'rb')
else:
source = lzma.LZMAFile(filename=source)
source = _get_lzma_file(lzma)(filename=source)
else:
raise ValueError('Unrecognized compression type: %s' %
self.compression)
Expand Down
30 changes: 30 additions & 0 deletions pandas/compat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import platform
import struct
import sys
import warnings

PY35 = sys.version_info[:2] == (3, 5)
PY36 = sys.version_info >= (3, 6)
Expand Down Expand Up @@ -65,3 +66,32 @@ def is_platform_mac():

def is_platform_32bit():
    """Reports whether the running interpreter uses pointers narrower than 64 bits.

    The width is derived from the size of a C pointer (``struct`` format
    ``"P"``), converted from bytes to bits.
    """
    pointer_bits = 8 * struct.calcsize("P")
    return pointer_bits < 64


def _import_lzma():
    """Tries to import lzma and hands the module back to the caller.

    If the import fails, a warning is emitted instead of propagating the
    ImportError, and None is returned so callers can detect the missing
    module later.
    """
    try:
        import lzma
    except ImportError:
        warnings.warn(
            "Could not import the lzma module. "
            "Your installed Python is incomplete. "
            "Attempting to use lzma compression will result in a RuntimeError."
        )
        return None
    return lzma


def _get_lzma_file(lzma):
    """Looks up ``LZMAFile`` on the given lzma module object.

    ``lzma`` is expected to be either the imported module or None (the
    value used when the import failed). A None argument raises a
    RuntimeError; otherwise the object's ``LZMAFile`` attribute is returned.
    """
    if lzma is not None:
        return lzma.LZMAFile
    raise RuntimeError(
        "lzma module not available. "
        "A Python re-install with the proper "
        "dependencies might be required to solve this issue."
    )
6 changes: 4 additions & 2 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import gzip
from http.client import HTTPException # noqa
from io import BytesIO
import lzma
import mmap
import os
import pathlib
Expand All @@ -23,6 +22,7 @@
from urllib.request import pathname2url, urlopen
import zipfile

from pandas.compat import _get_lzma_file, _import_lzma
from pandas.errors import ( # noqa
AbstractMethodError,
DtypeWarning,
Expand All @@ -35,6 +35,8 @@

from pandas._typing import FilePathOrBuffer

lzma = _import_lzma()

# gh-12665: Alias for now and remove later.
CParserError = ParserError

Expand Down Expand Up @@ -395,7 +397,7 @@ def _get_handle(

# XZ Compression
elif compression == "xz":
f = lzma.LZMAFile(path_or_buf, mode)
f = _get_lzma_file(lzma)(path_or_buf, mode)

# Unrecognized Compression
else:
Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/io/test_compression.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import contextlib
import os
import subprocess
import sys
import textwrap
import warnings

import pytest
Expand Down Expand Up @@ -125,3 +127,33 @@ def test_compression_warning(compression_only):
with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False):
with f:
df.to_csv(f, compression=compression_only)


def test_with_missing_lzma():
    """Tests if import pandas works when lzma is not present."""
    # https://github.com/pandas-dev/pandas/issues/27575
    # Setting sys.modules['lzma'] to None makes any later ``import lzma``
    # raise ImportError, simulating a Python build without the module.
    code = textwrap.dedent(
        """\
        import sys
        sys.modules['lzma'] = None
        import pandas
        """
    )
    # Run with the interpreter executing this test suite; a bare "python"
    # may resolve to a different installation or not exist on PATH.
    subprocess.check_output([sys.executable, "-c", code])


def test_with_missing_lzma_runtime():
    """Tests if RuntimeError is hit when calling lzma without
    having the module available."""
    # The child process hides lzma before importing pandas, then checks
    # that requesting xz compression raises the expected RuntimeError.
    code = textwrap.dedent(
        """
        import sys
        import pytest
        sys.modules['lzma'] = None
        import pandas
        df = pandas.DataFrame()
        with pytest.raises(RuntimeError, match='lzma module'):
            df.to_csv('foo.csv', compression='xz')
        """
    )
    # Run with the interpreter executing this test suite; a bare "python"
    # may resolve to a different installation or not exist on PATH.
    subprocess.check_output([sys.executable, "-c", code])
7 changes: 4 additions & 3 deletions pandas/tests/io/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import bz2
import glob
import gzip
import lzma
import os
import pickle
import shutil
Expand All @@ -22,14 +21,16 @@

import pytest

from pandas.compat import is_platform_little_endian
from pandas.compat import _get_lzma_file, _import_lzma, is_platform_little_endian

import pandas as pd
from pandas import Index
import pandas.util.testing as tm

from pandas.tseries.offsets import Day, MonthEnd

lzma = _import_lzma()


@pytest.fixture(scope="module")
def current_pickle_data():
Expand Down Expand Up @@ -270,7 +271,7 @@ def compress_file(self, src_path, dest_path, compression):
with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f:
f.write(src_path, os.path.basename(src_path))
elif compression == "xz":
f = lzma.LZMAFile(dest_path, "w")
f = _get_lzma_file(lzma)(dest_path, "w")
else:
msg = "Unrecognized compression type: {}".format(compression)
raise ValueError(msg)
Expand Down
11 changes: 5 additions & 6 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from functools import wraps
import gzip
import http.client
import lzma
import os
import re
from shutil import rmtree
Expand All @@ -26,7 +25,7 @@
)

import pandas._libs.testing as _testing
from pandas.compat import raise_with_traceback
from pandas.compat import _get_lzma_file, _import_lzma, raise_with_traceback

from pandas.core.dtypes.common import (
is_bool,
Expand Down Expand Up @@ -70,6 +69,8 @@
from pandas.io.common import urlopen
from pandas.io.formats.printing import pprint_thing

lzma = _import_lzma()

N = 30
K = 4
_RAISE_NETWORK_ERROR_DEFAULT = False
Expand Down Expand Up @@ -211,7 +212,7 @@ def decompress_file(path, compression):
elif compression == "bz2":
f = bz2.BZ2File(path, "rb")
elif compression == "xz":
f = lzma.LZMAFile(path, "rb")
f = _get_lzma_file(lzma)(path, "rb")
elif compression == "zip":
zip_file = zipfile.ZipFile(path)
zip_names = zip_file.namelist()
Expand Down Expand Up @@ -264,9 +265,7 @@ def write_to_compressed(compression, path, data, dest="test"):

compress_method = bz2.BZ2File
elif compression == "xz":
import lzma

compress_method = lzma.LZMAFile
compress_method = _get_lzma_file(lzma)
else:
msg = "Unrecognized compression type: {}".format(compression)
raise ValueError(msg)
Expand Down