From d140f375e7dfe16f01d802a9571eb4c01b99ee83 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <roman.yurchak-malt@credit-agricole-sa.fr>
Date: Fri, 10 Sep 2021 11:19:29 +0200
Subject: [PATCH 01/10] ENH Remove import time warning for missing lzma

---
 pandas/_testing/_io.py              |  5 +----
 pandas/compat/__init__.py           | 25 ++++---------------------
 pandas/io/common.py                 |  4 +---
 pandas/tests/io/test_compression.py | 13 +++++++++++++
 pandas/tests/io/test_pickle.py      |  5 +----
 pandas/tests/io/test_stata.py       |  2 +-
 pandas/tests/io/xml/test_xml.py     |  4 ++--
 7 files changed, 23 insertions(+), 35 deletions(-)

diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py
index a0b6963cfac97..5a84e8b892093 100644
--- a/pandas/_testing/_io.py
+++ b/pandas/_testing/_io.py
@@ -15,7 +15,6 @@
 )
 from pandas.compat import (
     get_lzma_file,
-    import_lzma,
 )
 
 import pandas as pd
@@ -26,8 +25,6 @@
 
 _RAISE_NETWORK_ERROR_DEFAULT = False
 
-lzma = import_lzma()
-
 # skip tests on exceptions with these messages
 _network_error_messages = (
     # 'urlopen error timed out',
@@ -387,7 +384,7 @@ def write_to_compressed(compression, path, data, dest="test"):
     elif compression == "bz2":
         compress_method = bz2.BZ2File
     elif compression == "xz":
-        compress_method = get_lzma_file(lzma)
+        compress_method = get_lzma_file()
     else:
         raise ValueError(f"Unrecognized compression type: {compression}")
 
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 3233de8e3b6d1..85beeb702e1ff 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -102,27 +102,7 @@ def is_platform_arm() -> bool:
     return platform.machine() in ("arm64", "aarch64")
 
 
-def import_lzma():
-    """
-    Importing the `lzma` module.
-
-    Warns
-    -----
-    When the `lzma` module is not available.
-    """
-    try:
-        import lzma
-
-        return lzma
-    except ImportError:
-        msg = (
-            "Could not import the lzma module. Your installed Python is incomplete. "
-            "Attempting to use lzma compression will result in a RuntimeError."
-        )
-        warnings.warn(msg)
-
-
-def get_lzma_file(lzma):
+def get_lzma_file():
     """
     Importing the `LZMAFile` class from the `lzma` module.
 
@@ -137,6 +117,9 @@ def get_lzma_file(lzma):
         If the `lzma` module was not imported correctly, or didn't exist.
     """
     if lzma is None:
+    try:
+        import lzma
+    except ImportError:
         raise RuntimeError(
             "lzma module not available. "
             "A Python re-install with the proper dependencies, "
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 1058e98445284..1fc7872b3fb62 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -51,8 +51,6 @@
 
 from pandas.core.dtypes.common import is_file_like
 
-lzma = import_lzma()
-
 _VALID_URLS = set(uses_relative + uses_netloc + uses_params)
 _VALID_URLS.discard("")
 
@@ -685,7 +683,7 @@ def get_handle(
 
         # XZ Compression
         elif compression == "xz":
-            handle = get_lzma_file(lzma)(handle, ioargs.mode)
+            handle = get_lzma_file()(handle, ioargs.mode)
 
         # Unrecognized Compression
         else:
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index 6c90830639061..cda3df37406c3 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -191,6 +191,19 @@ def test_with_missing_lzma():
     subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE)
 
 
+def test_lzma_not_imported():
+    """Check that lzma is not imported by default"""
+    # https://github.com/pandas-dev/pandas/issues/43461
+    code = textwrap.dedent(
+        """\
+        import sys
+        import pandas
+        assert "lzma" not in sys.modules
+        """
+    )
+    subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE)
+
+
 def test_with_missing_lzma_runtime():
     """Tests if RuntimeError is hit when calling lzma without
     having the module available.
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 9253e5ae700c7..f2f9f403dbbf1 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -33,7 +33,6 @@
 
 from pandas.compat import (
     get_lzma_file,
-    import_lzma,
     is_platform_little_endian,
 )
 import pandas.util._test_decorators as td
@@ -51,8 +50,6 @@
     MonthEnd,
 )
 
-lzma = import_lzma()
-
 
 # TODO(ArrayManager) pickling
 pytestmark = [
@@ -313,7 +310,7 @@ def compress_file(self, src_path, dest_path, compression):
             with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f:
                 f.write(src_path, os.path.basename(src_path))
         elif compression == "xz":
-            f = get_lzma_file(lzma)(dest_path, "w")
+            f = get_lzma_file()(dest_path, "w")
         else:
             msg = f"Unrecognized compression type: {compression}"
             raise ValueError(msg)
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 02cf478c61583..9f7f8b7333eb9 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -3,7 +3,6 @@
 from datetime import datetime
 import gzip
 import io
-import lzma
 import os
 import struct
 import warnings
@@ -1904,6 +1903,7 @@ def test_compression(compression, version, use_dict, infer):
             with bz2.open(path, "rb") as comp:
                 fp = io.BytesIO(comp.read())
         elif compression == "xz":
+            lzma = pytest.importorskip('lzma')
             with lzma.open(path, "rb") as comp:
                 fp = io.BytesIO(comp.read())
         elif compression is None:
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 7e9a03c2a59a8..8c465fbc4d6b4 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -1044,12 +1044,12 @@ def test_wrong_compression_gz(parser, comp):
 
 @pytest.mark.parametrize("comp", ["bz2", "gzip", "zip"])
 def test_wrong_compression_xz(parser, comp):
-    from lzma import LZMAError
+    lzma = pytest.imortorskip('lzma')
 
     with tm.ensure_clean() as path:
         geom_df.to_xml(path, parser=parser, compression=comp)
 
-        with pytest.raises(LZMAError, match="Input format not supported by decoder"):
+        with pytest.raises(lzma.LZMAError, match="Input format not supported by decoder"):
             read_xml(path, parser=parser, compression="xz")
 
 

From 73416452bd01b029120c8c95aeffadb1eff1029b Mon Sep 17 00:00:00 2001
From: Roman Yurchak <roman.yurchak-malt@credit-agricole-sa.fr>
Date: Fri, 10 Sep 2021 11:36:53 +0200
Subject: [PATCH 02/10] Lint

---
 pandas/compat/__init__.py       | 2 --
 pandas/tests/io/xml/test_xml.py | 6 ++++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 85beeb702e1ff..a6a344418c67b 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -9,7 +9,6 @@
 """
 import platform
 import sys
-import warnings
 
 from pandas._typing import F
 from pandas.compat.numpy import (
@@ -116,7 +115,6 @@ def get_lzma_file():
     RuntimeError
         If the `lzma` module was not imported correctly, or didn't exist.
     """
-    if lzma is None:
     try:
         import lzma
     except ImportError:
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 8c465fbc4d6b4..4692b5535bfa7 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -1044,12 +1044,14 @@ def test_wrong_compression_gz(parser, comp):
 
 @pytest.mark.parametrize("comp", ["bz2", "gzip", "zip"])
 def test_wrong_compression_xz(parser, comp):
-    lzma = pytest.imortorskip('lzma')
+    lzma = pytest.importorskip("lzma")
 
     with tm.ensure_clean() as path:
         geom_df.to_xml(path, parser=parser, compression=comp)
 
-        with pytest.raises(lzma.LZMAError, match="Input format not supported by decoder"):
+        with pytest.raises(
+            lzma.LZMAError, match="Input format not supported by decoder"
+        ):
             read_xml(path, parser=parser, compression="xz")
 
 

From a4b65e127d1b432428f47d006322cc54d361ad20 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <roman.yurchak-malt@credit-agricole-sa.fr>
Date: Fri, 10 Sep 2021 11:46:59 +0200
Subject: [PATCH 03/10] More fixes

---
 pandas/io/common.py           | 5 +----
 pandas/tests/io/test_stata.py | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 1fc7872b3fb62..a70007d77a81a 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -43,10 +43,7 @@
     FilePathOrBuffer,
     StorageOptions,
 )
-from pandas.compat import (
-    get_lzma_file,
-    import_lzma,
-)
+from pandas.compat import get_lzma_file
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.core.dtypes.common import is_file_like
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 9f7f8b7333eb9..35ba2731d091f 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -1903,7 +1903,7 @@ def test_compression(compression, version, use_dict, infer):
             with bz2.open(path, "rb") as comp:
                 fp = io.BytesIO(comp.read())
         elif compression == "xz":
-            lzma = pytest.importorskip('lzma')
+            lzma = pytest.importorskip("lzma")
             with lzma.open(path, "rb") as comp:
                 fp = io.BytesIO(comp.read())
         elif compression is None:

From 4885afb0c614af58ba6872d4cff5d04189dcf10b Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 8 Oct 2021 13:30:37 +0200
Subject: [PATCH 04/10] Address review comments

---
 ci/code_checks.sh                   |  2 +-
 doc/source/whatsnew/v1.4.0.rst      |  1 +
 pandas/tests/io/test_compression.py | 13 -------------
 3 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 7e4b5775af317..b4a7c86ce3551 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -47,7 +47,7 @@ import pandas
 
 blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
              'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
-             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
+             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt', 'lzma'}
 
 # GH#28227 for some of these check for top-level modules, while others are
 #  more specific (e.g. urllib.request)
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 5d7bf4fc36e9e..59aea60373120 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -390,6 +390,7 @@ I/O
 - Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
 - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
 - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
+- Bug when Python was built without lzma module: a warning was raised at the pandas import time, even if the lzma module wasn't used. (:issue:`43495`)
 
 Period
 ^^^^^^
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index cda3df37406c3..6c90830639061 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -191,19 +191,6 @@ def test_with_missing_lzma():
     subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE)
 
 
-def test_lzma_not_imported():
-    """Check that lzma is not imported by default"""
-    # https://github.com/pandas-dev/pandas/issues/43461
-    code = textwrap.dedent(
-        """\
-        import sys
-        import pandas
-        assert "lzma" not in sys.modules
-        """
-    )
-    subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE)
-
-
 def test_with_missing_lzma_runtime():
     """Tests if RuntimeError is hit when calling lzma without
     having the module available.

From 10bc68591ae238e802933d2b5fef8d9cd702c413 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 8 Oct 2021 13:44:43 +0200
Subject: [PATCH 05/10] Mention it in install.rst

---
 doc/source/getting_started/install.rst | 1 +
 pandas/_testing/_io.py                 | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index 20ae37c85a9d9..83a50d0f84a7c 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -362,6 +362,7 @@ zlib                                         Compression for HDF5
 fastparquet               0.4.0              Parquet reading / writing
 pyarrow                   0.17.0             Parquet, ORC, and feather reading / writing
 pyreadstat                                   SPSS files (.sav) reading
+Python with lzma module   None               Reading files with .lzma or .xz compression
 ========================= ================== =============================================================
 
 .. _install.warn_orc:
diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py
index ec59db7d28870..0df4507964078 100644
--- a/pandas/_testing/_io.py
+++ b/pandas/_testing/_io.py
@@ -11,9 +11,7 @@
 import zipfile
 
 from pandas._typing import FilePathOrBuffer
-from pandas.compat import (
-    get_lzma_file,
-)
+from pandas.compat import get_lzma_file
 
 import pandas as pd
 from pandas._testing._random import rands

From 3155c3e8f4424b3acfae7073048e234ccace69d5 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 8 Oct 2021 13:49:06 +0200
Subject: [PATCH 06/10] Improve wording

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index f7e27f493034f..a999c74abd88b 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -474,7 +474,7 @@ I/O
 - Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
 - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
 - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
-- Bug when Python was built without lzma module: a warning was raised at the pandas import time, even if the lzma module wasn't used. (:issue:`43495`)
+- Bug when Python is built without lzma module: a warning was raised at the pandas import time, even if the lzma capability isn't used. (:issue:`43495`)
 -
 
 Period

From 887b519211c0337e13dd1a969cfde5a9c759471d Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Fri, 8 Oct 2021 14:08:54 +0200
Subject: [PATCH 07/10] More lint fixes

---
 pandas/tests/io/test_pickle.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index f2f9f403dbbf1..2c292323de16e 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -50,7 +50,6 @@
     MonthEnd,
 )
 
-
 # TODO(ArrayManager) pickling
 pytestmark = [
     td.skip_array_manager_not_yet_implemented,

From 4bf231e12f35f86ed2dc9cf1d33da492b20513da Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Sat, 6 Nov 2021 12:00:04 +0100
Subject: [PATCH 08/10] Address review comment

---
 doc/source/getting_started/install.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index 8daa961fae48b..da70549687594 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -362,7 +362,6 @@ zlib                                         Compression for HDF5
 fastparquet               0.4.0              Parquet reading / writing
 pyarrow                   1.0.1              Parquet, ORC, and feather reading / writing
 pyreadstat                                   SPSS files (.sav) reading
-Python with lzma module   None               Reading files with .lzma or .xz compression
 ========================= ================== =============================================================
 
 .. _install.warn_orc:

From 986a549732e9bd16284b8d919f30824a49665424 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Sun, 28 Nov 2021 22:35:56 +0100
Subject: [PATCH 09/10] Remove lzma from the blocklist

---
 ci/code_checks.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 4fae11cc96008..503120e486f5a 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -47,7 +47,7 @@ import pandas
 
 blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
              'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
-             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt', 'lzma'}
+             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
 
 # GH#28227 for some of these check for top-level modules, while others are
 #  more specific (e.g. urllib.request)

From 02d6e5de4678851402d1102eb5a7e2cfe58db1f5 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Sun, 28 Nov 2021 22:37:26 +0100
Subject: [PATCH 10/10] Fix merge conflict

---
 doc/source/whatsnew/v1.4.0.rst | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index b50e3dfc46605..b4502912b72e5 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -667,11 +667,8 @@ I/O
 - Bug in :func:`read_csv` used second row to guess implicit index if ``header`` was set to ``None`` for ``engine="python"`` (:issue:`22144`)
 - Bug in :func:`read_csv` not recognizing bad lines when ``names`` were given for ``engine="c"`` (:issue:`22144`)
 - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
-<<<<<<< HEAD
 - Bug when Python is built without lzma module: a warning was raised at the pandas import time, even if the lzma capability isn't used. (:issue:`43495`)
-=======
 - Bug in :func:`read_csv` not applying dtype for ``index_col`` (:issue:`9435`)
->>>>>>> upstream/master
 - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`)
 - Bug in :func:`read_csv` raising ``ValueError`` when names was longer than header but equal to data rows for ``engine="python"`` (:issue:`38453`)
 - Bug in :class:`ExcelWriter`, where ``engine_kwargs`` were not passed through to all engines (:issue:`43442`)