From 8736b20db28fe9977fb75433a64b755a09878ae5 Mon Sep 17 00:00:00 2001 From: "alexprincel@gmail.com" Date: Wed, 10 Feb 2021 20:43:13 -0500 Subject: [PATCH 1/6] BUG: Fix pd.show_versions as_json invalid JSON (#39701) --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/tests/util/test_show_versions.py | 70 +++++++++++++++++++++---- pandas/util/_print_versions.py | 2 +- 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 799bc88ffff4e..07678317ebb07 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -461,6 +461,7 @@ Other - Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`) - :class:`Styler` rendered HTML output minor alterations to support w3 good code standard (:issue:`39626`) - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) +- Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`) .. --------------------------------------------------------------------------- diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index b6a16d027db77..e686b3a7a9cc3 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -1,7 +1,11 @@ +import json +import os import re import pytest +from pandas.util._print_versions import _get_dependency_info, _get_sys_info + import pandas as pd @@ -26,21 +30,65 @@ "ignore:Distutils:UserWarning" ) @pytest.mark.filterwarnings("ignore:Setuptools is replacing distutils:UserWarning") -def test_show_versions(capsys): +@pytest.mark.parametrize("as_json", [True, False, "test_output.json"]) +def test_show_versions(capsys, as_json, tmpdir): # gh-32041 - pd.show_versions() + if isinstance(as_json, str): + as_json = os.path.join(tmpdir, as_json) + + pd.show_versions(as_json=as_json) captured = capsys.readouterr() result = captured.out - # check header - assert "INSTALLED VERSIONS" in result + # check header for non-JSON console output + if as_json is False: + assert "INSTALLED VERSIONS" in result + + # check full commit hash + assert re.search(r"commit\s*:\s[0-9a-f]{40}\n", result) + + # check required dependency + # 2020-12-09 npdev has "dirty" in the tag + assert re.search(r"numpy\s*:\s([0-9\.\+a-g\_]|dev)+(dirty)?\n", result) + + # check optional dependency + assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result) + + # Dictionary-based asserts + else: + # check valid json is printed to the console if as_json is True + if as_json is True: + dict_check = json.loads(result) + elif isinstance(as_json, str): + # make sure that the file was created + assert os.path.exists(as_json) + + with open(as_json) as fd: + contents = fd.readlines() + str_contents = "".join(contents) + + # make sure that there was output to the file + assert str_contents + + # check if file output is valid JSON + dict_check = json.loads(str_contents) + + # Basic check that each version element is found in output + version_elements = { + "system": _get_sys_info(), + "dependencies": _get_dependency_info(), + } + + assert version_elements == dict_check + - # check full commit hash - assert re.search(r"commit\s*:\s[0-9a-f]{40}\n", result) +def test_json_output_match(capsys, tmpdir): + pd.show_versions(as_json=True) + result_console = capsys.readouterr().out - # check required dependency - # 2020-12-09 npdev has "dirty" in the tag - assert re.search(r"numpy\s*:\s([0-9\.\+a-g\_]|dev)+(dirty)?\n", result) + out_path = os.path.join(tmpdir, "test_json.json") + pd.show_versions(as_json=out_path) + with open(out_path) as out_fd: + result_file = "".join(out_fd.readlines()) - # check optional dependency - assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result) + assert result_console == result_file diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index ae3c8c98f8dc1..5ca7afa6634eb 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -107,7 +107,7 @@ def show_versions(as_json: Union[str, bool] = False) -> None: j = {"system": sys_info, "dependencies": deps} if as_json is True: - print(j) + sys.stdout.writelines(json.dumps(j, indent=2)) else: assert isinstance(as_json, str) # needed for mypy with codecs.open(as_json, "wb", encoding="utf8") as f: From 691b1a2924b913fda20215961dc3475838d6b630 Mon Sep 17 00:00:00 2001 From: "alexprincel@gmail.com" Date: Sat, 20 Feb 2021 20:25:32 -0500 Subject: [PATCH 2/6] BUG: Separate multiple tests (39701) --- pandas/tests/util/test_show_versions.py | 90 ++++++++++++++----------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index e686b3a7a9cc3..407c058bf5203 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -4,7 +4,10 @@ import pytest -from pandas.util._print_versions import _get_dependency_info, _get_sys_info +from pandas.util._print_versions import ( + _get_dependency_info, + _get_sys_info, +) import pandas as pd @@ -30,56 +33,67 @@ "ignore:Distutils:UserWarning" ) @pytest.mark.filterwarnings("ignore:Setuptools is replacing distutils:UserWarning") -@pytest.mark.parametrize("as_json", [True, False, "test_output.json"]) -def test_show_versions(capsys, as_json, tmpdir): - # gh-32041 - if isinstance(as_json, str): - as_json = os.path.join(tmpdir, as_json) +def test_show_versions(tmpdir): + as_json = os.path.join(tmpdir, "test_output.json") pd.show_versions(as_json=as_json) + + # make sure that the file was created + assert os.path.exists(as_json) + + with open(as_json) as fd: + contents = fd.readlines() + str_contents = "".join(contents) + + # make sure that there was output to the file + assert str_contents + + # check if file output is valid JSON, will raise an exception if not + dict_check = json.loads(str_contents) + + # Basic check that each version element is found in output + version_elements = { + "system": _get_sys_info(), + "dependencies": _get_dependency_info(), + } + + assert version_elements == dict_check + + +def test_show_versions_console_json(capsys): + pd.show_versions(as_json=True) captured = capsys.readouterr() result = captured.out - # check header for non-JSON console output - if as_json is False: - assert "INSTALLED VERSIONS" in result - - # check full commit hash - assert re.search(r"commit\s*:\s[0-9a-f]{40}\n", result) + # check valid json is printed to the console if as_json is True + dict_check = json.loads(result) - # check required dependency - # 2020-12-09 npdev has "dirty" in the tag - assert re.search(r"numpy\s*:\s([0-9\.\+a-g\_]|dev)+(dirty)?\n", result) + # Basic check that each version element is found in output + version_elements = { + "system": _get_sys_info(), + "dependencies": _get_dependency_info(), + } - # check optional dependency - assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result) + assert version_elements == dict_check - # Dictionary-based asserts - else: - # check valid json is printed to the console if as_json is True - if as_json is True: - dict_check = json.loads(result) - elif isinstance(as_json, str): - # make sure that the file was created - assert os.path.exists(as_json) - with open(as_json) as fd: - contents = fd.readlines() - str_contents = "".join(contents) +def test_show_versions_console(capsys): + # gh-32041 + pd.show_versions(as_json=False) + captured = capsys.readouterr() + result = captured.out - # make sure that there was output to the file - assert str_contents + assert "INSTALLED VERSIONS" in result - # check if file output is valid JSON - dict_check = json.loads(str_contents) + # check full commit hash + assert re.search(r"commit\s*:\s[0-9a-f]{40}\n", result) - # Basic check that each version element is found in output - version_elements = { - "system": _get_sys_info(), - "dependencies": _get_dependency_info(), - } + # check required dependency + # 2020-12-09 npdev has "dirty" in the tag + assert re.search(r"numpy\s*:\s([0-9\.\+a-g\_]|dev)+(dirty)?\n", result) - assert version_elements == dict_check + # check optional dependency + assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result) def test_json_output_match(capsys, tmpdir): From 23d34c9c3ba7dab0dbbdc43364cdf3ae87fdb8e2 Mon Sep 17 00:00:00 2001 From: "alexprincel@gmail.com" Date: Sun, 21 Feb 2021 09:38:49 -0500 Subject: [PATCH 3/6] BUG: Various minor improvements on test_show_versions (#39701) --- pandas/tests/util/test_show_versions.py | 36 +++++++++++-------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index 407c058bf5203..4346760f55b28 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -12,6 +12,10 @@ import pandas as pd +def get_captured_output(capsys): + return capsys.readouterr().out + + @pytest.mark.filterwarnings( # openpyxl "ignore:defusedxml.lxml is no longer supported:DeprecationWarning" @@ -38,51 +42,41 @@ def test_show_versions(tmpdir): pd.show_versions(as_json=as_json) - # make sure that the file was created - assert os.path.exists(as_json) - with open(as_json) as fd: - contents = fd.readlines() - str_contents = "".join(contents) - - # make sure that there was output to the file - assert str_contents - # check if file output is valid JSON, will raise an exception if not - dict_check = json.loads(str_contents) + result = json.load(fd) # Basic check that each version element is found in output - version_elements = { + expected = { "system": _get_sys_info(), "dependencies": _get_dependency_info(), } - assert version_elements == dict_check + assert result == expected def test_show_versions_console_json(capsys): pd.show_versions(as_json=True) - captured = capsys.readouterr() - result = captured.out + result = get_captured_output(capsys) # check valid json is printed to the console if as_json is True - dict_check = json.loads(result) + result = json.loads(result) # Basic check that each version element is found in output - version_elements = { + expected = { "system": _get_sys_info(), "dependencies": _get_dependency_info(), } - assert version_elements == dict_check + assert result == expected def test_show_versions_console(capsys): # gh-32041 pd.show_versions(as_json=False) - captured = capsys.readouterr() - result = captured.out + result = get_captured_output(capsys) + # check header assert "INSTALLED VERSIONS" in result # check full commit hash @@ -98,11 +92,11 @@ def test_show_versions_console(capsys): def test_json_output_match(capsys, tmpdir): pd.show_versions(as_json=True) - result_console = capsys.readouterr().out + result_console = get_captured_output(capsys) out_path = os.path.join(tmpdir, "test_json.json") pd.show_versions(as_json=out_path) with open(out_path) as out_fd: - result_file = "".join(out_fd.readlines()) + result_file = out_fd.read() assert result_console == result_file From 7cd411bc628ad8952fd746a9314d132bdc64df44 Mon Sep 17 00:00:00 2001 From: "alexprincel@gmail.com" Date: Sun, 21 Feb 2021 20:14:09 -0500 Subject: [PATCH 4/6] BUG: Various test improvements to test_show_versions (#39701) --- pandas/tests/util/test_show_versions.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index 4346760f55b28..a9885ac94c047 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -38,6 +38,7 @@ def get_captured_output(capsys): ) @pytest.mark.filterwarnings("ignore:Setuptools is replacing distutils:UserWarning") def test_show_versions(tmpdir): + # GH39701 as_json = os.path.join(tmpdir, "test_output.json") pd.show_versions(as_json=as_json) @@ -56,11 +57,12 @@ def test_show_versions(tmpdir): def test_show_versions_console_json(capsys): + # GH39701 pd.show_versions(as_json=True) - result = get_captured_output(capsys) + stdout = capsys.readouterr().out # check valid json is printed to the console if as_json is True - result = json.loads(result) + result = json.loads(stdout) # Basic check that each version element is found in output expected = { @@ -73,8 +75,9 @@ def test_show_versions_console_json(capsys): def test_show_versions_console(capsys): # gh-32041 + # GH39701 pd.show_versions(as_json=False) - result = get_captured_output(capsys) + result = capsys.readouterr().out # check header assert "INSTALLED VERSIONS" in result @@ -91,8 +94,9 @@ def test_show_versions_console(capsys): def test_json_output_match(capsys, tmpdir): + # GH39701 pd.show_versions(as_json=True) - result_console = get_captured_output(capsys) + result_console = capsys.readouterr().out out_path = os.path.join(tmpdir, "test_json.json") pd.show_versions(as_json=out_path) From 02e6bc7c62012a8daf3e9b1b625fdb586d74a19b Mon Sep 17 00:00:00 2001 From: "alexprincel@gmail.com" Date: Mon, 22 Feb 2021 08:11:03 -0500 Subject: [PATCH 5/6] BUG: Remove unused function (#39701) --- pandas/tests/util/test_show_versions.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index a9885ac94c047..af92c822cb836 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -12,10 +12,6 @@ import pandas as pd -def get_captured_output(capsys): - return capsys.readouterr().out - - @pytest.mark.filterwarnings( # openpyxl "ignore:defusedxml.lxml is no longer supported:DeprecationWarning" From c95783674ec03a67d9136ce16c2db458835d3bc2 Mon Sep 17 00:00:00 2001 From: Alexandre Prince-Levasseur <46272939+alexprincel@users.noreply.github.com> Date: Mon, 22 Feb 2021 13:00:22 -0500 Subject: [PATCH 6/6] Update github issue number Co-authored-by: Marco Gorelli --- pandas/tests/util/test_show_versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index af92c822cb836..57cd2e1a144b6 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -71,7 +71,7 @@ def test_show_versions_console_json(capsys): def test_show_versions_console(capsys): # gh-32041 - # GH39701 + # gh-32041 pd.show_versions(as_json=False) result = capsys.readouterr().out