Skip to content

Commit 3303c74

Browse files
committed
Improve readability of WinBashStatus class
- Factor out the code to get the Windows ACP to a helper function, and comment to explain why it doesn't use locale.getencoding. - Remove nonessential material in the WinBashStatus.check docstring and reword the rest for clarity. - Drop reStructuredText notation in the WinBashStatus docstrings, because in this case it seems to be making them harder to read in the code (we are not generating Sphinx documentation for tests.) - Revise the comments on specific steps in WinBashStatus._decode for accuracy and clarity.
1 parent b07e5c7 commit 3303c74

File tree

1 file changed

+47
-49
lines changed

1 file changed

+47
-49
lines changed

Diff for: test/test_index.py

+47-49
Original file line numberDiff line numberDiff line change
@@ -40,58 +40,60 @@
4040
log = logging.getLogger(__name__)
4141

4242

43+
def _get_windows_ansi_encoding():
44+
"""Get the encoding specified by the Windows system-wide ANSI active code page."""
45+
# locale.getencoding may work but is only in Python 3.11+. Use the registry instead.
46+
import winreg
47+
48+
hklm_path = R"SYSTEM\CurrentControlSet\Control\Nls\CodePage"
49+
with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, hklm_path) as key:
50+
value, _ = winreg.QueryValueEx(key, "ACP")
51+
return f"cp{value}"
52+
53+
4354
@sumtype
4455
class WinBashStatus:
4556
"""Status of bash.exe for native Windows. Affects which commit hook tests can pass.
4657
47-
Call :meth:`check` to check the status.
48-
49-
The :class:`CheckError` and :class:`WinError` cases should not typically be used in
50-
``skip`` or ``xfail`` mark conditions, because they represent unexpected situations.
58+
Call check() to check the status. (CheckError and WinError should not typically be
59+
used to trigger skip or xfail, because they represent unexpected situations.)
5160
"""
5261

5362
Inapplicable = constructor()
5463
"""This system is not native Windows: either not Windows at all, or Cygwin."""
5564

5665
Absent = constructor()
57-
"""No command for ``bash.exe`` is found on the system."""
66+
"""No command for bash.exe is found on the system."""
5867

5968
Native = constructor()
60-
"""Running ``bash.exe`` operates outside any WSL distribution (as with Git Bash)."""
69+
"""Running bash.exe operates outside any WSL distribution (as with Git Bash)."""
6170

6271
Wsl = constructor()
63-
"""Running ``bash.exe`` calls ``bash`` in a WSL distribution."""
72+
"""Running bash.exe calls bash in a WSL distribution."""
6473

6574
WslNoDistro = constructor("process", "message")
66-
"""Running ``bash.exe` tries to run bash on a WSL distribution, but none exists."""
75+
"""Running bash.exe tries to run bash on a WSL distribution, but none exists."""
6776

6877
CheckError = constructor("process", "message")
69-
"""Running ``bash.exe`` fails in an unexpected error or gives unexpected output."""
78+
"""Running bash.exe fails in an unexpected error or gives unexpected output."""
7079

7180
WinError = constructor("exception")
72-
"""``bash.exe`` may exist but can't run. ``CreateProcessW`` fails unexpectedly."""
81+
"""bash.exe may exist but can't run. CreateProcessW fails unexpectedly."""
7382

7483
@classmethod
7584
def check(cls):
76-
"""Check the status of the ``bash.exe`` :func:`index.fun.run_commit_hook` uses.
77-
78-
This uses EAFP, attempting to run a command via ``bash.exe``. Which ``bash.exe``
79-
is used can't be reliably discovered by :func:`shutil.which`, which approximates
80-
how a shell is expected to search for an executable. On Windows, there are major
81-
differences between how executables are found by a shell and otherwise. (This is
82-
the cmd.exe Windows shell, and shouldn't be confused with bash.exe itself. That
83-
the command being looked up also happens to be an interpreter is not relevant.)
84-
85-
:func:`index.fun.run_commit_hook` uses :class:`subprocess.Popen`, including when
86-
it runs ``bash.exe`` on Windows. It doesn't pass ``shell=True`` (and shouldn't).
87-
On Windows, `Popen` calls ``CreateProcessW``, which checks some locations before
88-
using the ``PATH`` environment variable. It is expected to try the ``System32``
89-
directory, even if another directory containing the executable precedes it in
90-
``PATH``. (The other differences are less relevant here.) When WSL is present,
91-
even with no distributions, ``bash.exe`` usually exists in ``System32``, and
92-
`Popen` finds it even if another ``bash.exe`` precedes it in ``PATH``, as on CI.
93-
If WSL is absent, ``System32`` may still have ``bash.exe``, as Windows users and
94-
administrators occasionally put executables there in lieu of extending ``PATH``.
85+
"""Check the status of the bash.exe that run_commit_hook will try to use.
86+
87+
This runs a command with bash.exe and checks the result. On Windows, shell and
88+
non-shell executable search differ; shutil.which often finds the wrong bash.exe.
89+
90+
run_commit_hook uses Popen, including to run bash.exe on Windows. It doesn't
91+
pass shell=True (and shouldn't). On Windows, Popen calls CreateProcessW, which
92+
checks some locations before using the PATH environment variable. It is expected
93+
to try System32, even if another directory with the executable precedes it in
94+
PATH. When WSL is present, even with no distributions, bash.exe usually exists
95+
in System32; Popen finds it even if a shell would run another one, as on CI.
96+
(Without WSL, System32 may still have bash.exe; users sometimes put it there.)
9597
"""
9698
if os.name != "nt":
9799
return cls.Inapplicable()
@@ -124,39 +126,35 @@ def check(cls):
124126

125127
@staticmethod
126128
def _decode(stdout):
127-
"""Decode ``bash.exe`` output as best we can. (This is used only on Windows.)"""
129+
"""Decode bash.exe output as best we can."""
128130
# When bash.exe is the WSL wrapper but the output is from WSL itself rather than
129131
# code running in a distribution, the output is often in UTF-16LE, which Windows
130132
# uses internally. The UTF-16LE representation of a Windows-style line ending is
131133
# rarely seen otherwise, so use it to detect this situation.
132134
if b"\r\0\n\0" in stdout:
133135
return stdout.decode("utf-16le")
134136

135-
import winreg
136-
137-
# At this point, the output is probably either empty or not UTF-16LE. It's often
138-
# UTF-8 from inside a WSL distro or a non-WSL bash shell. But our test command
139-
# only uses the ASCII subset, so it's safe to guess wrong for that command's
140-
# output. Errors from inside a WSL distro or non-WSL bash.exe are arbitrary, but
141-
# unlike WSL's own messages, go to stderr, not stdout. So we can try the system
142-
# active code page first. (Although console programs usually use the OEM code
143-
# page, the ACP seems more accurate here. For example, on en-US Windows set to
144-
# fr-FR, the message, if not UTF-16LE, is windows-1252, same as the ACP, while
145-
# the OEM code page on such a system defaults to 437, which can't decode it.)
146-
hklm_path = R"SYSTEM\CurrentControlSet\Control\Nls\CodePage"
147-
with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, hklm_path) as key:
148-
value, _ = winreg.QueryValueEx(key, "ACP")
137+
# At this point, the output is either blank or probably not UTF-16LE. It's often
138+
# UTF-8 from inside a WSL distro or non-WSL bash shell. Our test command only
139+
# uses the ASCII subset, so we can safely guess a wrong code page for it. Errors
140+
# from such an environment can contain any text, but unlike WSL's own messages,
141+
# they go to stderr, not stdout. So we can try the system ANSI code page first.
142+
# (Console programs often use the OEM code page, but the ACP seems more accurate
143+
# here. For example, on en-US Windows with the original system code page but the
144+
# display language set to fr-FR, the message, if not UTF-16LE, is windows-1252,
145+
# same as the ACP, while the OEMCP is 437, which can't decode its accents.)
146+
acp = _get_windows_ansi_encoding()
149147
try:
150-
return stdout.decode(f"cp{value}")
148+
return stdout.decode(acp)
151149
except UnicodeDecodeError:
152150
pass
153151
except LookupError as error:
154152
log.warning("%s", str(error)) # Message already says "Unknown encoding:".
155153

156-
# Assume UTF-8. If we don't have valid UTF-8, substitute Unicode replacement
157-
# characters. (For example, on zh-CN Windows set to fr-FR, error messages from
158-
# WSL itself, if not UTF-16LE, are in windows-1252, even though the ACP and OEM
159-
# code pages are 936; decoding as code page 936 or as UTF-8 both have errors.)
154+
# Assume UTF-8. If invalid, substitute Unicode replacement characters. (For
155+
# example, on zh-CN Windows set to display fr-FR, errors from WSL itself, if not
156+
# UTF-16LE, are in windows-1252, even though the ANSI and OEM code pages both
157+
# default to 936, and decoding as code page 936 or as UTF-8 both have errors.)
160158
return stdout.decode("utf-8", errors="replace")
161159

162160

0 commit comments

Comments
 (0)