|
40 | 40 | log = logging.getLogger(__name__)
|
41 | 41 |
|
42 | 42 |
|
| 43 | +def _get_windows_ansi_encoding(): |
| 44 | + """Get the encoding specified by the Windows system-wide ANSI active code page.""" |
| 45 | + # locale.getencoding may work but is only in Python 3.11+. Use the registry instead. |
| 46 | + import winreg |
| 47 | + |
| 48 | + hklm_path = R"SYSTEM\CurrentControlSet\Control\Nls\CodePage" |
| 49 | + with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, hklm_path) as key: |
| 50 | + value, _ = winreg.QueryValueEx(key, "ACP") |
| 51 | + return f"cp{value}" |
| 52 | + |
| 53 | + |
43 | 54 | @sumtype
|
44 | 55 | class WinBashStatus:
|
45 | 56 | """Status of bash.exe for native Windows. Affects which commit hook tests can pass.
|
46 | 57 |
|
47 |
| - Call :meth:`check` to check the status. |
48 |
| -
|
49 |
| - The :class:`CheckError` and :class:`WinError` cases should not typically be used in |
50 |
| - ``skip`` or ``xfail`` mark conditions, because they represent unexpected situations. |
| 58 | + Call check() to check the status. (CheckError and WinError should not typically be |
| 59 | + used to trigger skip or xfail, because they represent unexpected situations.) |
51 | 60 | """
|
52 | 61 |
|
53 | 62 | Inapplicable = constructor()
|
54 | 63 | """This system is not native Windows: either not Windows at all, or Cygwin."""
|
55 | 64 |
|
56 | 65 | Absent = constructor()
|
57 |
| - """No command for ``bash.exe`` is found on the system.""" |
| 66 | + """No command for bash.exe is found on the system.""" |
58 | 67 |
|
59 | 68 | Native = constructor()
|
60 |
| - """Running ``bash.exe`` operates outside any WSL distribution (as with Git Bash).""" |
| 69 | + """Running bash.exe operates outside any WSL distribution (as with Git Bash).""" |
61 | 70 |
|
62 | 71 | Wsl = constructor()
|
63 |
| - """Running ``bash.exe`` calls ``bash`` in a WSL distribution.""" |
| 72 | + """Running bash.exe calls bash in a WSL distribution.""" |
64 | 73 |
|
65 | 74 | WslNoDistro = constructor("process", "message")
|
66 |
| - """Running ``bash.exe` tries to run bash on a WSL distribution, but none exists.""" |
| 75 | + """Running bash.exe tries to run bash on a WSL distribution, but none exists.""" |
67 | 76 |
|
68 | 77 | CheckError = constructor("process", "message")
|
69 |
| - """Running ``bash.exe`` fails in an unexpected error or gives unexpected output.""" |
| 78 | + """Running bash.exe fails in an unexpected error or gives unexpected output.""" |
70 | 79 |
|
71 | 80 | WinError = constructor("exception")
|
72 |
| - """``bash.exe`` may exist but can't run. ``CreateProcessW`` fails unexpectedly.""" |
| 81 | + """bash.exe may exist but can't run. CreateProcessW fails unexpectedly.""" |
73 | 82 |
|
74 | 83 | @classmethod
|
75 | 84 | def check(cls):
|
76 |
| - """Check the status of the ``bash.exe`` :func:`index.fun.run_commit_hook` uses. |
77 |
| -
|
78 |
| - This uses EAFP, attempting to run a command via ``bash.exe``. Which ``bash.exe`` |
79 |
| - is used can't be reliably discovered by :func:`shutil.which`, which approximates |
80 |
| - how a shell is expected to search for an executable. On Windows, there are major |
81 |
| - differences between how executables are found by a shell and otherwise. (This is |
82 |
| - the cmd.exe Windows shell, and shouldn't be confused with bash.exe itself. That |
83 |
| - the command being looked up also happens to be an interpreter is not relevant.) |
84 |
| -
|
85 |
| - :func:`index.fun.run_commit_hook` uses :class:`subprocess.Popen`, including when |
86 |
| - it runs ``bash.exe`` on Windows. It doesn't pass ``shell=True`` (and shouldn't). |
87 |
| - On Windows, `Popen` calls ``CreateProcessW``, which checks some locations before |
88 |
| - using the ``PATH`` environment variable. It is expected to try the ``System32`` |
89 |
| - directory, even if another directory containing the executable precedes it in |
90 |
| - ``PATH``. (The other differences are less relevant here.) When WSL is present, |
91 |
| - even with no distributions, ``bash.exe`` usually exists in ``System32``, and |
92 |
| - `Popen` finds it even if another ``bash.exe`` precedes it in ``PATH``, as on CI. |
93 |
| - If WSL is absent, ``System32`` may still have ``bash.exe``, as Windows users and |
94 |
| - administrators occasionally put executables there in lieu of extending ``PATH``. |
| 85 | + """Check the status of the bash.exe that run_commit_hook will try to use. |
| 86 | +
|
| 87 | + This runs a command with bash.exe and checks the result. On Windows, shell and |
| 88 | + non-shell executable search differ; shutil.which often finds the wrong bash.exe. |
| 89 | +
|
| 90 | + run_commit_hook uses Popen, including to run bash.exe on Windows. It doesn't |
| 91 | + pass shell=True (and shouldn't). On Windows, Popen calls CreateProcessW, which |
| 92 | + checks some locations before using the PATH environment variable. It is expected |
| 93 | + to try System32, even if another directory with the executable precedes it in |
| 94 | + PATH. When WSL is present, even with no distributions, bash.exe usually exists |
| 95 | + in System32; Popen finds it even if a shell would run another one, as on CI. |
| 96 | + (Without WSL, System32 may still have bash.exe; users sometimes put it there.) |
95 | 97 | """
|
96 | 98 | if os.name != "nt":
|
97 | 99 | return cls.Inapplicable()
|
@@ -124,39 +126,35 @@ def check(cls):
|
124 | 126 |
|
125 | 127 | @staticmethod
|
126 | 128 | def _decode(stdout):
|
127 |
| - """Decode ``bash.exe`` output as best we can. (This is used only on Windows.)""" |
| 129 | + """Decode bash.exe output as best we can.""" |
128 | 130 | # When bash.exe is the WSL wrapper but the output is from WSL itself rather than
|
129 | 131 | # code running in a distribution, the output is often in UTF-16LE, which Windows
|
130 | 132 | # uses internally. The UTF-16LE representation of a Windows-style line ending is
|
131 | 133 | # rarely seen otherwise, so use it to detect this situation.
|
132 | 134 | if b"\r\0\n\0" in stdout:
|
133 | 135 | return stdout.decode("utf-16le")
|
134 | 136 |
|
135 |
| - import winreg |
136 |
| - |
137 |
| - # At this point, the output is probably either empty or not UTF-16LE. It's often |
138 |
| - # UTF-8 from inside a WSL distro or a non-WSL bash shell. But our test command |
139 |
| - # only uses the ASCII subset, so it's safe to guess wrong for that command's |
140 |
| - # output. Errors from inside a WSL distro or non-WSL bash.exe are arbitrary, but |
141 |
| - # unlike WSL's own messages, go to stderr, not stdout. So we can try the system |
142 |
| - # active code page first. (Although console programs usually use the OEM code |
143 |
| - # page, the ACP seems more accurate here. For example, on en-US Windows set to |
144 |
| - # fr-FR, the message, if not UTF-16LE, is windows-1252, same as the ACP, while |
145 |
| - # the OEM code page on such a system defaults to 437, which can't decode it.) |
146 |
| - hklm_path = R"SYSTEM\CurrentControlSet\Control\Nls\CodePage" |
147 |
| - with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, hklm_path) as key: |
148 |
| - value, _ = winreg.QueryValueEx(key, "ACP") |
| 137 | + # At this point, the output is either blank or probably not UTF-16LE. It's often |
| 138 | + # UTF-8 from inside a WSL distro or non-WSL bash shell. Our test command only |
| 139 | + # uses the ASCII subset, so we can safely guess a wrong code page for it. Errors |
| 140 | + # from such an environment can contain any text, but unlike WSL's own messages, |
| 141 | + # they go to stderr, not stdout. So we can try the system ANSI code page first. |
| 142 | + # (Console programs often use the OEM code page, but the ACP seems more accurate |
| 143 | + # here. For example, on en-US Windows with the original system code page but the |
| 144 | + # display language set to fr-FR, the message, if not UTF-16LE, is windows-1252, |
| 145 | + # same as the ACP, while the OEMCP is 437, which can't decode its accents.) |
| 146 | + acp = _get_windows_ansi_encoding() |
149 | 147 | try:
|
150 |
| - return stdout.decode(f"cp{value}") |
| 148 | + return stdout.decode(acp) |
151 | 149 | except UnicodeDecodeError:
|
152 | 150 | pass
|
153 | 151 | except LookupError as error:
|
154 | 152 | log.warning("%s", str(error)) # Message already says "Unknown encoding:".
|
155 | 153 |
|
156 |
| - # Assume UTF-8. If we don't have valid UTF-8, substitute Unicode replacement |
157 |
| - # characters. (For example, on zh-CN Windows set to fr-FR, error messages from |
158 |
| - # WSL itself, if not UTF-16LE, are in windows-1252, even though the ACP and OEM |
159 |
| - # code pages are 936; decoding as code page 936 or as UTF-8 both have errors.) |
| 154 | + # Assume UTF-8. If invalid, substitute Unicode replacement characters. (For |
| 155 | + # example, on zh-CN Windows set to display fr-FR, errors from WSL itself, if not |
| 156 | + # UTF-16LE, are in windows-1252, even though the ANSI and OEM code pages both |
| 157 | + # default to 936, and decoding as code page 936 or as UTF-8 both have errors.) |
160 | 158 | return stdout.decode("utf-8", errors="replace")
|
161 | 159 |
|
162 | 160 |
|
|
0 commit comments