Skip to content

Commit 0ede723

Browse files
fmeum authored and copybara-github committed
Always run Turbine native image with a UTF-8 code page on Windows
We are building Turbine native images with a UTF-8 code page and need to match the runtime code page for UTF-8 paths to be handled properly. This change splits the existing tool for patching the app manifest of the embedded JDK into separate tools for reading and writing app manifests. The write tool is then used to add a manifest to the turbine binary. Closes #25444. PiperOrigin-RevId: 735457536 Change-Id: Id6395435cc350772e1225ecb80fa32a74f6ae0e3
1 parent c2e8cd5 commit 0ede723

File tree

8 files changed

+200
-133
lines changed

8 files changed

+200
-133
lines changed

src/BUILD

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -181,24 +181,31 @@ filegroup(
181181
visibility = ["//src/test/shell/bazel:__pkg__"],
182182
)
183183

184-
# This program patches the app manifest of the java.exe launcher to force its
185-
# active code page to UTF-8 on Windows 1903 and later, which is required for
186-
# proper support of Unicode characters outside the system code page.
187-
# The JDK currently (as of JDK 23) doesn't support this natively:
188-
# https://mail.openjdk.org/pipermail/core-libs-dev/2024-November/133773.html
184+
# Reads the app manifest of a Windows executable.
189185
cc_binary(
190-
name = "patch_java_manifest_for_utf8",
191-
srcs = ["patch_java_manifest_for_utf8.cc"],
186+
name = "read_manifest",
187+
srcs = ["read_manifest.cc"],
192188
tags = ["manual"],
193189
target_compatible_with = ["@platforms//os:windows"],
190+
visibility = ["//src/java_tools:__subpackages__"],
191+
)
192+
193+
# Updates the app manifest of a Windows executable.
194+
cc_binary(
195+
name = "write_manifest",
196+
srcs = ["write_manifest.cc"],
197+
tags = ["manual"],
198+
target_compatible_with = ["@platforms//os:windows"],
199+
visibility = ["//src/java_tools:__subpackages__"],
194200
)
195201

196202
sh_binary(
197203
name = "minimize_jdk",
198204
srcs = ["minimize_jdk.sh"],
199205
data = select({
200206
"@platforms//os:windows": [
201-
":patch_java_manifest_for_utf8",
207+
":read_manifest",
208+
":write_manifest",
202209
],
203210
"//conditions:default": [],
204211
}),

src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
load("@rules_graalvm//graalvm:defs.bzl", "native_image")
2-
load("@rules_java//java:defs.bzl", "java_binary")
2+
load("@rules_java//java:defs.bzl", "java_binary", "java_library")
33

44
package(
55
default_applicable_licenses = ["//:license"],
@@ -26,13 +26,21 @@ java_binary(
2626
runtime_deps = [":turbine_deps"],
2727
)
2828

29-
native_image(
29+
alias(
3030
name = "turbine_direct_graal",
31+
actual = select({
32+
"@platforms//os:windows": ":turbine_direct_graal_with_app_manifest",
33+
"//conditions:default": ":turbine_direct_graal_unpatched",
34+
}),
35+
)
36+
37+
native_image(
38+
name = "turbine_direct_graal_unpatched",
3139
executable_name = select({
3240
# TODO(cushon): restore .exe suffix on windows
3341
# see https://github.com/sgammon/rules_graalvm/issues/324
34-
"@bazel_tools//src/conditions:windows": "%target%",
35-
"//conditions:default": "%target%",
42+
"@bazel_tools//src/conditions:windows": "turbine_direct_graal_unpatched",
43+
"//conditions:default": "turbine_direct_graal",
3644
}),
3745
extra_args = [
3846
# Workaround for https://github.com/oracle/graal/issues/4757.
@@ -74,6 +82,26 @@ native_image(
7482
deps = [":turbine_deps"],
7583
)
7684

85+
# On Windows, add an app manifest to the binary to force it to run with a UTF-8
# code page. It is built with one, but without the app manifest it will not be
# able to use UTF-8 for filesystem operations.
# https://github.com/oracle/graal/issues/10237
#
# The unpatched native image is copied to the output (and made writable, since
# the copy of a build input may be read-only) before write_manifest updates it
# in place with the manifest supplied on stdin.
genrule(
    name = "turbine_direct_graal_with_app_manifest",
    srcs = [
        ":turbine_direct_graal_unpatched",
        "turbine_direct_graal.manifest",
    ],
    outs = ["turbine_direct_graal.exe"],
    cmd = """\
cp $(location :turbine_direct_graal_unpatched) $@
chmod +w $@
$(location //src:write_manifest) $@ < $(location turbine_direct_graal.manifest)
""",
    target_compatible_with = ["@platforms//os:windows"],
    tools = ["//src:write_manifest"],
)
104+
77105
# Run with -c opt.
78106
sh_binary(
79107
name = "turbine_benchmark",
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2+
<assembly xmlns="urn:schemas-microsoft-com:asm.v1"
3+
manifestVersion="1.0"
4+
xmlns:asmv3="urn:schemas-microsoft-com:asm.v3"
5+
>
6+
<assemblyIdentity
7+
name="turbine_direct_graal.exe"
8+
version = "1.0.0.0"
9+
type="win32"
10+
/>
11+
<description>Turbine</description>
12+
13+
<asmv3:application>
14+
<asmv3:windowsSettings>
15+
<activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
16+
</asmv3:windowsSettings>
17+
</asmv3:application>
18+
19+
</assembly>

src/minimize_jdk.sh

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ fi
6161
UNAME=$(uname -s | tr 'A-Z' 'a-z')
6262

6363
if [[ "$UNAME" =~ msys_nt* ]]; then
64-
set -x
6564
mkdir "tmp.$$"
6665
cd "tmp.$$"
6766
unzip -q "../$fulljdk"
@@ -73,7 +72,14 @@ if [[ "$UNAME" =~ msys_nt* ]]; then
7372
./bin/jlink --module-path ./jmods/ --add-modules "$modules" \
7473
--vm=server --strip-debug --no-man-pages \
7574
--output reduced
76-
"$(rlocation "io_bazel/src/patch_java_manifest_for_utf8.exe")" reduced/bin/java.exe
75+
# Patch the app manifest of the java.exe launcher to force its active code
76+
# page to UTF-8 on Windows 1903 and later, which is required for proper
77+
# support of Unicode characters outside the system code page.
78+
# The JDK currently (as of JDK 23) doesn't support this natively:
79+
# https://mail.openjdk.org/pipermail/core-libs-dev/2024-November/133773.html
80+
"$(rlocation io_bazel/src/read_manifest.exe)" reduced/bin/java.exe \
81+
| sed 's|</asmv3:windowsSettings>|<activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>&|' \
82+
| "$(rlocation io_bazel/src/write_manifest.exe)" reduced/bin/java.exe
7783
cp $DOCS legal/java.base/ASSEMBLY_EXCEPTION \
7884
reduced/
7985
# These are necessary for --host_jvm_debug to work.

src/patch_java_manifest_for_utf8.cc

Lines changed: 0 additions & 103 deletions
This file was deleted.

src/read_manifest.cc

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright 2024 The Bazel Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include <fcntl.h>
#include <io.h>
#include <stdio.h>
#include <stdlib.h>

#define WIN32_LEAN_AND_MEAN
#include <windows.h>

#include <string>
22+
23+
// Locates the app manifest resource in the already loaded module `exe` and
// writes its raw bytes to stdout.
//
// Returns true on success. On failure, prints a diagnostic to stderr and
// returns false.
static bool DumpManifest(HMODULE exe) {
  // Resource ID 1 is the ID used for an executable's process-default manifest.
  // The charset-neutral FindResource macro is used so that it always agrees
  // with MAKEINTRESOURCE and RT_MANIFEST, which expand to narrow or wide
  // strings depending on whether UNICODE is defined.
  HRSRC manifest_resource = FindResource(exe, MAKEINTRESOURCE(1), RT_MANIFEST);
  if (!manifest_resource) {
    fwprintf(stderr, L"Resource not found: %lu\n", GetLastError());
    return false;
  }
  HGLOBAL manifest_handle = LoadResource(exe, manifest_resource);
  if (!manifest_handle) {
    fwprintf(stderr, L"Error loading resource: %lu\n", GetLastError());
    return false;
  }
  // The returned pointer stays valid until the module is unloaded; there is
  // nothing to unlock or free separately.
  LPVOID manifest_data = LockResource(manifest_handle);
  if (!manifest_data) {
    fwprintf(stderr, L"Error locking resource: %lu\n", GetLastError());
    return false;
  }
  DWORD manifest_len = SizeofResource(exe, manifest_resource);
  if (manifest_len == 0) {
    fwprintf(stderr, L"Error getting resource size: %lu\n", GetLastError());
    return false;
  }

  // Emit the manifest verbatim: in the default text mode the CRT would
  // rewrite every LF as CRLF and thereby alter the manifest's bytes.
  if (_setmode(_fileno(stdout), _O_BINARY) == -1) {
    fwprintf(stderr, L"Error switching stdout to binary mode\n");
    return false;
  }
  if (fwrite(manifest_data, 1, manifest_len, stdout) != manifest_len ||
      fflush(stdout) != 0) {
    fwprintf(stderr, L"Error writing manifest to stdout\n");
    return false;
  }
  return true;
}

// Extracts the app manifest of a Windows executable and prints it to stdout.
//
// Usage: read_manifest <filename>
//
// Returns 0 on success and 1 on any failure, after printing a diagnostic
// (including the Win32 error code where one is available) to stderr.
int wmain(int argc, wchar_t *argv[]) {
  if (argc != 2) {
    fwprintf(stderr, L"Usage: %ls <filename>\n", argv[0]);
    return 1;
  }

  // Read the app manifest (aka side-by-side or fusion manifest) from the
  // executable, which requires loading it as a "module".
  HMODULE exe = LoadLibraryExW(argv[1], nullptr, LOAD_LIBRARY_AS_DATAFILE);
  if (!exe) {
    fwprintf(stderr, L"Error loading file %ls: %lu\n", argv[1],
             GetLastError());
    return 1;
  }

  const bool ok = DumpManifest(exe);

  // Release the module on success and failure alike.
  FreeLibrary(exe);

  return ok ? 0 : 1;
}

src/test/shell/bazel/bazel_java_test.sh

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,32 +1990,24 @@ EOF
19901990
}
19911991

19921992
function test_header_compiler_direct_supports_unicode() {
1993-
if [[ "${JAVA_TOOLS_ZIP}" == released ]]; then
1993+
# Only the Windows build of the released java_tools lacks the fix, so skip
# just that combination. $is_windows holds the command name `true` or `false`
# and must be executed: inside [[ ]] it would merely be tested for being a
# non-empty string, which holds for both values.
if [[ "${JAVA_TOOLS_ZIP}" == released ]] && "$is_windows"; then
  # TODO: Enable test after the next java_tools release.
  return 0
fi
19971997

1998-
if "$is_windows"; then
1999-
# GraalVM native images on Windows use the same active code page they have been built
2000-
# with, which in the case of Bazel CI is 1252 (not UTF-8). Even with -H:+AddAllCharsets
2001-
# InvalidPathExceptions are still thrown when accessing a Unicode file path, indicating a
2002-
# problem within GraalVM's path encoding handling.
2003-
# https://github.com/oracle/graal/issues/10237
2004-
# TODO: Fix this by building java_tools binaries on a machine with system code page set to
2005-
# UTF-8.
2006-
echo "Skipping test on Windows"
2007-
return 0
2008-
elif [[ "$(uname -s)" == "Linux" ]]; then
1998+
# JVMs on macOS always support UTF-8 since JEP 400.
1999+
# Windows releases of Turbine are built on a machine with system code page set
2000+
# to UTF-8 so that Graal picks up the correct sun.jnu.encoding value *and*
2001+
# have an app manifest patched in to set the process's active code page to UTF-8 at
2002+
# runtime.
2003+
if [[ "$(uname -s)" == "Linux" ]]; then
20092004
export LC_ALL=C.UTF-8
20102005
if [[ $(locale charmap) != "UTF-8" ]]; then
20112006
echo "Skipping test due to missing UTF-8 locale"
20122007
return 0
20132008
fi
2014-
local -r unicode="äöüÄÖÜß🌱"
2015-
else
2016-
# JVMs on macOS always support UTF-8 since JEP 400.
2017-
local -r unicode="äöüÄÖÜß🌱"
20182009
fi
2010+
local -r unicode="äöüÄÖÜß🌱"
20192011
mkdir -p pkg
20202012
cat << EOF > pkg/BUILD
20212013
java_library(name = "a", srcs = ["A.java"], deps = [":b"])

0 commit comments

Comments
 (0)