Skip to content

Commit 9b9d39e

Browse files
committed
Abstract MultiByteToWideChar
1 parent 73b6574 commit 9b9d39e

File tree

1 file changed

+57
-33
lines changed
  • compiler/rustc_codegen_ssa/src/back

1 file changed

+57
-33
lines changed

compiler/rustc_codegen_ssa/src/back/link.rs

Lines changed: 57 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,42 +1064,66 @@ fn escape_linker_output(s: &[u8], flavour: LinkerFlavor) -> String {
10641064
}
10651065
match str::from_utf8(s) {
10661066
Ok(s) => return s.to_owned(),
1067-
Err(_) if s.len() <= i32::MAX as usize => {
1068-
use windows::Win32::Globalization::{
1069-
GetLocaleInfoEx, MultiByteToWideChar, CP_OEMCP, LOCALE_IUSEUTF8LEGACYOEMCP,
1070-
LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_RETURN_NUMBER, MB_ERR_INVALID_CHARS,
1071-
};
1072-
// Get the legacy system OEM code page.
1073-
let code_page = unsafe {
1074-
let mut cp: u32 = 0;
1075-
// We're using the `LOCALE_RETURN_NUMBER` flag to return a u32.
1076-
// But the API requires us to pass the data as though it's a [u16] string.
1077-
let len = std::mem::size_of::<u32>() / std::mem::size_of::<u16>();
1078-
let data = std::slice::from_raw_parts_mut(&mut cp as *mut u32 as *mut u16, len);
1079-
let len_written = GetLocaleInfoEx(
1080-
LOCALE_NAME_SYSTEM_DEFAULT,
1081-
LOCALE_IUSEUTF8LEGACYOEMCP | LOCALE_RETURN_NUMBER,
1082-
Some(data),
1083-
);
1084-
if len_written as usize == len { cp } else { CP_OEMCP }
1085-
};
1086-
// Error if the string is not valid for the expected code page.
1087-
let flags = MB_ERR_INVALID_CHARS;
1088-
// Call MultiByteToWideChar twice.
1089-
// First to calculate the length then to convert the string.
1090-
let mut len = unsafe { MultiByteToWideChar(code_page, flags, s, None) };
1067+
Err(_) => match win::locale_byte_str_to_string(s, win::oem_code_page()) {
1068+
Some(s) => s,
1069+
// The string is not UTF-8 and isn't valid for the OEM code page
1070+
None => format!("Non-UTF-8 output: {}", s.escape_ascii()),
1071+
},
1072+
}
1073+
}
1074+
1075+
/// Wrappers around the Windows API.
#[cfg(windows)]
mod win {
    use windows::Win32::Globalization::{
        GetLocaleInfoEx, MultiByteToWideChar, CP_OEMCP, LOCALE_IUSEUTF8LEGACYOEMCP,
        LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_RETURN_NUMBER, MB_ERR_INVALID_CHARS,
    };

    /// Get the Windows system OEM code page. This is most notably the code page
    /// used for link.exe's output.
    ///
    /// Falls back to `CP_OEMCP` if the locale query does not report having written
    /// exactly one `u32` worth of data.
    pub fn oem_code_page() -> u32 {
        let mut cp: u32 = 0;
        // We're using the `LOCALE_RETURN_NUMBER` flag to return a u32.
        // But the API requires us to pass the data as though it's a [u16] string.
        let len = std::mem::size_of::<u32>() / std::mem::size_of::<u16>();
        // SAFETY: `cp` is a live local `u32` that nothing else borrows during the call.
        // A `u32` is at least as aligned as a `u16` and is exactly `len` `u16`s long,
        // so the temporary slice stays within `cp`. The slice does not outlive the call.
        let len_written = unsafe {
            let data = std::slice::from_raw_parts_mut(&mut cp as *mut u32 as *mut u16, len);
            GetLocaleInfoEx(
                LOCALE_NAME_SYSTEM_DEFAULT,
                LOCALE_IUSEUTF8LEGACYOEMCP | LOCALE_RETURN_NUMBER,
                Some(data),
            )
        };
        // Anything other than a full `u32` being written means `cp` cannot be trusted.
        if len_written as usize == len { cp } else { CP_OEMCP }
    }

    /// Try to convert a multi-byte string to a UTF-8 string using the given code page.
    /// The string does not need to be null terminated.
    ///
    /// This is implemented as a wrapper around `MultiByteToWideChar`.
    /// See <https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar>
    ///
    /// Returns `None` if the multi-byte string is longer than `i32::MAX` bytes, if it
    /// contains any invalid bytes for the expected encoding, or if the conversion
    /// otherwise reports a non-positive output length.
    pub fn locale_byte_str_to_string(s: &[u8], code_page: u32) -> Option<String> {
        // `MultiByteToWideChar` requires a length to be a "positive integer", and that
        // length is a C `int`. Reject anything that does not fit in an `i32`.
        // (Comparing against `isize::MAX` would never reject anything, because a slice
        // can never be longer than that.)
        if s.len() > i32::MAX as usize {
            return None;
        }
        // Error if the string is not valid for the expected code page.
        let flags = MB_ERR_INVALID_CHARS;
        // Call MultiByteToWideChar twice.
        // First to calculate the length then to convert the string.
        // SAFETY: `s` is a valid byte slice and no output buffer is supplied, so the
        // call only reads `s` to compute the required length.
        let required = unsafe { MultiByteToWideChar(code_page, flags, s, None) };
        if required <= 0 {
            return None;
        }
        let mut utf16 = vec![0; required as usize];
        // SAFETY: `s` is a valid byte slice and `utf16` is a live, exclusively borrowed
        // buffer whose length is passed along with it.
        let written = unsafe { MultiByteToWideChar(code_page, flags, s, Some(&mut utf16)) };
        if written <= 0 {
            return None;
        }
        // `get` rather than indexing: a reported length beyond the buffer yields `None`
        // instead of a panic.
        utf16.get(..written as usize).map(String::from_utf16_lossy)
    }
}
11041128

11051129
fn add_sanitizer_libraries(sess: &Session, crate_type: CrateType, linker: &mut dyn Linker) {

0 commit comments

Comments
 (0)