-
Notifications
You must be signed in to change notification settings - Fork 107
WIP raw_dylib: write the import .lib manually. #1414
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
136dc9d
8672689
f46537e
984ed7e
15f8053
08dcc1c
a38d471
50cbdc8
58325ad
326067e
cc26ee1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
use std::fs; | ||
use std::path::{Path, PathBuf}; | ||
|
||
use rustc_codegen_ssa::back::archive::{ | ||
|
@@ -15,11 +16,291 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { | |
fn create_dll_import_lib( | ||
&self, | ||
_sess: &Session, | ||
_lib_name: &str, | ||
_dll_imports: &[rustc_session::cstore::DllImport], | ||
_tmpdir: &Path, | ||
lib_name: &str, | ||
dll_imports: &[rustc_session::cstore::DllImport], | ||
tmpdir: &Path, | ||
_is_direct_dependency: bool, | ||
) -> PathBuf { | ||
unimplemented!("creating dll imports is not yet supported"); | ||
let mut import_names = Vec::new(); | ||
for dll_import in dll_imports { | ||
import_names.push(dll_import.name.as_str()); | ||
} | ||
let lib_path = tmpdir.join(format!("{}.lib", lib_name)); | ||
// todo: emit session error instead of expects | ||
fs::write(&lib_path, windows_import_lib::generate(lib_name, &import_names)) | ||
.expect("failed to write import library"); | ||
|
||
lib_path | ||
} | ||
} | ||
|
||
// todo: pull out to a proper location. Really should be in `object` crate! | ||
// todo: support ordinals | ||
// todo: support name types (e.g. verbatim+) | ||
// todo: support long member names | ||
// todo: support windows-gnu flavor? | ||
// todo: provide machine | ||
// todo: remove any panics, nice errors | ||
mod windows_import_lib { | ||
// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format | ||
// | ||
// Windows .lib files are System-V (aka. GNU) flavored ar files with a couple of extra lookup | ||
// members. | ||
// | ||
// An archive is the 8 bytes b"!<arch>\n" | ||
// followed by a sequence of 60 byte member headers: | ||
// 0: name: [u8; 16], // member name, terminated with "/". If it is longer than 15, then | ||
// // use "/n" where "n" is a decimal for the offset in bytes into | ||
// // the longnames ("//") member contents. | ||
// 16: date: [u8; 12], // ASCII decimal seconds since UNIX epoch - always -1 for MSVC | ||
// 28: uid: [u8; 6], // ASCII decimal user id. Always blank for MSVC | ||
// 34: gid: [u8; 6], // ditto for group id. | ||
// 40: mode: [u8; 8], // ASCII octal UNIX mode. 0 for MSVC | ||
// 48: size: [u8; 10], // ASCII decimal data size. | ||
// 58: end: b"`\n", | ||
// then size bytes of payload. If payload is odd sized, pad | ||
// to an even offset with \n. | ||
// | ||
// You must store two extra members at the start, a legacy member lookup table member | ||
// and the current member lookup and symbol table, both with empty ("/") names. | ||
// | ||
// The legacy table member has the name "/" with following contents, using big-endian numbers: | ||
// count: u32, // number of indexed symbols | ||
// offsets: [u32; count], // file offsets to the header of the member that contains | ||
// // that symbol. | ||
// names: * // sequence of null terminated symbol names. | ||
// | ||
// The current table member also has the name "/", and has the following contents, using | ||
// little-endian numbers: | ||
// member_count: u32, // number of members | ||
// member_offsets: [u32; member_count], // file offsets to each member header | ||
// symbol_count: u32, // number of symbols | ||
// symbol_member: [u16; symbol_count], // *1-based* index of the member that contains | ||
// // each symbol | ||
// symbol_names: * // sequence of null terminated symbol names | ||
// | ||
// Then the long names member ("//") as with regular GNU ar files, just a sequence of | ||
// null terminated strings indexed by members using the long name format "/n" as described | ||
// above. | ||
// | ||
// Then regular members follow. | ||
// | ||
// This library emits only import libraries, that is, libraries with a short import object | ||
// describing an import from a dll. That means each member contains exactly one symbol. The member | ||
// name doesn't seem to matter, including duplicates, we use the dll name since that's what's in the | ||
// files generated by MSVC tools. | ||
// | ||
// The short import object has the form: | ||
// header: | ||
// sig1: 0u16 | ||
// sig2: 0xFFFFu16 | ||
// version: u16, // normally 0 | ||
// machine: u16, // IMAGE_MACHINE_* value, e.g. 0x8664 for AMD64 | ||
// time_date_stamp: u32, // normally 0 | ||
// size_of_data: u32, // size following the header | ||
// ordinal_or_hint: u16, // depending on flag | ||
// object_type: u2, // IMPORT_OBJECT_{CODE,DATA,CONST} = 0, 1, 2 | ||
// name_type: u3, // IMPORT_OBJECT_{ORDINAL,NAME,NAME_NO_PREFIX,NAME_UNDECORATE,NAME_EXPORTAS} = 0, 1, 2, 3, 4 | ||
// reserved: u11, | ||
// data: // size_of_data bytes | ||
// name: * // import name; null terminated string | ||
// dll_name: * // dll name; null terminated string | ||
pub fn generate(dll_name: &str, import_names: &[&str]) -> Vec<u8> { | ||
assert!(import_names.len() <= 0xFFFF, "too many import names"); | ||
// number of symbols, and members containing symbols for symbol lookup members | ||
let symbol_count = import_names.len(); | ||
|
||
let mut writer = Writer::new(); | ||
|
||
// hack: trim dll name to 15 characters to avoid long member names | ||
let member_name = &dll_name[..dll_name.len().min(15)]; | ||
|
||
// legacy symbol directory | ||
let mut legacy_symbol_directory = writer.start_member_raw(); | ||
legacy_symbol_directory.set_raw_name(b"/"); | ||
legacy_symbol_directory.write_u32_be(symbol_count as u32); | ||
// reserve space for offsets. | ||
let legacy_member_table_offset = legacy_symbol_directory.reserve_bytes(symbol_count * 4); | ||
// string table | ||
for name in import_names { | ||
legacy_symbol_directory.write_c_str(name); | ||
} | ||
// done with legacy symbol directory | ||
drop(legacy_symbol_directory); | ||
|
||
// current symbol directory | ||
let mut current_symbol_directory = writer.start_member_raw(); | ||
current_symbol_directory.set_raw_name(b"/"); | ||
// member count: same as symbol count for import library | ||
current_symbol_directory.write_u32_le(symbol_count as u32); | ||
// reserve space for member offsets | ||
let current_member_table_offset = current_symbol_directory.reserve_bytes(symbol_count * 4); | ||
// symbol count | ||
current_symbol_directory.write_u32_le(symbol_count as u32); | ||
// we assume symbol members are already in order | ||
for index in 0..import_names.len() as u16 { | ||
current_symbol_directory.write_u16_le(1 + index); | ||
} | ||
// string table again (could just copy from legacy string table above?) | ||
for name in import_names { | ||
current_symbol_directory.write_c_str(name); | ||
} | ||
// done with current symbol directory | ||
drop(current_symbol_directory); | ||
|
||
// long names member not supported yet | ||
|
||
// import members | ||
for (index, name) in import_names.iter().enumerate() { | ||
let mut member = writer.start_member(member_name); | ||
// update member offsets | ||
let member_offset = member.header_offset as u32; | ||
member.data[legacy_member_table_offset + index * 4..][..4] | ||
.copy_from_slice(&member_offset.to_be_bytes()); | ||
member.data[current_member_table_offset + index * 4..][..4] | ||
.copy_from_slice(&member_offset.to_le_bytes()); | ||
// write import object: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe this should be added to the object crate? It already has parser support for it. |
||
// signature | ||
member.write_u16_le(0); | ||
member.write_u16_le(0xFFFF); | ||
// version | ||
member.write_u16_le(0); | ||
// machine = AMD64 | ||
member.write_u16_le(0x8664); | ||
// time_date_stamp | ||
member.write_u32_le(0); | ||
// size_of_data | ||
member.write_u32_le((dll_name.len() + 1 + name.len() + 1) as u32); | ||
// ordinal_or_hint | ||
member.write_u16_le(0); | ||
// object_type | name_type = IMPORT_OBJECT_CODE | IMPORT_OBJECT_NAME | ||
member.write_u16_le(1 << 2 | 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The object crate has all those constants. |
||
// data: | ||
// name | ||
member.write_c_str(name); | ||
// dll_name | ||
member.write_c_str(dll_name); | ||
|
||
drop(member); | ||
} | ||
|
||
writer.data | ||
} | ||
|
||
/// Accumulates the bytes of the archive being generated. | ||
struct Writer { | ||
// Raw archive contents; `Writer::new` seeds it with the "!<arch>\n" signature and | ||
// `generate` returns it as the finished library. | ||
data: Vec<u8>, | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can reuse the archive writer code of ar_archive_writer, right? This is already used by rustc, so adding There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
ar_archive_writer supports this. It is a direct port of LLVM's archive writer. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Huh, I did start by checking that crate but didn't take a second look after There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The LLVM writer (and therefore presumably |
||
|
||
impl Writer { | ||
fn new() -> Self { | ||
Self { data: Vec::from(*b"!<arch>\n") } | ||
} | ||
|
||
fn start_member_raw(&mut self) -> Member<'_> { | ||
let header_offset = self.data.len(); | ||
// fill the header with blanks... | ||
self.data.resize(header_offset + Member::HEADER_SIZE - 2, b' '); | ||
// except for end marker | ||
self.data.extend_from_slice(b"`\n"); | ||
|
||
let mut member = Member::new(&mut self.data, header_offset); | ||
// init date, mode to default values as produced by MSVC tools | ||
member.set_time_date_stamp(-1); | ||
member.set_mode(0); | ||
member | ||
} | ||
|
||
fn start_member(&mut self, name: &str) -> Member<'_> { | ||
let mut member = self.start_member_raw(); | ||
member.set_name(name); | ||
member | ||
} | ||
} | ||
|
||
/// An archive member under construction inside the shared archive buffer.
///
/// `header_offset` is the index in `data` at which this member's 60-byte header starts;
/// the payload is whatever gets appended to `data` after the header. Dropping the member
/// finalizes it: the payload size is stored in the header and an odd-sized payload is
/// padded with `\n`.
struct Member<'a> {
    data: &'a mut Vec<u8>,
    header_offset: usize,
}

impl<'a> Member<'a> {
    /// Size in bytes of an archive member header.
    const HEADER_SIZE: usize = 60;

    /// Wraps a header that already exists in `data` at `header_offset`.
    fn new(data: &'a mut Vec<u8>, header_offset: usize) -> Self {
        Self { data, header_offset }
    }

    /// Returns the `len` header bytes starting `offset` bytes into this member's header.
    fn header_slice(&mut self, offset: usize, len: usize) -> &mut [u8] {
        &mut self.data[self.header_offset + offset..][..len]
    }

    /// Sets the member name, terminated with "/". Panics for names of 16 bytes or more.
    fn set_name(&mut self, name: &str) {
        assert!(name.len() < 16, "long member names not supported yet");
        self.set_raw_name(name.as_bytes());
        self.data[self.header_offset + name.len()] = b'/';
    }

    /// Stores `raw_name` verbatim in the 16-byte name field, blank-padded on the right.
    fn set_raw_name(&mut self, raw_name: &[u8]) {
        assert!(raw_name.len() <= 16, "raw name must be <= 16 bytes");
        let field = self.header_slice(0, 16);
        // blank first so a shorter name never keeps the tail of a previous one
        field.fill(b' ');
        field[..raw_name.len()].copy_from_slice(raw_name);
    }

    /// Sets the date field (header offset 16, 12 bytes).
    fn set_time_date_stamp(&mut self, value: i32) {
        self.set_decimal_field(16, 12, value);
    }

    /// Sets the user id field (header offset 28, 6 bytes).
    fn set_uid(&mut self, value: i32) {
        self.set_decimal_field(28, 6, value);
    }

    /// Sets the group id field (header offset 34, 6 bytes).
    fn set_gid(&mut self, value: i32) {
        self.set_decimal_field(34, 6, value);
    }

    /// Sets the mode field (header offset 40, 8 bytes) as ASCII octal.
    /// Panics if the value does not fit.
    fn set_mode(&mut self, value: i32) {
        use std::io::Write;
        let field = self.header_slice(40, 8);
        // blank first so a shorter value never keeps stale trailing digits
        field.fill(b' ');
        write!(std::io::Cursor::new(field), "{value:o}").expect("value too large");
    }

    /// Writes `value` as ASCII decimal, left-aligned and blank-padded, into the `size`-byte
    /// header field at `offset`. Panics if the value does not fit.
    fn set_decimal_field(&mut self, offset: usize, size: usize, value: i32) {
        use std::io::Write;
        let field = self.header_slice(offset, size);
        // blank first so a shorter value never keeps stale trailing digits
        field.fill(b' ');
        write!(std::io::Cursor::new(field), "{value}").expect("value too large");
    }

    /// Appends `data` followed by a null terminator to the payload.
    fn write_c_str(&mut self, data: &str) {
        self.data.extend_from_slice(data.as_bytes());
        self.data.push(0);
    }

    /// Appends `data` to the payload as little-endian.
    fn write_u16_le(&mut self, data: u16) {
        self.data.extend_from_slice(&data.to_le_bytes());
    }

    /// Appends `data` to the payload as big-endian.
    fn write_u32_be(&mut self, data: u32) {
        self.data.extend_from_slice(&data.to_be_bytes());
    }

    /// Appends `data` to the payload as little-endian.
    fn write_u32_le(&mut self, data: u32) {
        self.data.extend_from_slice(&data.to_le_bytes());
    }

    /// Appends `count` zero bytes and returns the buffer index of the first one, so the
    /// caller can patch them later.
    fn reserve_bytes(&mut self, count: usize) -> usize {
        let offset = self.data.len();
        self.data.resize(offset + count, 0);
        offset
    }
}

impl<'a> Drop for Member<'a> {
    /// Finalizes the member: records the payload size in the header and pads an odd-sized
    /// payload to an even address.
    fn drop(&mut self) {
        let data_size = self.data.len() - self.header_offset - Self::HEADER_SIZE;
        let size_field = i32::try_from(data_size).expect("member payload too large");
        self.set_decimal_field(48, 10, size_field);
        // pad to even address
        if data_size % 2 == 1 {
            self.data.push(b'\n');
        }
    }
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to be careful about lib name collision? I.e. should a random-ish id be added to the pathname to disambiguate?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would expect rustc to merge all extern blocks with the same lib name. If that is indeed the case, there can't be any collisions as the tmpdir is unique per rustc invocation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair enough then!