|
7 | 7 | //! `GetModuleHandle` and `GetProcAddress` to look up DLL entry points at
|
8 | 8 | //! runtime.
|
9 | 9 | //!
|
10 |
| -//! This implementation uses a static initializer to look up the DLL entry |
11 |
| -//! points. The CRT (C runtime) executes static initializers before `main` |
12 |
| -//! is called (for binaries) and before `DllMain` is called (for DLLs). |
13 |
| -//! This is the ideal time to look up DLL imports, because we are guaranteed |
14 |
| -//! that no other threads will attempt to call these entry points. Thus, |
15 |
| -//! we can look up the imports and store them in `static mut` fields |
16 |
| -//! without any synchronization. |
| 10 | +//! This is implemented simply by storing a function pointer in an atomic. |
| 11 | +//! Loading and calling this function will have little or no overhead |
| 12 | +//! compared with calling any other dynamically imported function. |
17 | 13 | //!
|
18 |
| -//! This has an additional advantage: Because the DLL import lookup happens |
19 |
| -//! at module initialization, the cost of these lookups is deterministic, |
20 |
| -//! and is removed from the code paths that actually call the DLL imports. |
21 |
| -//! That is, there is no unpredictable "cache miss" that occurs when calling |
22 |
| -//! a DLL import. For applications that benefit from predictable delays, |
23 |
| -//! this is a benefit. This also eliminates the comparison-and-branch |
24 |
| -//! from the hot path. |
25 |
| -//! |
26 |
| -//! Currently, the standard library uses only a small number of dynamic |
27 |
| -//! DLL imports. If this number grows substantially, then the cost of |
28 |
| -//! performing all of the lookups at initialization time might become |
29 |
| -//! substantial. |
30 |
| -//! |
31 |
| -//! The mechanism of registering a static initializer with the CRT is |
32 |
| -//! documented in |
33 |
| -//! [CRT Initialization](https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization?view=msvc-160). |
34 |
| -//! It works by contributing a global symbol to the `.CRT$XCU` section. |
35 |
| -//! The linker builds a table of all static initializer functions. |
36 |
| -//! The CRT startup code then iterates that table, calling each |
37 |
| -//! initializer function. |
38 |
| -//! |
39 |
| -//! # **WARNING!!* |
40 |
| -//! The environment that a static initializer function runs in is highly |
41 |
| -//! constrained. There are **many** restrictions on what static initializers |
42 |
| -//! can safely do. Static initializer functions **MUST NOT** do any of the |
43 |
| -//! following (this list is not comprehensive): |
44 |
| -//! * touch any other static field that is used by a different static |
45 |
| -//! initializer, because the order that static initializers run in |
46 |
| -//! is not defined. |
47 |
| -//! * call `LoadLibrary` or any other function that acquires the DLL |
48 |
| -//! loader lock. |
49 |
| -//! * call any Rust function or CRT function that touches any static |
50 |
| -//! (global) state. |
| 14 | +//! The stored function pointer starts out as an importer function which will |
| 15 | +//! swap itself with the real function when it's called for the first time. If |
| 16 | +//! the real function can't be imported then a fallback function is used in its |
| 17 | +//! place. While this is low cost for the happy path (where the function is |
| 18 | +//! already loaded) it does mean there's some overhead the first time the |
| 19 | +//! function is called. In the worst case, multiple threads may all end up |
| 20 | +//! importing the same function unnecessarily. |
51 | 21 |
|
52 | 22 | use crate::ffi::{c_void, CStr};
|
53 | 23 | use crate::ptr::NonNull;
|
@@ -85,39 +55,6 @@ pub(crate) const fn const_cstr_from_bytes(bytes: &'static [u8]) -> &'static CStr
|
85 | 55 | unsafe { crate::ffi::CStr::from_bytes_with_nul_unchecked(bytes) }
|
86 | 56 | }
|
87 | 57 |
|
88 |
| -#[used] |
89 |
| -#[link_section = ".CRT$XCU"] |
90 |
| -static INIT_TABLE_ENTRY: unsafe extern "C" fn() = init; |
91 |
| - |
92 |
| -/// This is where the magic preloading of symbols happens. |
93 |
| -/// |
94 |
| -/// Note that any functions included here will be unconditionally included in |
95 |
| -/// the final binary, regardless of whether or not they're actually used. |
96 |
| -/// |
97 |
| -/// Therefore, this is limited to `compat_fn_optional` functions which must be |
98 |
| -/// preloaded and any functions which may be more time sensitive, even for the first call. |
99 |
| -unsafe extern "C" fn init() { |
100 |
| - // There is no locking here. This code is executed before main() is entered, and |
101 |
| - // is guaranteed to be single-threaded. |
102 |
| - // |
103 |
| - // DO NOT do anything interesting or complicated in this function! DO NOT call |
104 |
| - // any Rust functions or CRT functions if those functions touch any global state, |
105 |
| - // because this function runs during global initialization. For example, DO NOT |
106 |
| - // do any dynamic allocation, don't call LoadLibrary, etc. |
107 |
| - |
108 |
| - if let Some(synch) = Module::new(c::SYNCH_API) { |
109 |
| - // These are optional and so we must manually attempt to load them |
110 |
| - // before they can be used. |
111 |
| - c::WaitOnAddress::preload(synch); |
112 |
| - c::WakeByAddressSingle::preload(synch); |
113 |
| - } |
114 |
| - |
115 |
| - if let Some(kernel32) = Module::new(c::KERNEL32) { |
116 |
| - // Preloading this means getting a precise time will be as fast as possible. |
117 |
| - c::GetSystemTimePreciseAsFileTime::preload(kernel32); |
118 |
| - } |
119 |
| -} |
120 |
| - |
121 | 58 | /// Represents a loaded module.
|
122 | 59 | ///
|
123 | 60 | /// Note that the modules std depends on must not be unloaded.
|
@@ -151,7 +88,7 @@ impl Module {
|
151 | 88 | macro_rules! compat_fn_with_fallback {
|
152 | 89 | (pub static $module:ident: &CStr = $name:expr; $(
|
153 | 90 | $(#[$meta:meta])*
|
154 |
| - pub fn $symbol:ident($($argname:ident: $argtype:ty),*) -> $rettype:ty $fallback_body:block |
| 91 | + $vis:vis fn $symbol:ident($($argname:ident: $argtype:ty),*) -> $rettype:ty $fallback_body:block |
155 | 92 | )*) => (
|
156 | 93 | pub static $module: &CStr = $name;
|
157 | 94 | $(
|
@@ -196,78 +133,72 @@ macro_rules! compat_fn_with_fallback {
|
196 | 133 | $fallback_body
|
197 | 134 | }
|
198 | 135 |
|
199 |
| - #[allow(unused)] |
200 |
| - pub(in crate::sys) fn preload(module: Module) { |
201 |
| - load_from_module(Some(module)); |
202 |
| - } |
203 |
| - |
204 | 136 | #[inline(always)]
|
205 | 137 | pub unsafe fn call($($argname: $argtype),*) -> $rettype {
|
206 | 138 | let func: F = mem::transmute(PTR.load(Ordering::Relaxed));
|
207 | 139 | func($($argname),*)
|
208 | 140 | }
|
209 | 141 | }
|
210 | 142 | $(#[$meta])*
|
211 |
| - pub use $symbol::call as $symbol; |
| 143 | + $vis use $symbol::call as $symbol; |
212 | 144 | )*)
|
213 | 145 | }
|
214 | 146 |
|
215 |
| -/// A function that either exists or doesn't. |
| 147 | +/// Optionally load `WaitOnAddress`. |
| 148 | +/// Unlike the dynamic loading described above, this does not have a fallback. |
216 | 149 | ///
|
217 |
| -/// NOTE: Optional functions must be preloaded in the `init` function above, or they will always be None. |
218 |
| -macro_rules! compat_fn_optional { |
219 |
| - (pub static $module:ident: &CStr = $name:expr; $( |
220 |
| - $(#[$meta:meta])* |
221 |
| - pub fn $symbol:ident($($argname:ident: $argtype:ty),*) -> $rettype:ty; |
222 |
| - )*) => ( |
223 |
| - pub static $module: &CStr = $name; |
224 |
| - $( |
225 |
| - $(#[$meta])* |
226 |
| - pub mod $symbol { |
227 |
| - #[allow(unused_imports)] |
228 |
| - use super::*; |
229 |
| - use crate::mem; |
230 |
| - use crate::sync::atomic::{AtomicPtr, Ordering}; |
231 |
| - use crate::sys::compat::Module; |
232 |
| - use crate::ptr::{self, NonNull}; |
233 |
| - |
234 |
| - type F = unsafe extern "system" fn($($argtype),*) -> $rettype; |
235 |
| - |
236 |
| - /// `PTR` will either be `null()` or set to the loaded function. |
237 |
| - static PTR: AtomicPtr<c_void> = AtomicPtr::new(ptr::null_mut()); |
238 |
| - |
239 |
| - /// Only allow access to the function if it has loaded successfully. |
240 |
| - #[inline(always)] |
241 |
| - #[cfg(not(miri))] |
242 |
| - pub fn option() -> Option<F> { |
243 |
| - unsafe { |
244 |
| - NonNull::new(PTR.load(Ordering::Relaxed)).map(|f| mem::transmute(f)) |
245 |
| - } |
246 |
| - } |
247 |
| - |
248 |
| - // Miri does not understand the way we do preloading |
249 |
| - // therefore load the function here instead. |
250 |
| - #[cfg(miri)] |
251 |
| - pub fn option() -> Option<F> { |
252 |
| - let mut func = NonNull::new(PTR.load(Ordering::Relaxed)); |
253 |
| - if func.is_none() { |
254 |
| - unsafe { Module::new($module).map(preload) }; |
255 |
| - func = NonNull::new(PTR.load(Ordering::Relaxed)); |
256 |
| - } |
257 |
| - unsafe { |
258 |
| - func.map(|f| mem::transmute(f)) |
259 |
| - } |
260 |
| - } |
| 150 | +/// This is re-exported from sys::c. You should prefer to import |
| 151 | +/// from there in case this changes again in the future. |
| 152 | +pub mod WaitOnAddress { |
| 153 | + use super::*; |
| 154 | + use crate::mem; |
| 155 | + use crate::ptr; |
| 156 | + use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; |
| 157 | + use crate::sys::c; |
| 158 | + |
| 159 | + static MODULE_NAME: &CStr = ansi_str!("api-ms-win-core-synch-l1-2-0"); |
| 160 | + static SYMBOL_NAME: &CStr = ansi_str!("WaitOnAddress"); |
| 161 | + |
| 162 | + // WaitOnAddress function signature. |
| 163 | + type F = unsafe extern "system" fn( |
| 164 | + Address: c::LPVOID, |
| 165 | + CompareAddress: c::LPVOID, |
| 166 | + AddressSize: c::SIZE_T, |
| 167 | + dwMilliseconds: c::DWORD, |
| 168 | + ); |
| 169 | + |
| 170 | + // A place to store the loaded function atomically. |
| 171 | + static WAIT_ON_ADDRESS: AtomicPtr<c_void> = AtomicPtr::new(ptr::null_mut()); |
| 172 | + |
| 173 | + // We can skip trying to load again if we already tried. |
| 174 | + static LOAD_MODULE: AtomicBool = AtomicBool::new(true); |
| 175 | + |
| 176 | + #[inline(always)] |
| 177 | + pub fn option() -> Option<F> { |
| 178 | + let f = WAIT_ON_ADDRESS.load(Ordering::Acquire); |
| 179 | + if !f.is_null() { Some(unsafe { mem::transmute(f) }) } else { try_load() } |
| 180 | + } |
261 | 181 |
|
262 |
| - #[allow(unused)] |
263 |
| - pub(in crate::sys) fn preload(module: Module) { |
264 |
| - unsafe { |
265 |
| - static SYMBOL_NAME: &CStr = ansi_str!(sym $symbol); |
266 |
| - if let Some(f) = module.proc_address(SYMBOL_NAME) { |
267 |
| - PTR.store(f.as_ptr(), Ordering::Relaxed); |
268 |
| - } |
269 |
| - } |
| 182 | + #[cold] |
| 183 | + fn try_load() -> Option<F> { |
| 184 | + if LOAD_MODULE.load(Ordering::Acquire) { |
| 185 | + // load the module |
| 186 | + let mut wait_on_address = None; |
| 187 | + if let Some(func) = try_load_inner() { |
| 188 | + WAIT_ON_ADDRESS.store(func.as_ptr(), Ordering::Release); |
| 189 | + wait_on_address = Some(unsafe { mem::transmute(func) }); |
270 | 190 | }
|
| 191 | + // Don't try to load the module again even if loading failed. |
| 192 | + LOAD_MODULE.store(false, Ordering::Release); |
| 193 | + wait_on_address |
| 194 | + } else { |
| 195 | + None |
271 | 196 | }
|
272 |
| - )*) |
| 197 | + } |
| 198 | + |
| 199 | + // In the future this could be a `try` block but until then I think it's a |
| 200 | + // little bit cleaner as a separate function. |
| 201 | + fn try_load_inner() -> Option<NonNull<c_void>> { |
| 202 | + unsafe { Module::new(MODULE_NAME)?.proc_address(SYMBOL_NAME) } |
| 203 | + } |
273 | 204 | }
|
0 commit comments