Skip to content

Commit 259d479

Browse files
authored
Add FXSAVE/FXRSTOR, update Intel SDE, fix xsave tests (rust-lang#205)
* [x86] add run-time detection for fxsr
* [x86] add i386 fxsr intrinsics: FXSAVE,FXRSTOR
* [x86_64] add x86_64 fxsr intrinsics: FXSAVE64/FXRSTOR64
* [x86-runtime]: document xsave detection further
* [x86] disable xsaves and xsaves64 tests
1 parent f3ee983 commit 259d479

File tree

8 files changed

+530
-225
lines changed

8 files changed

+530
-225
lines changed

ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
55
file \
66
make \
77
ca-certificates \
8-
curl \
8+
wget \
99
bzip2
1010

11-
RUN curl https://s3-us-west-1.amazonaws.com/rust-lang-ci2/rust-ci-mirror/sde-external-8.9.0-2017-08-06-lin.tar.bz2 | \
12-
tar xjf -
13-
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.9.0-2017-08-06-lin/sde64 --"
11+
RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.12.0-2017-10-23-lin.tar.bz2
12+
RUN tar -xjf sde-external-8.12.0-2017-10-23-lin.tar.bz2
13+
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.12.0-2017-10-23-lin/sde64 --"

coresimd/src/runtime/x86.rs

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ macro_rules! __unstable_detect_feature {
132132
$crate::vendor::__unstable_detect_feature(
133133
$crate::vendor::__Feature::popcnt{})
134134
};
135+
("fxsr") => {
136+
$crate::vendor::__unstable_detect_feature(
137+
$crate::vendor::__Feature::fxsr{})
138+
};
135139
("xsave") => {
136140
$crate::vendor::__unstable_detect_feature(
137141
$crate::vendor::__Feature::xsave{})
@@ -212,6 +216,8 @@ pub enum __Feature {
212216
tbm,
213217
/// POPCNT (Population Count)
214218
popcnt,
219+
/// FXSR (Floating-point context fast save and restore)
220+
fxsr,
215221
/// XSAVE (Save Processor Extended States)
216222
xsave,
217223
/// XSAVEOPT (Save Processor Extended States Optimized)
@@ -325,24 +331,27 @@ pub fn detect_features() -> usize {
325331
enable(proc_info_ecx, 19, __Feature::sse4_1);
326332
enable(proc_info_ecx, 20, __Feature::sse4_2);
327333
enable(proc_info_ecx, 23, __Feature::popcnt);
334+
enable(proc_info_edx, 24, __Feature::fxsr);
328335
enable(proc_info_edx, 25, __Feature::sse);
329336
enable(proc_info_edx, 26, __Feature::sse2);
330337

331338
enable(extended_features_ebx, 3, __Feature::bmi);
332339
enable(extended_features_ebx, 8, __Feature::bmi2);
333340

334341
// `XSAVE` and `AVX` support:
335-
if bit::test(proc_info_ecx as usize, 26) {
342+
let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
343+
if cpu_xsave {
336344
// 0. Here the CPU supports `XSAVE`.
337345

338346
// 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
339347
// supports saving the state of the AVX/AVX2 vector registers on
340348
// context-switches, see:
341349
//
342-
// - https://software.intel.
343-
// com/en-us/blogs/2011/04/14/is-avx-enabled
344-
// - https://hg.mozilla.
345-
// org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
350+
// - [intel: is avx enabled?][is_avx_enabled],
351+
// - [mozilla: sse.cpp][mozilla_sse_cpp].
352+
//
353+
// [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
354+
// [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
346355
let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);
347356

348357
// 2. The OS must have signaled the CPU that it supports saving and
@@ -354,11 +363,34 @@ pub fn detect_features() -> usize {
354363
let os_avx_support = xcr0 & 6 == 6;
355364
let os_avx512_support = xcr0 & 224 == 224;
356365

366+
// Only if the OS and the CPU support saving/restoring the AVX
367+
// registers we enable `xsave` support:
357368
if cpu_osxsave && os_avx_support {
358-
// Only if the OS and the CPU support saving/restoring the AVX
359-
// registers we enable `xsave` support:
369+
// See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
370+
// FEATURES" in the "Intel® 64 and IA-32 Architectures Software
371+
// Developer’s Manual, Volume 1: Basic Architecture":
372+
//
373+
// "Software enables the XSAVE feature set by setting
374+
// CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
375+
// instruction). If this bit is 0, execution of any of XGETBV,
376+
// XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
377+
// causes an invalid-opcode exception (#UD)"
378+
//
360379
enable(proc_info_ecx, 26, __Feature::xsave);
361380

381+
// For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
382+
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
383+
// ECX = 1):
384+
if max_basic_leaf >= 0xd {
385+
let CpuidResult {
386+
eax: proc_extended_state1_eax,
387+
..
388+
} = unsafe { __cpuid_count(0xd_u32, 1) };
389+
enable(proc_extended_state1_eax, 0, __Feature::xsaveopt);
390+
enable(proc_extended_state1_eax, 1, __Feature::xsavec);
391+
enable(proc_extended_state1_eax, 3, __Feature::xsaves);
392+
}
393+
362394
// And AVX/AVX2:
363395
enable(proc_info_ecx, 28, __Feature::avx);
364396
enable(extended_features_ebx, 5, __Feature::avx2);
@@ -382,18 +414,6 @@ pub fn detect_features() -> usize {
382414
);
383415
}
384416
}
385-
386-
// Processor Extended State Enumeration Sub-leaf
387-
// (EAX = 0DH, ECX = 1)
388-
if max_basic_leaf >= 0xd {
389-
let CpuidResult {
390-
eax: proc_extended_state1_eax,
391-
..
392-
} = unsafe { __cpuid_count(0xd_u32, 1) };
393-
enable(proc_extended_state1_eax, 0, __Feature::xsaveopt);
394-
enable(proc_extended_state1_eax, 1, __Feature::xsavec);
395-
enable(proc_extended_state1_eax, 3, __Feature::xsaves);
396-
}
397417
}
398418

399419
// This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
@@ -448,10 +468,11 @@ mod tests {
448468
println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
449469
println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
450470
println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
451-
println!("xsave {:?}", cfg_feature_enabled!("xsave"));
452-
println!("xsaveopt {:?}", cfg_feature_enabled!("xsaveopt"));
453-
println!("xsaves {:?}", cfg_feature_enabled!("xsaves"));
454-
println!("xsavec {:?}", cfg_feature_enabled!("xsavec"));
471+
println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
472+
println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
473+
println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
474+
println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
475+
println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
455476
}
456477

457478
#[test]

coresimd/src/x86/i386/fxsr.rs

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
//! FXSR floating-point context fast save and restore.
2+
3+
#[cfg(test)]
4+
use stdsimd_test::assert_instr;
5+
6+
#[allow(improper_ctypes)]
7+
extern "C" {
8+
#[link_name = "llvm.x86.fxsave"]
9+
fn fxsave(p: *mut u8) -> ();
10+
#[link_name = "llvm.x86.fxrstor"]
11+
fn fxrstor(p: *const u8) -> ();
12+
}
13+
14+
/// Saves the `x87` FPU, `MMX` technology, `XMM`, and `MXCSR` registers to the
15+
/// 512-byte-long 16-byte-aligned memory region `mem_addr`.
16+
///
17+
/// A misaligned destination operand raises a general-protection (#GP) or an
18+
/// alignment check exception (#AC).
19+
///
20+
/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor].
21+
///
22+
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
23+
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
24+
#[inline(always)]
25+
#[target_feature = "+fxsr"]
26+
#[cfg_attr(test, assert_instr(fxsave))]
27+
pub unsafe fn _fxsave(mem_addr: *mut u8) {
28+
fxsave(mem_addr)
29+
}
30+
31+
/// Restores the `XMM`, `MMX`, `MXCSR`, and `x87` FPU registers from the
32+
/// 512-byte-long 16-byte-aligned memory region `mem_addr`.
33+
///
34+
/// The contents of this memory region should have been written to by a
35+
/// previous
36+
/// `_fxsave` or `_fxsave64` intrinsic.
37+
///
38+
/// A misaligned source operand raises a general-protection (#GP) or an
39+
/// alignment check exception (#AC).
40+
///
41+
/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor].
42+
///
43+
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
44+
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
45+
#[inline(always)]
46+
#[target_feature = "+fxsr"]
47+
#[cfg_attr(test, assert_instr(fxrstor))]
48+
pub unsafe fn _fxrstor(mem_addr: *const u8) {
49+
fxrstor(mem_addr)
50+
}
51+
52+
#[cfg(test)]
53+
mod tests {
54+
use x86::i386::fxsr;
55+
use stdsimd_test::simd_test;
56+
use std::fmt;
57+
58+
#[repr(align(16))]
59+
struct FxsaveArea {
60+
data: [u8; 512], // 512 bytes
61+
}
62+
63+
impl FxsaveArea {
64+
fn new() -> FxsaveArea {
65+
FxsaveArea { data: [0; 512] }
66+
}
67+
fn ptr(&mut self) -> *mut u8 {
68+
&mut self.data[0] as *mut _ as *mut u8
69+
}
70+
}
71+
72+
impl PartialEq<FxsaveArea> for FxsaveArea {
73+
fn eq(&self, other: &FxsaveArea) -> bool {
74+
for i in 0..self.data.len() {
75+
if self.data[i] != other.data[i] {
76+
return false;
77+
}
78+
}
79+
true
80+
}
81+
}
82+
83+
impl fmt::Debug for FxsaveArea {
84+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85+
write!(f, "[")?;
86+
for i in 0..self.data.len() {
87+
write!(f, "{}", self.data[i])?;
88+
if i != self.data.len() - 1 {
89+
write!(f, ", ")?;
90+
}
91+
}
92+
write!(f, "]")
93+
}
94+
}
95+
96+
#[simd_test = "fxsr"]
97+
unsafe fn fxsave() {
98+
let mut a = FxsaveArea::new();
99+
let mut b = FxsaveArea::new();
100+
101+
fxsr::_fxsave(a.ptr());
102+
fxsr::_fxrstor(a.ptr());
103+
fxsr::_fxsave(b.ptr());
104+
assert_eq!(a, b);
105+
}
106+
}

coresimd/src/x86/i386/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@
22
33
mod eflags;
44
pub use self::eflags::*;
5+
6+
mod fxsr;
7+
pub use self::fxsr::*;

0 commit comments

Comments
 (0)