Skip to content

Simd emulation #650

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jul 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ perf.data.old
/build_sysroot/sysroot_src
/build_sysroot/Cargo.lock
/rust
/regex
2 changes: 1 addition & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ $ RUSTFLAGS="-Cpanic=abort -Zcodegen-backend=$cg_clif_dir/target/debug/librustc_
* Good non-rust abi support ([vectors are passed by-ref](https://github.com/bjorn3/rustc_codegen_cranelift/issues/10))
* Checked binops ([some missing instructions in cranelift](https://github.com/CraneStation/cranelift/issues/460))
* Inline assembly ([no cranelift support](https://github.com/CraneStation/cranelift/issues/444))
* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171))
* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171), some basic things work)

## Troubleshooting

Expand Down
26 changes: 11 additions & 15 deletions build_sysroot/build_sysroot.sh
Original file line number Diff line number Diff line change
@@ -1,34 +1,30 @@
#!/bin/bash

# Requires the CHANNEL env var to be set to `debug` or `release.`

set -e
cd $(dirname "$0")

pushd ../ >/dev/null
source ./config.sh
popd >/dev/null

# Cleanup for previous run
# v Clean target dir except for build scripts and incremental cache
rm -r target/*/{debug,release}/{build,deps,examples,libsysroot*,native} || true
rm Cargo.lock 2>/dev/null || true
rm -r sysroot 2>/dev/null || true

# FIXME find a better way to get the target triple
unamestr=`uname`
if [[ "$unamestr" == 'Linux' ]]; then
TARGET_TRIPLE='x86_64-unknown-linux-gnu'
elif [[ "$unamestr" == 'Darwin' ]]; then
TARGET_TRIPLE='x86_64-apple-darwin'
else
echo "Unsupported os"
exit 1
fi

# Build libs
mkdir -p sysroot/lib/rustlib/$TARGET_TRIPLE/lib/
export RUSTFLAGS="$RUSTFLAGS -Z force-unstable-if-unmarked"
if [[ "$1" == "--release" ]]; then
channel='release'
sysroot_channel='release'
RUSTFLAGS="$RUSTFLAGS -Zmir-opt-level=3" cargo build --target $TARGET_TRIPLE --release
else
channel='debug'
sysroot_channel='debug'
cargo build --target $TARGET_TRIPLE
fi

# Copy files to sysroot
cp target/$TARGET_TRIPLE/$channel/deps/*.rlib sysroot/lib/rustlib/$TARGET_TRIPLE/lib/
mkdir -p sysroot/lib/rustlib/$TARGET_TRIPLE/lib/
cp target/$TARGET_TRIPLE/$sysroot_channel/deps/*.rlib sysroot/lib/rustlib/$TARGET_TRIPLE/lib/
14 changes: 14 additions & 0 deletions cargo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

if [ -z $CHANNEL ]; then
export CHANNEL='debug'
fi

pushd $(dirname "$0") >/dev/null
source config.sh
popd >/dev/null

cmd=$1
shift

cargo $cmd --target $TARGET_TRIPLE $@
1 change: 1 addition & 0 deletions clean_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
set -e

rm -rf target/ build_sysroot/{sysroot/,sysroot_src/,target/,Cargo.lock} perf.data{,.old}
rm -rf regex/
10 changes: 2 additions & 8 deletions config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,8 @@ else
exit 1
fi

if [[ "$1" == "--release" ]]; then
channel='release'
cargo build --release
else
channel='debug'
cargo build
fi
TARGET_TRIPLE=$(rustc -vV | grep host | cut -d: -f2 | tr -d " ")

export RUSTFLAGS='-Zalways-encode-mir -Cpanic=abort -Cdebuginfo=2 -Zcodegen-backend='$(pwd)'/target/'$channel'/librustc_codegen_cranelift.'$dylib_ext' --sysroot '$(pwd)'/build_sysroot/sysroot'
export RUSTFLAGS='-Zalways-encode-mir -Cpanic=abort -Cdebuginfo=2 -Zcodegen-backend='$(pwd)'/target/'$CHANNEL'/librustc_codegen_cranelift.'$dylib_ext' --sysroot '$(pwd)'/build_sysroot/sysroot'
RUSTC="rustc $RUSTFLAGS -L crate=target/out --out-dir target/out"
export RUSTC_LOG=warn # display metadata load errors
34 changes: 34 additions & 0 deletions crate_patches/regex.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
From febff2a8c639efb5de1e1b4758cdb473847d80ce Mon Sep 17 00:00:00 2001
From: bjorn3 <[email protected]>
Date: Tue, 30 Jul 2019 12:12:37 +0200
Subject: [PATCH] Disable threads in shootout-regex-dna example

---
examples/shootout-regex-dna.rs | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/shootout-regex-dna.rs b/examples/shootout-regex-dna.rs
index 2171bb3..37382f8 100644
--- a/examples/shootout-regex-dna.rs
+++ b/examples/shootout-regex-dna.rs
@@ -37,7 +37,7 @@ fn main() {
for variant in variants {
let seq = seq_arc.clone();
let restr = variant.to_string();
- let future = thread::spawn(move || variant.find_iter(&seq).count());
+ let future = variant.find_iter(&seq).count();
counts.push((restr, future));
}

@@ -60,7 +60,7 @@ fn main() {
}

for (variant, count) in counts {
- println!("{} {}", variant, count.join().unwrap());
+ println!("{} {}", variant, count);
}
println!("\n{}\n{}\n{}", ilen, clen, seq.len());
}
--
2.11.0

10 changes: 10 additions & 0 deletions example/mini_core_hello_world.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,23 @@ impl<T: ?Sized, U: ?Sized> CoerceUnsized<Unique<U>> for Unique<T> where T: Unsiz
fn take_f32(_f: f32) {}
fn take_unique(_u: Unique<()>) {}

fn return_u128_pair() -> (u128, u128) {
(0, 0)
}

fn call_return_u128_pair() {
return_u128_pair();
}

fn main() {
take_unique(Unique {
pointer: 0 as *const (),
_marker: PhantomData,
});
take_f32(0.1);

call_return_u128_pair();

//return;

unsafe {
Expand Down
125 changes: 125 additions & 0 deletions example/std_example.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#![feature(core_intrinsics)]

use std::arch::x86_64::*;
use std::io::Write;
use std::intrinsics;


fn main() {
let _ = ::std::iter::repeat('a' as u8).take(10).collect::<Vec<_>>();
let stderr = ::std::io::stderr();
Expand Down Expand Up @@ -43,6 +45,129 @@ fn main() {
assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 >> 64, 0xFEDCBA98765432u128);
assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 as i128 >> 64, 0xFEDCBA98765432i128);
assert_eq!(353985398u128 * 932490u128, 330087843781020u128);

unsafe {
test_simd();
}
}

#[target_feature(enable = "sse2")]
unsafe fn test_simd() {
let x = _mm_setzero_si128();
let y = _mm_set1_epi16(7);
let or = _mm_or_si128(x, y);
let cmp_eq = _mm_cmpeq_epi8(y, y);
let cmp_lt = _mm_cmplt_epi8(y, y);

assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]);
assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_eq), [0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff]);
assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]);

test_mm_slli_si128();
test_mm_movemask_epi8();
test_mm256_movemask_epi8();
test_mm_add_epi8();
test_mm_add_pd();

let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
assert_eq!(mask1, 1);
}

#[target_feature(enable = "sse2")]
unsafe fn test_mm_slli_si128() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = _mm_slli_si128(a, 1);
let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m128i(r, e);

#[rustfmt::skip]
let a = _mm_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = _mm_slli_si128(a, 15);
let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
assert_eq_m128i(r, e);

#[rustfmt::skip]
let a = _mm_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = _mm_slli_si128(a, 16);
assert_eq_m128i(r, _mm_set1_epi8(0));

#[rustfmt::skip]
let a = _mm_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = _mm_slli_si128(a, -1);
assert_eq_m128i(_mm_set1_epi8(0), r);

#[rustfmt::skip]
let a = _mm_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = _mm_slli_si128(a, -0x80000000);
assert_eq_m128i(r, _mm_set1_epi8(0));
}

#[target_feature(enable = "sse2")]
unsafe fn test_mm_movemask_epi8() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
0b0101, 0b1111_0000u8 as i8, 0, 0,
0, 0, 0b1111_0000u8 as i8, 0b0101,
0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
);
let r = _mm_movemask_epi8(a);
assert_eq!(r, 0b10100100_00100101);
}

#[target_feature(enable = "avx2")]
unsafe fn test_mm256_movemask_epi8() {
let a = _mm256_set1_epi8(-1);
let r = _mm256_movemask_epi8(a);
let e = -1;
assert_eq!(r, e);
}

#[target_feature(enable = "sse2")]
unsafe fn test_mm_add_epi8() {
let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[rustfmt::skip]
let b = _mm_setr_epi8(
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
);
let r = _mm_add_epi8(a, b);
#[rustfmt::skip]
let e = _mm_setr_epi8(
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
);
assert_eq_m128i(r, e);
}

#[target_feature(enable = "sse2")]
unsafe fn test_mm_add_pd() {
let a = _mm_setr_pd(1.0, 2.0);
let b = _mm_setr_pd(5.0, 10.0);
let r = _mm_add_pd(a, b);
assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
}

fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) {
unsafe {
assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x));
}
}

#[target_feature(enable = "sse2")]
pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
panic!("{:?} != {:?}", a, b);
}
}

#[derive(PartialEq)]
Expand Down
13 changes: 13 additions & 0 deletions patches/0015-Remove-usage-of-unsized-locals.patch
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,18 @@ index b2142e7..718bb1c 100644
}

pub fn min_stack() -> usize {
diff --git a/src/libstd/sys/unix/thread.rs b/src/libstd/sys/unix/thread.rs
index f4a1783..362b537 100644
--- a/src/libstd/sys/unix/thread.rs
+++ b/src/libstd/sys/unix/thread.rs
@@ -40,6 +40,8 @@ impl Thread {
// unsafe: see thread::Builder::spawn_unchecked for safety requirements
pub unsafe fn new(stack: usize, p: Box<dyn FnOnce()>)
-> io::Result<Thread> {
+ panic!("Threads are not yet supported, because cranelift doesn't support atomics.");
+
let p = box p;
let mut native: libc::pthread_t = mem::zeroed();
let mut attr: libc::pthread_attr_t = mem::zeroed();
--
2.20.1 (Apple Git-117)
25 changes: 25 additions & 0 deletions patches/0016-Disable-cpuid-intrinsic.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
From 7403e2998345ef0650fd50628d7098d4d1e88e5c Mon Sep 17 00:00:00 2001
From: bjorn3 <[email protected]>
Date: Sat, 6 Apr 2019 12:16:21 +0200
Subject: [PATCH] Remove usage of unsized locals

---
src/stdarch/crates/core_arch/src/x86/cpuid.rs | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/src/stdarch/crates/core_arch/src/x86/cpuid.rs b/src/stdarch/crates/core_arch/src/x86/cpuid.rs
index f313c42..ff952bc 100644
--- a/src/stdarch/crates/core_arch/src/x86/cpuid.rs
+++ b/src/stdarch/crates/core_arch/src/x86/cpuid.rs
@@ -84,6 +84,9 @@ pub unsafe fn __cpuid(leaf: u32) -> CpuidResult {
/// Does the host support the `cpuid` instruction?
#[inline]
pub fn has_cpuid() -> bool {
+ // __cpuid intrinsic is not yet implemented
+ return false;
+
#[cfg(target_env = "sgx")]
{
false
--
2.20.1 (Apple Git-117)
7 changes: 7 additions & 0 deletions prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,10 @@ set -e
rustup component add rust-src
./build_sysroot/prepare_sysroot_src.sh
cargo install hyperfine || echo "Skipping hyperfine install"

git clone https://github.com/rust-lang/regex.git || echo "rust-lang/regex has already been cloned"
pushd regex
git checkout -- .
git checkout 341f207c1071f7290e3f228c710817c280c8dca1
git apply ../crate_patches/regex.patch
popd
Loading