Skip to content

Commit 1bf24cd

Browse files
author
Jorge Aparicio
committed
Merge pull request rust-lang#3 from japaric/ptx
initial support for PTX generation
2 parents 5e18b4b + aed5537 commit 1bf24cd

File tree

19 files changed

+296
-48
lines changed

19 files changed

+296
-48
lines changed

configure

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1718,7 +1718,7 @@ do
17181718
CMAKE_ARGS="$CMAKE_ARGS -DLLVM_ENABLE_ASSERTIONS=ON"
17191719
fi
17201720

1721-
CMAKE_ARGS="$CMAKE_ARGS -DLLVM_TARGETS_TO_BUILD='X86;ARM;AArch64;Mips;PowerPC'"
1721+
CMAKE_ARGS="$CMAKE_ARGS -DLLVM_TARGETS_TO_BUILD='X86;ARM;AArch64;Mips;PowerPC;NVPTX'"
17221722
CMAKE_ARGS="$CMAKE_ARGS -G '$CFG_CMAKE_GENERATOR'"
17231723
CMAKE_ARGS="$CMAKE_ARGS $CFG_LLVM_SRC_DIR"
17241724

mk/main.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ endif
296296
# LLVM macros
297297
######################################################################
298298

299-
LLVM_OPTIONAL_COMPONENTS=x86 arm aarch64 mips powerpc pnacl
299+
LLVM_OPTIONAL_COMPONENTS=x86 arm aarch64 mips powerpc pnacl nvptx
300300
LLVM_REQUIRED_COMPONENTS=ipo bitreader bitwriter linker asmparser mcjit \
301301
interpreter instrumentation
302302

src/bootstrap/native.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ pub fn llvm(build: &Build, target: &str) {
6565
.out_dir(&dst)
6666
.profile(if build.config.llvm_optimize {"Release"} else {"Debug"})
6767
.define("LLVM_ENABLE_ASSERTIONS", assertions)
68-
.define("LLVM_TARGETS_TO_BUILD", "X86;ARM;AArch64;Mips;PowerPC")
68+
.define("LLVM_TARGETS_TO_BUILD", "X86;ARM;AArch64;Mips;PowerPC;NVPTX")
6969
.define("LLVM_INCLUDE_EXAMPLES", "OFF")
7070
.define("LLVM_INCLUDE_TESTS", "OFF")
7171
.define("LLVM_INCLUDE_DOCS", "OFF")

src/jemalloc

src/libcore/intrinsics.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,4 +596,23 @@ extern "rust-intrinsic" {
596596
/// on MSVC it's `*mut [usize; 2]`. For more information see the compiler's
597597
/// source as well as std's catch implementation.
598598
pub fn try(f: fn(*mut u8), data: *mut u8, local_ptr: *mut u8) -> i32;
599+
600+
}
601+
602+
#[cfg(not(stage0))]
603+
#[cfg(arch = "nvptx")]
604+
extern "rust-intrinsic" {
605+
pub fn thread_idx_x() -> i32;
606+
pub fn thread_idx_y() -> i32;
607+
pub fn thread_idx_z() -> i32;
608+
pub fn block_idx_x() -> i32;
609+
pub fn block_idx_y() -> i32;
610+
pub fn block_idx_z() -> i32;
611+
pub fn block_dim_x() -> i32;
612+
pub fn block_dim_y() -> i32;
613+
pub fn block_dim_z() -> i32;
614+
pub fn grid_dim_x() -> i32;
615+
pub fn grid_dim_y() -> i32;
616+
pub fn grid_dim_z() -> i32;
617+
pub fn syncthreads();
599618
}

src/libcore/num/mod.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,13 +187,25 @@ macro_rules! int_impl {
187187
$sub_with_overflow:path,
188188
$mul_with_overflow:path) => {
189189
/// Returns the smallest value that can be represented by this integer type.
190+
///
191+
/// # Examples
192+
///
193+
/// ```
194+
/// assert_eq!(i8::min_value(), -128);
195+
/// ```
190196
#[stable(feature = "rust1", since = "1.0.0")]
191197
#[inline]
192198
pub const fn min_value() -> Self {
193199
(-1 as Self) << ($BITS - 1)
194200
}
195201

196202
/// Returns the largest value that can be represented by this integer type.
203+
///
204+
/// # Examples
205+
///
206+
/// ```
207+
/// assert_eq!(i8::max_value(), 127);
208+
/// ```
197209
#[stable(feature = "rust1", since = "1.0.0")]
198210
#[inline]
199211
pub const fn max_value() -> Self {
@@ -287,6 +299,8 @@ macro_rules! int_impl {
287299
/// Shifts the bits to the left by a specified amount, `n`,
288300
/// wrapping the truncated bits to the end of the resulting integer.
289301
///
302+
/// Please note this isn't the same operation as `<<`!
303+
///
290304
/// # Examples
291305
///
292306
/// Basic usage:
@@ -307,6 +321,8 @@ macro_rules! int_impl {
307321
/// wrapping the truncated bits to the beginning of the resulting
308322
/// integer.
309323
///
324+
/// Please note this isn't the same operation as `>>`!
325+
///
310326
/// # Examples
311327
///
312328
/// Basic usage:
@@ -1249,11 +1265,23 @@ macro_rules! uint_impl {
12491265
$sub_with_overflow:path,
12501266
$mul_with_overflow:path) => {
12511267
/// Returns the smallest value that can be represented by this integer type.
1268+
///
1269+
/// # Examples
1270+
///
1271+
/// ```
1272+
/// assert_eq!(u8::min_value(), 0);
1273+
/// ```
12521274
#[stable(feature = "rust1", since = "1.0.0")]
12531275
#[inline]
12541276
pub const fn min_value() -> Self { 0 }
12551277

12561278
/// Returns the largest value that can be represented by this integer type.
1279+
///
1280+
/// # Examples
1281+
///
1282+
/// ```
1283+
/// assert_eq!(u8::max_value(), 255);
1284+
/// ```
12571285
#[stable(feature = "rust1", since = "1.0.0")]
12581286
#[inline]
12591287
pub const fn max_value() -> Self { !0 }
@@ -1360,6 +1388,8 @@ macro_rules! uint_impl {
13601388
/// Shifts the bits to the left by a specified amount, `n`,
13611389
/// wrapping the truncated bits to the end of the resulting integer.
13621390
///
1391+
/// Please note this isn't the same operation as `<<`!
1392+
///
13631393
/// # Examples
13641394
///
13651395
/// Basic usage:
@@ -1382,6 +1412,8 @@ macro_rules! uint_impl {
13821412
/// wrapping the truncated bits to the beginning of the resulting
13831413
/// integer.
13841414
///
1415+
/// Please note this isn't the same operation as `>>`!
1416+
///
13851417
/// # Examples
13861418
///
13871419
/// Basic usage:

src/librustc_back/target/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,9 @@ supported_targets! {
141141
("i586-pc-windows-msvc", i586_pc_windows_msvc),
142142

143143
("le32-unknown-nacl", le32_unknown_nacl),
144-
("asmjs-unknown-emscripten", asmjs_unknown_emscripten)
144+
("asmjs-unknown-emscripten", asmjs_unknown_emscripten),
145+
("nvptx-unknown-unknown", nvptx_unknown_unknown),
146+
("nvptx64-unknown-unknown", nvptx64_unknown_unknown)
145147
}
146148

147149
/// Everything `rustc` knows about how to compile for a specific target.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
use super::{Target, TargetOptions};
12+
13+
pub fn target() -> Target {
14+
let opts = TargetOptions {
15+
linker: "".to_string(),
16+
ar: "".to_string(),
17+
18+
cpu: "sm_20".to_string(),
19+
dynamic_linking: false,
20+
executables: false,
21+
no_compiler_rt: true,
22+
allow_asm: false,
23+
.. Default::default()
24+
};
25+
Target {
26+
llvm_target: "nvptx64-unknown-unknown".to_string(),
27+
target_endian: "little".to_string(),
28+
target_pointer_width: "64".to_string(),
29+
target_os: "none".to_string(),
30+
target_env: "".to_string(),
31+
target_vendor: "unknown".to_string(),
32+
data_layout: "e-i64:64-v16:16-v32:32-n16:32:64".to_string(),
33+
arch: "nvptx".to_string(),
34+
options: opts,
35+
}
36+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
use super::{Target, TargetOptions};
12+
13+
pub fn target() -> Target {
14+
let opts = TargetOptions {
15+
linker: "".to_string(),
16+
ar: "".to_string(),
17+
18+
cpu: "sm_20".to_string(),
19+
dynamic_linking: false,
20+
executables: false,
21+
no_compiler_rt: true,
22+
allow_asm: false,
23+
.. Default::default()
24+
};
25+
Target {
26+
llvm_target: "nvptx-unknown-unknown".to_string(),
27+
target_endian: "little".to_string(),
28+
target_pointer_width: "32".to_string(),
29+
target_os: "none".to_string(),
30+
target_env: "".to_string(),
31+
target_vendor: "unknown".to_string(),
32+
data_layout: "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64".to_string(),
33+
arch: "nvptx".to_string(),
34+
options: opts,
35+
}
36+
}

src/librustc_llvm/build.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ fn main() {
6666
let host = env::var("HOST").unwrap();
6767
let is_crossed = target != host;
6868

69-
let optional_components = ["x86", "arm", "aarch64", "mips", "powerpc", "pnacl"];
69+
let optional_components = ["x86", "arm", "aarch64", "mips", "powerpc", "pnacl", "nvptx"];
7070

7171
// FIXME: surely we don't need all these components, right? Stuff like mcjit
7272
// or interpreter the compiler itself never uses.

src/librustc_llvm/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2407,6 +2407,11 @@ pub fn initialize_available_targets() {
24072407
LLVMInitializeMipsTargetMC,
24082408
LLVMInitializeMipsAsmPrinter,
24092409
LLVMInitializeMipsAsmParser);
2410+
init_target!(llvm_component = "nvptx",
2411+
LLVMInitializeNVPTXTargetInfo,
2412+
LLVMInitializeNVPTXTarget,
2413+
LLVMInitializeNVPTXTargetMC,
2414+
LLVMInitializeNVPTXAsmPrinter);
24102415
init_target!(llvm_component = "powerpc",
24112416
LLVMInitializePowerPCTargetInfo,
24122417
LLVMInitializePowerPCTarget,

src/librustc_trans/closure.rs

Lines changed: 48 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -169,14 +169,14 @@ fn get_or_create_closure_declaration<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
169169
}));
170170
let llfn = declare::declare_fn(ccx, &symbol, function_type);
171171

172-
// set an inline hint for all closures
173-
attributes::inline(llfn, attributes::InlineAttr::Hint);
174172
attributes::set_frame_pointer_elimination(ccx, llfn);
175173

176174
debug!("get_or_create_declaration_if_closure(): inserting new \
177175
closure {:?}: {:?}",
178176
instance, Value(llfn));
179-
ccx.instances().borrow_mut().insert(instance, llfn);
177+
178+
// NOTE: We do *not* store llfn in the ccx.instances() map here,
179+
// that is only done when the closure's body is translated.
180180

181181
llfn
182182
}
@@ -197,8 +197,8 @@ pub fn trans_closure_expr<'a, 'tcx>(dest: Dest<'a, 'tcx>,
197197
// (*) Note that in the case of inlined functions, the `closure_def_id` will be the
198198
// defid of the closure in its original crate, whereas `id` will be the id of the local
199199
// inlined copy.
200-
201-
let param_substs = closure_substs.func_substs;
200+
debug!("trans_closure_expr(id={:?}, closure_def_id={:?}, closure_substs={:?})",
201+
id, closure_def_id, closure_substs);
202202

203203
let ccx = match dest {
204204
Dest::SaveIn(bcx, _) => bcx.ccx(),
@@ -207,41 +207,49 @@ pub fn trans_closure_expr<'a, 'tcx>(dest: Dest<'a, 'tcx>,
207207
let tcx = ccx.tcx();
208208
let _icx = push_ctxt("closure::trans_closure_expr");
209209

210-
debug!("trans_closure_expr(id={:?}, closure_def_id={:?}, closure_substs={:?})",
211-
id, closure_def_id, closure_substs);
212-
213-
let llfn = get_or_create_closure_declaration(ccx, closure_def_id, closure_substs);
214-
llvm::SetLinkage(llfn, llvm::WeakODRLinkage);
215-
llvm::SetUniqueComdat(ccx.llmod(), llfn);
216-
217-
// Get the type of this closure. Use the current `param_substs` as
218-
// the closure substitutions. This makes sense because the closure
219-
// takes the same set of type arguments as the enclosing fn, and
220-
// this function (`trans_closure`) is invoked at the point
221-
// of the closure expression.
222-
223-
let sig = &tcx.closure_type(closure_def_id, closure_substs).sig;
224-
let sig = tcx.erase_late_bound_regions(sig);
225-
let sig = tcx.normalize_associated_type(&sig);
226-
227-
let closure_type = tcx.mk_closure_from_closure_substs(closure_def_id,
228-
closure_substs);
229-
let sig = ty::FnSig {
230-
inputs: Some(get_self_type(tcx, closure_def_id, closure_type))
231-
.into_iter().chain(sig.inputs).collect(),
232-
output: sig.output,
233-
variadic: false
234-
};
235-
236-
trans_closure(ccx,
237-
decl,
238-
body,
239-
llfn,
240-
Instance::new(closure_def_id, param_substs),
241-
id,
242-
&sig,
243-
Abi::RustCall,
244-
ClosureEnv::Closure(closure_def_id, id));
210+
let param_substs = closure_substs.func_substs;
211+
let instance = Instance::new(closure_def_id, param_substs);
212+
213+
// If we have not done so yet, translate this closure's body
214+
if !ccx.instances().borrow().contains_key(&instance) {
215+
let llfn = get_or_create_closure_declaration(ccx, closure_def_id, closure_substs);
216+
llvm::SetLinkage(llfn, llvm::WeakODRLinkage);
217+
llvm::SetUniqueComdat(ccx.llmod(), llfn);
218+
219+
// set an inline hint for all closures
220+
attributes::inline(llfn, attributes::InlineAttr::Hint);
221+
222+
// Get the type of this closure. Use the current `param_substs` as
223+
// the closure substitutions. This makes sense because the closure
224+
// takes the same set of type arguments as the enclosing fn, and
225+
// this function (`trans_closure`) is invoked at the point
226+
// of the closure expression.
227+
228+
let sig = &tcx.closure_type(closure_def_id, closure_substs).sig;
229+
let sig = tcx.erase_late_bound_regions(sig);
230+
let sig = tcx.normalize_associated_type(&sig);
231+
232+
let closure_type = tcx.mk_closure_from_closure_substs(closure_def_id,
233+
closure_substs);
234+
let sig = ty::FnSig {
235+
inputs: Some(get_self_type(tcx, closure_def_id, closure_type))
236+
.into_iter().chain(sig.inputs).collect(),
237+
output: sig.output,
238+
variadic: false
239+
};
240+
241+
trans_closure(ccx,
242+
decl,
243+
body,
244+
llfn,
245+
Instance::new(closure_def_id, param_substs),
246+
id,
247+
&sig,
248+
Abi::RustCall,
249+
ClosureEnv::Closure(closure_def_id, id));
250+
251+
ccx.instances().borrow_mut().insert(instance, llfn);
252+
}
245253

246254
// Don't hoist this to the top of the function. It's perfectly legitimate
247255
// to have a zero-size closure (in which case dest will be `Ignore`) and

src/librustc_trans/context.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,6 +1084,19 @@ fn declare_intrinsic(ccx: &CrateContext, key: &str) -> Option<ValueRef> {
10841084
ifn!("llvm.localrecover", fn(i8p, i8p, t_i32) -> i8p);
10851085
ifn!("llvm.x86.seh.recoverfp", fn(i8p, i8p) -> i8p);
10861086

1087+
ifn!("llvm.cuda.syncthreads", fn() -> void);
1088+
ifn!("llvm.nvvm.read.ptx.sreg.tid.x", fn() -> t_i32);
1089+
ifn!("llvm.nvvm.read.ptx.sreg.tid.y", fn() -> t_i32);
1090+
ifn!("llvm.nvvm.read.ptx.sreg.tid.z", fn() -> t_i32);
1091+
ifn!("llvm.nvvm.read.ptx.sreg.ctaid.x", fn() -> t_i32);
1092+
ifn!("llvm.nvvm.read.ptx.sreg.ctaid.y", fn() -> t_i32);
1093+
ifn!("llvm.nvvm.read.ptx.sreg.ctaid.z", fn() -> t_i32);
1094+
ifn!("llvm.nvvm.read.ptx.sreg.ntid.x", fn() -> t_i32);
1095+
ifn!("llvm.nvvm.read.ptx.sreg.ntid.y", fn() -> t_i32);
1096+
ifn!("llvm.nvvm.read.ptx.sreg.ntid.z", fn() -> t_i32);
1097+
ifn!("llvm.nvvm.read.ptx.sreg.nctaid.x", fn() -> t_i32);
1098+
ifn!("llvm.nvvm.read.ptx.sreg.nctaid.y", fn() -> t_i32);
1099+
ifn!("llvm.nvvm.read.ptx.sreg.nctaid.z", fn() -> t_i32);
10871100
ifn!("llvm.assume", fn(i1) -> void);
10881101

10891102
if ccx.sess().opts.debuginfo != NoDebugInfo {

src/librustc_trans/intrinsic.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,19 @@ fn get_simple_intrinsic(ccx: &CrateContext, name: &str) -> Option<ValueRef> {
8989
"roundf32" => "llvm.round.f32",
9090
"roundf64" => "llvm.round.f64",
9191
"assume" => "llvm.assume",
92+
"thread_idx_x" => "llvm.nvvm.read.ptx.sreg.tid.x",
93+
"thread_idx_y" => "llvm.nvvm.read.ptx.sreg.tid.y",
94+
"thread_idx_z" => "llvm.nvvm.read.ptx.sreg.tid.z",
95+
"block_idx_x" => "llvm.nvvm.read.ptx.sreg.ctaid.x",
96+
"block_idx_y" => "llvm.nvvm.read.ptx.sreg.ctaid.y",
97+
"block_idx_z" => "llvm.nvvm.read.ptx.sreg.ctaid.z",
98+
"block_dim_x" => "llvm.nvvm.read.ptx.sreg.ntid.x",
99+
"block_dim_y" => "llvm.nvvm.read.ptx.sreg.ntid.y",
100+
"block_dim_z" => "llvm.nvvm.read.ptx.sreg.ntid.z",
101+
"grid_dim_x" => "llvm.nvvm.read.ptx.sreg.nctaid.x",
102+
"grid_dim_y" => "llvm.nvvm.read.ptx.sreg.nctaid.y",
103+
"grid_dim_z" => "llvm.nvvm.read.ptx.sreg.nctaid.z",
104+
"syncthreads" => "llvm.cuda.syncthreads",
92105
_ => return None
93106
};
94107
Some(ccx.get_intrinsic(&llvm_name))

0 commit comments

Comments
 (0)