Feat: completely remove support for 32-bit CUDA in the codegen

RDambrosio016 · RDambrosio016 · commit ef4fcc86d46f · 2022-04-10T14:48:32.000-04:00
diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs
@@ -63,10 +63,6 @@ pub struct CudaBuilder {
     /// Whether to compile the gpu crate for release.
     /// `true` by default.
     pub release: bool,
-    /// Whether to use 32 bit nvptx. Note that this is not tested much, so
-    /// it may break in certain cases. You should always use 64 bit nvptx.
-    /// `false` by default.
-    pub nvptx_32: bool,
     /// An optional path to copy the final ptx file to.
     pub ptx_file_copy_path: Option<PathBuf>,
 
@@ -147,7 +143,6 @@ impl CudaBuilder {
         Self {
             path_to_crate: path_to_crate_root.as_ref().to_owned(),
             release: true,
-            nvptx_32: false,
             ptx_file_copy_path: None,
             generate_line_info: true,
             nvvm_opts: true,
@@ -184,13 +179,6 @@ impl CudaBuilder {
         self
     }
 
-    /// Whether to use 32 bit nvptx. Note that this is not tested much, so
-    /// it may break in certain cases. You should always use 64 bit nvptx.
-    pub fn nvptx_32(mut self, nvptx_32: bool) -> Self {
-        self.nvptx_32 = nvptx_32;
-        self
-    }
-
     /// Whether to generate debug line number info.
     /// This defaults to `true`, but nothing will be generated
     /// if the gpu crate is built as release.
@@ -427,20 +415,14 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
         rustflags.push(["-Cllvm-args=", &llvm_args].concat());
     }
 
-    let target = if builder.nvptx_32 {
-        "nvptx-nvidia-cuda"
-    } else {
-        "nvptx64-nvidia-cuda"
-    };
-
     let mut cargo = Command::new("cargo");
     cargo.args(&[
         "build",
         "--lib",
         "--message-format=json-render-diagnostics",
         "-Zbuild-std=core,alloc",
         "--target",
-        target,
+        "nvptx64-nvidia-cuda",
     ]);
 
     cargo.args(&builder.build_args);
diff --git a/crates/rustc_codegen_nvvm/CHANGELOG.md b/crates/rustc_codegen_nvvm/CHANGELOG.md
@@ -5,6 +5,7 @@ Notable changes to this project will be documented in this file.
 ## Unreleased
 
 - Added symbols for cuda_std to link to for warp intrinsics.
+- Completely removed support for 32-bit CUDA (it was broken and is essentially unused nowadays).
 
 ## 0.2.3 - 1/2/22
 
diff --git a/crates/rustc_codegen_nvvm/src/const_ty.rs b/crates/rustc_codegen_nvvm/src/const_ty.rs
@@ -1,5 +1,5 @@
 use crate::llvm::{self, Bool, False, True, Type, Value};
-use crate::{consts::const_alloc_to_llvm, context::CodegenCx, target, ty::LayoutLlvmExt};
+use crate::{consts::const_alloc_to_llvm, context::CodegenCx, ty::LayoutLlvmExt};
 use abi::Primitive::Pointer;
 use libc::c_uint;
 use rustc_ast::Mutability;
@@ -59,13 +59,6 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
     }
 
     fn const_usize(&self, i: u64) -> &'ll Value {
-        let bit_size = target::pointer_size();
-        if bit_size == 32 {
-            // shouldnt happen but make sure it doesnt overflow
-            // and the entire codegen burns down to the ground
-            assert!(i < (1 << bit_size));
-        }
-
         self.const_uint(self.isize_ty, i)
     }
 
diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs
@@ -110,7 +110,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             llvm_module.llmod.as_ref().unwrap()
         });
 
-        let isize_ty = Type::ix_llcx(llcx, target::pointer_size() as u64);
+        let isize_ty = Type::ix_llcx(llcx, target::POINTER_WIDTH as u64);
         // the eh_personality function doesnt make sense on the GPU, but we still need to give
         // rustc something, so we just give it an empty function
         let eh_personality = unsafe {
diff --git a/crates/rustc_codegen_nvvm/src/intrinsic.rs b/crates/rustc_codegen_nvvm/src/intrinsic.rs
@@ -152,11 +152,8 @@ fn get_simple_intrinsic<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, name: Symbol) -> O
 
 fn int_type_width_signed(ty: Ty<'_>, _cx: &CodegenCx<'_, '_>) -> Option<(u64, bool)> {
     match ty.kind() {
-        ty::Int(t) => Some((t.bit_width().unwrap_or(target::pointer_size() as u64), true)),
-        ty::Uint(t) => Some((
-            t.bit_width().unwrap_or(target::pointer_size() as u64),
-            false,
-        )),
+        ty::Int(t) => Some((t.bit_width().unwrap_or(target::POINTER_WIDTH as u64), true)),
+        ty::Uint(t) => Some((t.bit_width().unwrap_or(target::POINTER_WIDTH as u64), false)),
         _ => None,
     }
 }
diff --git a/crates/rustc_codegen_nvvm/src/lib.rs b/crates/rustc_codegen_nvvm/src/lib.rs
@@ -317,10 +317,10 @@ pub(crate) unsafe fn create_module<'ll>(
     let mod_name = CString::new(mod_name).expect("nul in module name");
     let llmod = llvm::LLVMModuleCreateWithNameInContext(mod_name.as_ptr(), llcx);
 
-    let data_layout = CString::new(target::data_layout()).unwrap();
+    let data_layout = CString::new(target::DATA_LAYOUT).unwrap();
     llvm::LLVMSetDataLayout(llmod, data_layout.as_ptr());
 
-    let target = CString::new(target::target_triple()).unwrap();
+    let target = CString::new(target::TARGET_TRIPLE).unwrap();
     llvm::LLVMSetTarget(llmod, target.as_ptr());
 
     llmod
diff --git a/crates/rustc_codegen_nvvm/src/target.rs b/crates/rustc_codegen_nvvm/src/target.rs
@@ -1,62 +1,21 @@
-//! Utility handlers for 32 bit and 64 bit nvptx targets
-//!
-//! NVVM IR only supports nvptx64-nvidia-cuda and nvptx-nvidia-cuda
-//! Therefore we completely ignore the target set in the session.
-//! This allows the user to cfg for targets like arm/x86/etc while still
-//! compiling for nvptx
-
 use crate::llvm::{self, Type};
 use rustc_target::spec::{LinkerFlavor, MergeFunctions, PanicStrategy, Target, TargetOptions};
-use std::sync::atomic::{AtomicBool, Ordering};
-
-/// Whether we are compiling for 32 bit (nvptx-nvidia-cuda).
-/// This is a global variable so we don't have to pass around a variable to
-/// a lot of things when this never varies across codegen invocations.
-static TARGET_32_BIT: AtomicBool = AtomicBool::new(false);
 
-/// The data layouts of NVVM targets
-/// <https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#data-layout>
-pub fn data_layout() -> &'static str {
-    if TARGET_32_BIT.load(Ordering::SeqCst) {
-        "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-    } else {
-        "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-    }
-}
-
-/// The target triples of NVVM targets
-/// <https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#target-triple>
-pub fn target_triple() -> &'static str {
-    if TARGET_32_BIT.load(Ordering::SeqCst) {
-        "nvptx-nvidia-cuda"
-    } else {
-        "nvptx64-nvidia-cuda"
-    }
-}
+pub const DATA_LAYOUT: &str = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
+pub const TARGET_TRIPLE: &str = "nvptx64-nvidia-cuda";
+pub const POINTER_WIDTH: u32 = 64;
 
 /// The pointer width of the current target
 pub(crate) unsafe fn usize_ty(llcx: &'_ llvm::Context) -> &'_ Type {
-    if TARGET_32_BIT.load(Ordering::SeqCst) {
-        llvm::LLVMInt32TypeInContext(llcx)
-    } else {
-        llvm::LLVMInt64TypeInContext(llcx)
-    }
-}
-
-pub fn pointer_size() -> usize {
-    if TARGET_32_BIT.load(Ordering::SeqCst) {
-        32
-    } else {
-        64
-    }
+    llvm::LLVMInt64TypeInContext(llcx)
 }
 
 pub fn target() -> Target {
     Target {
         arch: "nvptx".to_string(),
-        data_layout: data_layout().to_string(),
-        llvm_target: target_triple().to_string(),
-        pointer_width: pointer_size() as u32,
+        data_layout: DATA_LAYOUT.to_string(),
+        llvm_target: "nvptx64-nvidia-cuda".to_string(),
+        pointer_width: 64,
 
         options: TargetOptions {
             os: "cuda".to_string(),

Original file line number	Diff line number	Diff line change
`@@ -152,11 +152,8 @@ fn get_simple_intrinsic<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, name: Symbol) -> O`
`152`	`152`
`153`	`153`	`fn int_type_width_signed(ty: Ty<'_>, _cx: &CodegenCx<'_, '_>) -> Option<(u64, bool)> {`
`154`	`154`	`match ty.kind() {`
`155`		`- ty::Int(t) => Some((t.bit_width().unwrap_or(target::pointer_size() as u64), true)),`
`156`		`- ty::Uint(t) => Some((`
`157`		`- t.bit_width().unwrap_or(target::pointer_size() as u64),`
`158`		`- false,`
`159`		`- )),`
	`155`	`+ ty::Int(t) => Some((t.bit_width().unwrap_or(target::POINTER_WIDTH as u64), true)),`
	`156`	`+ ty::Uint(t) => Some((t.bit_width().unwrap_or(target::POINTER_WIDTH as u64), false)),`
`160`	`157`	`_ => None,`
`161`	`158`	`}`
`162`	`159`	`}`