Feat: completely remove support for 32-bit CUDA in the codegen

2026-06-01 05:39:48 +09:00 · 2022-04-10 14:48:32 -04:00
parent 8bd1281ab5
commit ef4fcc86d4
7 changed files with 15 additions and 83 deletions
--- a/crates/cuda_builder/src/lib.rs
+++ b/crates/cuda_builder/src/lib.rs
@@ -63,10 +63,6 @@ pub struct CudaBuilder {
    /// Whether to compile the gpu crate for release.
    /// `true` by default.
    pub release: bool,
-    /// Whether to use 32 bit nvptx. Note that this is not tested much, so
-    /// it may break in certain cases. You should always use 64 bit nvptx.
-    /// `false` by default.
-    pub nvptx_32: bool,
    /// An optional path to copy the final ptx file to.
    pub ptx_file_copy_path: Option<PathBuf>,

@@ -147,7 +143,6 @@ impl CudaBuilder {
        Self {
            path_to_crate: path_to_crate_root.as_ref().to_owned(),
            release: true,
-            nvptx_32: false,
            ptx_file_copy_path: None,
            generate_line_info: true,
            nvvm_opts: true,
@@ -184,13 +179,6 @@ impl CudaBuilder {
        self
    }

-    /// Whether to use 32 bit nvptx. Note that this is not tested much, so
-    /// it may break in certain cases. You should always use 64 bit nvptx.
-    pub fn nvptx_32(mut self, nvptx_32: bool) -> Self {
-        self.nvptx_32 = nvptx_32;
-        self
-    }
-
    /// Whether to generate debug line number info.
    /// This defaults to `true`, but nothing will be generated
    /// if the gpu crate is built as release.
@@ -427,12 +415,6 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
        rustflags.push(["-Cllvm-args=", &llvm_args].concat());
    }

-    let target = if builder.nvptx_32 {
-        "nvptx-nvidia-cuda"
-    } else {
-        "nvptx64-nvidia-cuda"
-    };
-
    let mut cargo = Command::new("cargo");
    cargo.args(&[
        "build",
@@ -440,7 +422,7 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
        "--message-format=json-render-diagnostics",
        "-Zbuild-std=core,alloc",
        "--target",
-        target,
+        "nvptx64-nvidia-cuda",
    ]);

    cargo.args(&builder.build_args);
--- a/crates/rustc_codegen_nvvm/CHANGELOG.md
+++ b/crates/rustc_codegen_nvvm/CHANGELOG.md
@@ -5,6 +5,7 @@ Notable changes to this project will be documented in this file.
 ## Unreleased

 - Added symbols for cuda_std to link to for warp intrinsics.
+- Removed support for 32-bit CUDA entirely (it was broken and is essentially unused nowadays).

 ## 0.2.3 - 1/2/22

--- a/crates/rustc_codegen_nvvm/src/const_ty.rs
+++ b/crates/rustc_codegen_nvvm/src/const_ty.rs
@@ -1,5 +1,5 @@
 use crate::llvm::{self, Bool, False, True, Type, Value};
-use crate::{consts::const_alloc_to_llvm, context::CodegenCx, target, ty::LayoutLlvmExt};
+use crate::{consts::const_alloc_to_llvm, context::CodegenCx, ty::LayoutLlvmExt};
 use abi::Primitive::Pointer;
 use libc::c_uint;
 use rustc_ast::Mutability;
@@ -59,13 +59,6 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
    }

    fn const_usize(&self, i: u64) -> &'ll Value {
-        let bit_size = target::pointer_size();
-        if bit_size == 32 {
-            // shouldnt happen but make sure it doesnt overflow
-            // and the entire codegen burns down to the ground
-            assert!(i < (1 << bit_size));
-        }
-
        self.const_uint(self.isize_ty, i)
    }

--- a/crates/rustc_codegen_nvvm/src/context.rs
+++ b/crates/rustc_codegen_nvvm/src/context.rs
@@ -110,7 +110,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
            llvm_module.llmod.as_ref().unwrap()
        });

-        let isize_ty = Type::ix_llcx(llcx, target::pointer_size() as u64);
+        let isize_ty = Type::ix_llcx(llcx, target::POINTER_WIDTH as u64);
        // the eh_personality function doesnt make sense on the GPU, but we still need to give
        // rustc something, so we just give it an empty function
        let eh_personality = unsafe {
--- a/crates/rustc_codegen_nvvm/src/intrinsic.rs
+++ b/crates/rustc_codegen_nvvm/src/intrinsic.rs
@@ -152,11 +152,8 @@ fn get_simple_intrinsic<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, name: Symbol) -> O

 fn int_type_width_signed(ty: Ty<'_>, _cx: &CodegenCx<'_, '_>) -> Option<(u64, bool)> {
    match ty.kind() {
-        ty::Int(t) => Some((t.bit_width().unwrap_or(target::pointer_size() as u64), true)),
-        ty::Uint(t) => Some((
-            t.bit_width().unwrap_or(target::pointer_size() as u64),
-            false,
-        )),
+        ty::Int(t) => Some((t.bit_width().unwrap_or(target::POINTER_WIDTH as u64), true)),
+        ty::Uint(t) => Some((t.bit_width().unwrap_or(target::POINTER_WIDTH as u64), false)),
        _ => None,
    }
 }
--- a/crates/rustc_codegen_nvvm/src/lib.rs
+++ b/crates/rustc_codegen_nvvm/src/lib.rs
@@ -317,10 +317,10 @@ pub(crate) unsafe fn create_module<'ll>(
    let mod_name = CString::new(mod_name).expect("nul in module name");
    let llmod = llvm::LLVMModuleCreateWithNameInContext(mod_name.as_ptr(), llcx);

-    let data_layout = CString::new(target::data_layout()).unwrap();
+    let data_layout = CString::new(target::DATA_LAYOUT).unwrap();
    llvm::LLVMSetDataLayout(llmod, data_layout.as_ptr());

-    let target = CString::new(target::target_triple()).unwrap();
+    let target = CString::new(target::TARGET_TRIPLE).unwrap();
    llvm::LLVMSetTarget(llmod, target.as_ptr());

    llmod
--- a/crates/rustc_codegen_nvvm/src/target.rs
+++ b/crates/rustc_codegen_nvvm/src/target.rs
@@ -1,62 +1,21 @@
-//! Utility handlers for 32 bit and 64 bit nvptx targets
-//!
-//! NVVM IR only supports nvptx64-nvidia-cuda and nvptx-nvidia-cuda
-//! Therefore we completely ignore the target set in the session.
-//! This allows the user to cfg for targets like arm/x86/etc while still
-//! compiling for nvptx
-
 use crate::llvm::{self, Type};
 use rustc_target::spec::{LinkerFlavor, MergeFunctions, PanicStrategy, Target, TargetOptions};
-use std::sync::atomic::{AtomicBool, Ordering};

-/// Whether we are compiling for 32 bit (nvptx-nvidia-cuda).
-/// This is a global variable so we don't have to pass around a variable to
-/// a lot of things when this never varies across codegen invocations.
-static TARGET_32_BIT: AtomicBool = AtomicBool::new(false);
-
-/// The data layouts of NVVM targets
-/// <https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#data-layout>
-pub fn data_layout() -> &'static str {
-    if TARGET_32_BIT.load(Ordering::SeqCst) {
-        "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-    } else {
-        "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-    }
-}
-
-/// The target triples of NVVM targets
-/// <https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#target-triple>
-pub fn target_triple() -> &'static str {
-    if TARGET_32_BIT.load(Ordering::SeqCst) {
-        "nvptx-nvidia-cuda"
-    } else {
-        "nvptx64-nvidia-cuda"
-    }
-}
+pub const DATA_LAYOUT: &str = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
+pub const TARGET_TRIPLE: &str = "nvptx64-nvidia-cuda";
+pub const POINTER_WIDTH: u32 = 64;

 /// The pointer width of the current target
 pub(crate) unsafe fn usize_ty(llcx: &'_ llvm::Context) -> &'_ Type {
-    if TARGET_32_BIT.load(Ordering::SeqCst) {
-        llvm::LLVMInt32TypeInContext(llcx)
-    } else {
-        llvm::LLVMInt64TypeInContext(llcx)
-    }
-}
-
-pub fn pointer_size() -> usize {
-    if TARGET_32_BIT.load(Ordering::SeqCst) {
-        32
-    } else {
-        64
-    }
+    llvm::LLVMInt64TypeInContext(llcx)
 }

 pub fn target() -> Target {
    Target {
        arch: "nvptx".to_string(),
-        data_layout: data_layout().to_string(),
-        llvm_target: target_triple().to_string(),
-        pointer_width: pointer_size() as u32,
+        data_layout: DATA_LAYOUT.to_string(),
+        llvm_target: "nvptx64-nvidia-cuda".to_string(),
+        pointer_width: 64,

        options: TargetOptions {
            os: "cuda".to_string(),