mirror of
https://github.com/Rust-GPU/Rust-CUDA.git
synced 2026-06-01 05:39:48 +09:00
Feat: completely remove support for 32-bit cuda in the codegen
This commit is contained in:
@@ -63,10 +63,6 @@ pub struct CudaBuilder {
|
||||
/// Whether to compile the gpu crate for release.
|
||||
/// `true` by default.
|
||||
pub release: bool,
|
||||
/// Whether to use 32 bit nvptx. Note that this is not tested much, so
|
||||
/// it may break in certain cases. You should always use 64 bit nvptx.
|
||||
/// `false` by default.
|
||||
pub nvptx_32: bool,
|
||||
/// An optional path to copy the final ptx file to.
|
||||
pub ptx_file_copy_path: Option<PathBuf>,
|
||||
|
||||
@@ -147,7 +143,6 @@ impl CudaBuilder {
|
||||
Self {
|
||||
path_to_crate: path_to_crate_root.as_ref().to_owned(),
|
||||
release: true,
|
||||
nvptx_32: false,
|
||||
ptx_file_copy_path: None,
|
||||
generate_line_info: true,
|
||||
nvvm_opts: true,
|
||||
@@ -184,13 +179,6 @@ impl CudaBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to use 32 bit nvptx. Note that this is not tested much, so
|
||||
/// it may break in certain cases. You should always use 64 bit nvptx.
|
||||
pub fn nvptx_32(mut self, nvptx_32: bool) -> Self {
|
||||
self.nvptx_32 = nvptx_32;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to generate debug line number info.
|
||||
/// This defaults to `true`, but nothing will be generated
|
||||
/// if the gpu crate is built as release.
|
||||
@@ -427,12 +415,6 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
|
||||
rustflags.push(["-Cllvm-args=", &llvm_args].concat());
|
||||
}
|
||||
|
||||
let target = if builder.nvptx_32 {
|
||||
"nvptx-nvidia-cuda"
|
||||
} else {
|
||||
"nvptx64-nvidia-cuda"
|
||||
};
|
||||
|
||||
let mut cargo = Command::new("cargo");
|
||||
cargo.args(&[
|
||||
"build",
|
||||
@@ -440,7 +422,7 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
|
||||
"--message-format=json-render-diagnostics",
|
||||
"-Zbuild-std=core,alloc",
|
||||
"--target",
|
||||
target,
|
||||
"nvptx64-nvidia-cuda",
|
||||
]);
|
||||
|
||||
cargo.args(&builder.build_args);
|
||||
|
||||
@@ -5,6 +5,7 @@ Notable changes to this project will be documented in this file.
|
||||
## Unreleased
|
||||
|
||||
- Added symbols for cuda_std to link to for warp intrinsics.
|
||||
- Completely remove support for 32-bit CUDA (it was broken and it is essentially unused nowadays).
|
||||
|
||||
## 0.2.3 - 1/2/22
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::llvm::{self, Bool, False, True, Type, Value};
|
||||
use crate::{consts::const_alloc_to_llvm, context::CodegenCx, target, ty::LayoutLlvmExt};
|
||||
use crate::{consts::const_alloc_to_llvm, context::CodegenCx, ty::LayoutLlvmExt};
|
||||
use abi::Primitive::Pointer;
|
||||
use libc::c_uint;
|
||||
use rustc_ast::Mutability;
|
||||
@@ -59,13 +59,6 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
|
||||
}
|
||||
|
||||
fn const_usize(&self, i: u64) -> &'ll Value {
|
||||
let bit_size = target::pointer_size();
|
||||
if bit_size == 32 {
|
||||
// shouldn't happen, but make sure it doesn't overflow
|
||||
// and the entire codegen burns down to the ground
|
||||
assert!(i < (1 << bit_size));
|
||||
}
|
||||
|
||||
self.const_uint(self.isize_ty, i)
|
||||
}
|
||||
|
||||
|
||||
@@ -110,7 +110,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
|
||||
llvm_module.llmod.as_ref().unwrap()
|
||||
});
|
||||
|
||||
let isize_ty = Type::ix_llcx(llcx, target::pointer_size() as u64);
|
||||
let isize_ty = Type::ix_llcx(llcx, target::POINTER_WIDTH as u64);
|
||||
// the eh_personality function doesn't make sense on the GPU, but we still need to give
|
||||
// rustc something, so we just give it an empty function
|
||||
let eh_personality = unsafe {
|
||||
|
||||
@@ -152,11 +152,8 @@ fn get_simple_intrinsic<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, name: Symbol) -> O
|
||||
|
||||
fn int_type_width_signed(ty: Ty<'_>, _cx: &CodegenCx<'_, '_>) -> Option<(u64, bool)> {
|
||||
match ty.kind() {
|
||||
ty::Int(t) => Some((t.bit_width().unwrap_or(target::pointer_size() as u64), true)),
|
||||
ty::Uint(t) => Some((
|
||||
t.bit_width().unwrap_or(target::pointer_size() as u64),
|
||||
false,
|
||||
)),
|
||||
ty::Int(t) => Some((t.bit_width().unwrap_or(target::POINTER_WIDTH as u64), true)),
|
||||
ty::Uint(t) => Some((t.bit_width().unwrap_or(target::POINTER_WIDTH as u64), false)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -317,10 +317,10 @@ pub(crate) unsafe fn create_module<'ll>(
|
||||
let mod_name = CString::new(mod_name).expect("nul in module name");
|
||||
let llmod = llvm::LLVMModuleCreateWithNameInContext(mod_name.as_ptr(), llcx);
|
||||
|
||||
let data_layout = CString::new(target::data_layout()).unwrap();
|
||||
let data_layout = CString::new(target::DATA_LAYOUT).unwrap();
|
||||
llvm::LLVMSetDataLayout(llmod, data_layout.as_ptr());
|
||||
|
||||
let target = CString::new(target::target_triple()).unwrap();
|
||||
let target = CString::new(target::TARGET_TRIPLE).unwrap();
|
||||
llvm::LLVMSetTarget(llmod, target.as_ptr());
|
||||
|
||||
llmod
|
||||
|
||||
@@ -1,62 +1,21 @@
|
||||
//! Utility handlers for 32 bit and 64 bit nvptx targets
|
||||
//!
|
||||
//! NVVM IR only supports nvptx64-nvidia-cuda and nvptx-nvidia-cuda
|
||||
//! Therefore we completely ignore the target set in the session.
|
||||
//! This allows the user to cfg for targets like arm/x86/etc while still
|
||||
//! compiling for nvptx
|
||||
|
||||
use crate::llvm::{self, Type};
|
||||
use rustc_target::spec::{LinkerFlavor, MergeFunctions, PanicStrategy, Target, TargetOptions};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
/// Whether we are compiling for 32 bit (nvptx-nvidia-cuda).
|
||||
/// This is a global variable so we don't have to pass around a variable to
|
||||
/// a lot of things when this never varies across codegen invocations.
|
||||
static TARGET_32_BIT: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
/// The data layouts of NVVM targets
|
||||
/// <https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#data-layout>
|
||||
pub fn data_layout() -> &'static str {
|
||||
if TARGET_32_BIT.load(Ordering::SeqCst) {
|
||||
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
} else {
|
||||
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
}
|
||||
}
|
||||
|
||||
/// The target triples of NVVM targets
|
||||
/// <https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#target-triple>
|
||||
pub fn target_triple() -> &'static str {
|
||||
if TARGET_32_BIT.load(Ordering::SeqCst) {
|
||||
"nvptx-nvidia-cuda"
|
||||
} else {
|
||||
"nvptx64-nvidia-cuda"
|
||||
}
|
||||
}
|
||||
pub const DATA_LAYOUT: &str = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
|
||||
pub const TARGET_TRIPLE: &str = "nvptx64-nvidia-cuda";
|
||||
pub const POINTER_WIDTH: u32 = 64;
|
||||
|
||||
/// The LLVM integer type whose width matches the pointer width of the current target
|
||||
pub(crate) unsafe fn usize_ty(llcx: &'_ llvm::Context) -> &'_ Type {
|
||||
if TARGET_32_BIT.load(Ordering::SeqCst) {
|
||||
llvm::LLVMInt32TypeInContext(llcx)
|
||||
} else {
|
||||
llvm::LLVMInt64TypeInContext(llcx)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pointer_size() -> usize {
|
||||
if TARGET_32_BIT.load(Ordering::SeqCst) {
|
||||
32
|
||||
} else {
|
||||
64
|
||||
}
|
||||
llvm::LLVMInt64TypeInContext(llcx)
|
||||
}
|
||||
|
||||
pub fn target() -> Target {
|
||||
Target {
|
||||
arch: "nvptx".to_string(),
|
||||
data_layout: data_layout().to_string(),
|
||||
llvm_target: target_triple().to_string(),
|
||||
pointer_width: pointer_size() as u32,
|
||||
data_layout: DATA_LAYOUT.to_string(),
|
||||
llvm_target: "nvptx64-nvidia-cuda".to_string(),
|
||||
pointer_width: 64,
|
||||
|
||||
options: TargetOptions {
|
||||
os: "cuda".to_string(),
|
||||
|
||||
Reference in New Issue
Block a user