Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit 58fdc43

Browse files
committed
NVPTX support for new asm!
1 parent 3137f8e commit 58fdc43

File tree

4 files changed

+238
-0
lines changed

4 files changed

+238
-0
lines changed

src/librustc_codegen_llvm/asm.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ impl AsmBuilderMethods<'tcx> for Builder<'a, 'll, 'tcx> {
254254
]);
255255
}
256256
InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {}
257+
InlineAsmArch::Nvptx64 => {}
257258
}
258259
}
259260
if !options.contains(InlineAsmOptions::NOMEM) {
@@ -410,6 +411,11 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass) -> String {
410411
| InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low4) => "x",
411412
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::dreg)
412413
| InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg) => "w",
414+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg16) => "h",
415+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg32) => "r",
416+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg64) => "l",
417+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::freg32) => "f",
418+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::freg64) => "d",
413419
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => "r",
414420
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => "f",
415421
InlineAsmRegClass::X86(X86InlineAsmRegClass::reg) => "r",
@@ -452,6 +458,7 @@ fn modifier_to_llvm(
452458
modifier
453459
}
454460
}
461+
InlineAsmRegClass::Nvptx(_) => None,
455462
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg)
456463
| InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => None,
457464
InlineAsmRegClass::X86(X86InlineAsmRegClass::reg)
@@ -502,6 +509,11 @@ fn dummy_output_type(cx: &CodegenCx<'ll, 'tcx>, reg: InlineAsmRegClass) -> &'ll
502509
| InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low4) => {
503510
cx.type_vector(cx.type_i64(), 2)
504511
}
512+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg16) => cx.type_i16(),
513+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg32) => cx.type_i32(),
514+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg64) => cx.type_i64(),
515+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::freg32) => cx.type_f32(),
516+
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::freg64) => cx.type_f64(),
505517
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => cx.type_i32(),
506518
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => cx.type_f32(),
507519
InlineAsmRegClass::X86(X86InlineAsmRegClass::reg)

src/librustc_target/asm/mod.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,11 +146,13 @@ macro_rules! types {
146146

147147
mod aarch64;
148148
mod arm;
149+
mod nvptx;
149150
mod riscv;
150151
mod x86;
151152

152153
pub use aarch64::{AArch64InlineAsmReg, AArch64InlineAsmRegClass};
153154
pub use arm::{ArmInlineAsmReg, ArmInlineAsmRegClass};
155+
pub use nvptx::{NvptxInlineAsmReg, NvptxInlineAsmRegClass};
154156
pub use riscv::{RiscVInlineAsmReg, RiscVInlineAsmRegClass};
155157
pub use x86::{X86InlineAsmReg, X86InlineAsmRegClass};
156158

@@ -162,6 +164,7 @@ pub enum InlineAsmArch {
162164
AArch64,
163165
RiscV32,
164166
RiscV64,
167+
Nvptx64,
165168
}
166169

167170
impl FromStr for InlineAsmArch {
@@ -175,6 +178,7 @@ impl FromStr for InlineAsmArch {
175178
"aarch64" => Ok(Self::AArch64),
176179
"riscv32" => Ok(Self::RiscV32),
177180
"riscv64" => Ok(Self::RiscV64),
181+
"nvptx64" => Ok(Self::Nvptx64),
178182
_ => Err(()),
179183
}
180184
}
@@ -196,6 +200,7 @@ pub enum InlineAsmReg {
196200
Arm(ArmInlineAsmReg),
197201
AArch64(AArch64InlineAsmReg),
198202
RiscV(RiscVInlineAsmReg),
203+
Nvptx(NvptxInlineAsmReg),
199204
}
200205

201206
impl InlineAsmReg {
@@ -205,6 +210,7 @@ impl InlineAsmReg {
205210
Self::Arm(r) => r.name(),
206211
Self::AArch64(r) => r.name(),
207212
Self::RiscV(r) => r.name(),
213+
Self::Nvptx(r) => r.name(),
208214
}
209215
}
210216

@@ -214,6 +220,7 @@ impl InlineAsmReg {
214220
Self::Arm(r) => InlineAsmRegClass::Arm(r.reg_class()),
215221
Self::AArch64(r) => InlineAsmRegClass::AArch64(r.reg_class()),
216222
Self::RiscV(r) => InlineAsmRegClass::RiscV(r.reg_class()),
223+
Self::Nvptx(r) => InlineAsmRegClass::Nvptx(r.reg_class()),
217224
}
218225
}
219226

@@ -236,6 +243,9 @@ impl InlineAsmReg {
236243
InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
237244
Self::RiscV(RiscVInlineAsmReg::parse(arch, has_feature, &name)?)
238245
}
246+
InlineAsmArch::Nvptx64 => {
247+
Self::Nvptx(NvptxInlineAsmReg::parse(arch, has_feature, &name)?)
248+
}
239249
})
240250
}
241251

@@ -252,6 +262,7 @@ impl InlineAsmReg {
252262
Self::Arm(r) => r.emit(out, arch, modifier),
253263
Self::AArch64(r) => r.emit(out, arch, modifier),
254264
Self::RiscV(r) => r.emit(out, arch, modifier),
265+
Self::Nvptx(r) => r.emit(out, arch, modifier),
255266
}
256267
}
257268

@@ -261,6 +272,7 @@ impl InlineAsmReg {
261272
Self::Arm(r) => r.overlapping_regs(|r| cb(Self::Arm(r))),
262273
Self::AArch64(_) => cb(self),
263274
Self::RiscV(_) => cb(self),
275+
Self::Nvptx(_) => cb(self),
264276
}
265277
}
266278
}
@@ -281,6 +293,7 @@ pub enum InlineAsmRegClass {
281293
Arm(ArmInlineAsmRegClass),
282294
AArch64(AArch64InlineAsmRegClass),
283295
RiscV(RiscVInlineAsmRegClass),
296+
Nvptx(NvptxInlineAsmRegClass),
284297
}
285298

286299
impl InlineAsmRegClass {
@@ -290,6 +303,7 @@ impl InlineAsmRegClass {
290303
Self::Arm(r) => r.name(),
291304
Self::AArch64(r) => r.name(),
292305
Self::RiscV(r) => r.name(),
306+
Self::Nvptx(r) => r.name(),
293307
}
294308
}
295309

@@ -302,6 +316,7 @@ impl InlineAsmRegClass {
302316
Self::Arm(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Arm),
303317
Self::AArch64(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::AArch64),
304318
Self::RiscV(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::RiscV),
319+
Self::Nvptx(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Nvptx),
305320
}
306321
}
307322

@@ -321,6 +336,7 @@ impl InlineAsmRegClass {
321336
Self::Arm(r) => r.suggest_modifier(arch, ty),
322337
Self::AArch64(r) => r.suggest_modifier(arch, ty),
323338
Self::RiscV(r) => r.suggest_modifier(arch, ty),
339+
Self::Nvptx(r) => r.suggest_modifier(arch, ty),
324340
}
325341
}
326342

@@ -336,6 +352,7 @@ impl InlineAsmRegClass {
336352
Self::Arm(r) => r.default_modifier(arch),
337353
Self::AArch64(r) => r.default_modifier(arch),
338354
Self::RiscV(r) => r.default_modifier(arch),
355+
Self::Nvptx(r) => r.default_modifier(arch),
339356
}
340357
}
341358

@@ -350,6 +367,7 @@ impl InlineAsmRegClass {
350367
Self::Arm(r) => r.supported_types(arch),
351368
Self::AArch64(r) => r.supported_types(arch),
352369
Self::RiscV(r) => r.supported_types(arch),
370+
Self::Nvptx(r) => r.supported_types(arch),
353371
}
354372
}
355373

@@ -367,6 +385,9 @@ impl InlineAsmRegClass {
367385
InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
368386
Self::RiscV(RiscVInlineAsmRegClass::parse(arch, name)?)
369387
}
388+
InlineAsmArch::Nvptx64 => {
389+
Self::Nvptx(NvptxInlineAsmRegClass::parse(arch, name)?)
390+
}
370391
})
371392
})
372393
}
@@ -379,6 +400,7 @@ impl InlineAsmRegClass {
379400
Self::Arm(r) => r.valid_modifiers(arch),
380401
Self::AArch64(r) => r.valid_modifiers(arch),
381402
Self::RiscV(r) => r.valid_modifiers(arch),
403+
Self::Nvptx(r) => r.valid_modifiers(arch),
382404
}
383405
}
384406
}
@@ -518,5 +540,10 @@ pub fn allocatable_registers(
518540
riscv::fill_reg_map(arch, has_feature, &mut map);
519541
map
520542
}
543+
InlineAsmArch::Nvptx64 => {
544+
let mut map = nvptx::regclass_map();
545+
nvptx::fill_reg_map(arch, has_feature, &mut map);
546+
map
547+
}
521548
}
522549
}

src/librustc_target/asm/nvptx.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
use super::{InlineAsmArch, InlineAsmType};
2+
use rustc_macros::HashStable_Generic;
3+
use std::fmt;
4+
5+
def_reg_class! {
6+
Nvptx NvptxInlineAsmRegClass {
7+
reg16,
8+
reg32,
9+
reg64,
10+
freg32,
11+
freg64,
12+
}
13+
}
14+
15+
impl NvptxInlineAsmRegClass {
16+
pub fn valid_modifiers(self, _arch: InlineAsmArch) -> &'static [char] {
17+
&[]
18+
}
19+
20+
pub fn suggest_class(self, _arch: InlineAsmArch, _ty: InlineAsmType) -> Option<Self> {
21+
None
22+
}
23+
24+
pub fn suggest_modifier(
25+
self,
26+
_arch: InlineAsmArch,
27+
_ty: InlineAsmType,
28+
) -> Option<(char, &'static str)> {
29+
None
30+
}
31+
32+
pub fn default_modifier(self, _arch: InlineAsmArch) -> Option<(char, &'static str)> {
33+
None
34+
}
35+
36+
pub fn supported_types(
37+
self,
38+
_arch: InlineAsmArch,
39+
) -> &'static [(InlineAsmType, Option<&'static str>)] {
40+
match self {
41+
Self::reg16 => types! { _: I8, I16; },
42+
Self::reg32 => types! { _: I8, I16, I32; },
43+
Self::reg64 => types! { _: I8, I16, I32, I64; },
44+
Self::freg32 => types! { _: F32; },
45+
Self::freg64 => types! { _: F32, F64; },
46+
}
47+
}
48+
}
49+
50+
def_regs! {
51+
Nvptx NvptxInlineAsmReg NvptxInlineAsmRegClass {
52+
// We have to define a register, otherwise we get warnings/errors about unused imports and
53+
// unreachable code. Do what clang does and define r0.
54+
r0: reg32 = ["r0"],
55+
#error = ["tid", "tid.x", "tid.y", "tid.z"] => "tid not supported for inline asm",
56+
#error = ["ntid", "ntid.x", "ntid.y", "ntid.z"] => "ntid not supported for inline asm",
57+
#error = ["laneid"] => "laneid not supported for inline asm",
58+
#error = ["warpid"] => "warpid not supported for inline asm",
59+
#error = ["nwarpid"] => "nwarpid not supported for inline asm",
60+
#error = ["ctaid", "ctaid.x", "ctaid.y", "ctaid.z"] => "ctaid not supported for inline asm",
61+
#error = ["nctaid", "nctaid.x", "nctaid.y", "nctaid.z"] => "nctaid not supported for inline asm",
62+
#error = ["smid"] => "smid not supported for inline asm",
63+
#error = ["nsmid"] => "nsmid not supported for inline asm",
64+
#error = ["gridid"] => "gridid not supported for inline asm",
65+
#error = ["lanemask_eq"] => "lanemask_eq not supported for inline asm",
66+
#error = ["lanemask_le"] => "lanemask_le not supported for inline asm",
67+
#error = ["lanemask_lt"] => "lanemask_lt not supported for inline asm",
68+
#error = ["lanemask_ge"] => "lanemask_ge not supported for inline asm",
69+
#error = ["lanemask_gt"] => "lanemask_gt not supported for inline asm",
70+
#error = ["clock", "clock_hi"] => "clock not supported for inline asm",
71+
#error = ["clock64"] => "clock64 not supported for inline asm",
72+
#error = ["pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7"] => "pm not supported for inline asm",
73+
#error = ["pm0_64", "pm1_64", "pm2_64", "pm3_64", "pm4_64", "pm5_64", "pm6_64", "pm7_64"] => "pm_64 not supported for inline asm",
74+
#error = ["envreg0", "envreg1", "envreg2", "envreg3", "envreg4", "envreg5", "envreg6", "envreg7", "envreg8", "envreg9", "envreg10", "envreg11", "envreg12", "envreg13", "envreg14", "envreg15", "envreg16", "envreg17", "envreg18", "envreg19", "envreg20", "envreg21", "envreg22", "envreg23", "envreg24", "envreg25", "envreg26", "envreg27", "envreg28", "envreg29", "envreg30", "envreg31"] => "envreg not supported for inline asm",
75+
#error = ["globaltimer", "globaltimer_lo", "globaltimer_hi"] => "globaltimer not supported for inline asm",
76+
#error = ["total_mem_size"] => "total_mem_size not supported for inline asm",
77+
#error = ["dynamic_mem_size"] => "dynamic_mem_size not supported for inline asm",
78+
}
79+
}
80+
81+
impl NvptxInlineAsmReg {
82+
pub fn emit(
83+
self,
84+
out: &mut dyn fmt::Write,
85+
_arch: InlineAsmArch,
86+
_modifier: Option<char>,
87+
) -> fmt::Result {
88+
out.write_str(self.name())
89+
}
90+
}

src/test/assembly/asm/nvptx-types.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// no-system-llvm
2+
// assembly-output: emit-asm
3+
// compile-flags: --target nvptx64-nvidia-cuda
4+
// only-nvptx64
5+
// ignore-nvptx64
6+
7+
#![feature(no_core, lang_items, rustc_attrs)]
8+
#![crate_type = "rlib"]
9+
#![no_core]
10+
#![allow(asm_sub_register, non_camel_case_types)]
11+
12+
#[rustc_builtin_macro]
13+
macro_rules! asm {
14+
() => {};
15+
}
16+
#[rustc_builtin_macro]
17+
macro_rules! concat {
18+
() => {};
19+
}
20+
#[rustc_builtin_macro]
21+
macro_rules! stringify {
22+
() => {};
23+
}
24+
25+
#[lang = "sized"]
26+
trait Sized {}
27+
#[lang = "copy"]
28+
trait Copy {}
29+
30+
type ptr = *mut u8;
31+
32+
impl Copy for i8 {}
33+
impl Copy for i16 {}
34+
impl Copy for i32 {}
35+
impl Copy for f32 {}
36+
impl Copy for i64 {}
37+
impl Copy for f64 {}
38+
impl Copy for ptr {}
39+
40+
#[no_mangle]
41+
fn extern_func();
42+
43+
// CHECK-LABEL: sym_fn
44+
// CHECK: #APP
45+
// CHECK: call extern_func;
46+
// CHECK: #NO_APP
47+
#[no_mangle]
48+
pub unsafe fn sym_fn() {
49+
asm!("call {}", sym extern_func);
50+
}
51+
52+
macro_rules! check {
53+
($func:ident $ty:ident, $class:ident $mov:literal) => {
54+
#[no_mangle]
55+
pub unsafe fn $func(x: $ty) -> $ty {
56+
// Hack to avoid function merging
57+
extern "Rust" {
58+
fn dont_merge(s: &str);
59+
}
60+
dont_merge(stringify!($func));
61+
62+
let y;
63+
asm!(concat!($mov, " {}, {};"), out($class) y, in($class) x);
64+
y
65+
}
66+
};
67+
}
68+
69+
// CHECK-LABEL: reg_i8
70+
// CHECK: #APP
71+
// CHECK: mov.i16 {{[a-z0-9]+}}, {{[a-z0-9]+}};
72+
// CHECK: #NO_APP
73+
check!(reg_i8 i8 reg16 "mov.i16");
74+
75+
// CHECK-LABEL: reg_i16
76+
// CHECK: #APP
77+
// CHECK: mov.i16 {{[a-z0-9]+}}, {{[a-z0-9]+}};
78+
// CHECK: #NO_APP
79+
check!(reg_i16 i16 reg16 "mov.i16");
80+
81+
// CHECK-LABEL: reg_i32
82+
// CHECK: #APP
83+
// CHECK: mov.i32 {{[a-z0-9]+}}, {{[a-z0-9]+}};
84+
// CHECK: #NO_APP
85+
check!(reg_i32 i32 reg32 "mov.i32");
86+
87+
// CHECK-LABEL: reg_f32
88+
// CHECK: #APP
89+
// CHECK: mov.f32 {{[a-z0-9]+}}, {{[a-z0-9]+}};
90+
// CHECK: #NO_APP
91+
check!(reg_f32 f32 freg32 "mov.f32");
92+
93+
// CHECK-LABEL: reg_i64
94+
// CHECK: #APP
95+
// CHECK: mov.i64 {{[a-z0-9]+}}, {{[a-z0-9]+}};
96+
// CHECK: #NO_APP
97+
check!(reg_i64 i64 reg64 "mov.i64");
98+
99+
// CHECK-LABEL: reg_f64
100+
// CHECK: #APP
101+
// CHECK: mov.f64 {{[a-z0-9]+}}, {{[a-z0-9]+}};
102+
// CHECK: #NO_APP
103+
check!(reg_f64 f64 freg64 "mov.f64");
104+
105+
// CHECK-LABEL: reg_ptr
106+
// CHECK: #APP
107+
// CHECK: mov.i64 {{[a-z0-9]+}}, {{[a-z0-9]+}};
108+
// CHECK: #NO_APP
109+
check!(reg_ptr ptr reg64 "mov.i64");

0 commit comments

Comments
 (0)