Skip to content

Commit 0a13c45

Browse files
gonzalobgyuxuanchen1997
authored andcommitted
[NVPTX] Support fence instruction (#99649)
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250530
1 parent bec590b commit 0a13c45

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3908,3 +3908,31 @@ def : Pat <
39083908
(V2I32toI64
39093909
(INT_NVVM_PRMT (I64toI32H Int64Regs:$a), (i32 0), (i32 0x0123)),
39103910
(INT_NVVM_PRMT (I64toI32L Int64Regs:$a), (i32 0), (i32 0x0123)))>;
3911+
3912+
3913+
////////////////////////////////////////////////////////////////////////////////
3914+
// PTX Fence instructions
3915+
////////////////////////////////////////////////////////////////////////////////
3916+
3917+
def atomic_thread_fence_seq_cst_sys :
3918+
NVPTXInst<(outs), (ins), "fence.sc.sys;", []>,
3919+
Requires<[hasPTX<60>, hasSM<70>]>;
3920+
def atomic_thread_fence_acq_rel_sys :
3921+
NVPTXInst<(outs), (ins), "fence.acq_rel.sys;", []>,
3922+
Requires<[hasPTX<60>, hasSM<70>]>;
3923+
3924+
def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acquire(4) sys(1)
3925+
Requires<[hasPTX<60>, hasSM<70>]>;
3926+
def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // release(5) sys(1)
3927+
Requires<[hasPTX<60>, hasSM<70>]>;
3928+
def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acq_rel(6) sys(1)
3929+
Requires<[hasPTX<60>, hasSM<70>]>;
3930+
def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>, // seq_cst(7) sys(1)
3931+
Requires<[hasPTX<60>, hasSM<70>]>;
3932+
3933+
3934+
// Fallback for PTX < 60 or SM < 70: the same atomic_fence operands, with no Requires<> predicate, select INT_MEMBAR_SYS instead:
3935+
def : Pat<(atomic_fence (i64 4), (i64 1)), (INT_MEMBAR_SYS)>; // acquire(4) sys(1)
3936+
def : Pat<(atomic_fence (i64 5), (i64 1)), (INT_MEMBAR_SYS)>; // release(5) sys(1)
3937+
def : Pat<(atomic_fence (i64 6), (i64 1)), (INT_MEMBAR_SYS)>; // acq_rel(6) sys(1)
3938+
def : Pat<(atomic_fence (i64 7), (i64 1)), (INT_MEMBAR_SYS)>; // seq_cst(7) sys(1)

llvm/test/CodeGen/NVPTX/fence.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,SM60
2+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
3+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefixes=CHECK,SM70
4+
; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
5+
6+
; CHECK-LABEL: fence_sc_sys
7+
define void @fence_sc_sys() local_unnamed_addr {
8+
; SM60: membar.sys
9+
; SM70: fence.sc.sys
10+
fence seq_cst
11+
ret void
12+
}
13+
14+
; CHECK-LABEL: fence_acq_rel_sys
15+
define void @fence_acq_rel_sys() local_unnamed_addr {
16+
; SM60: membar.sys
17+
; SM70: fence.acq_rel.sys
18+
fence acq_rel
19+
ret void
20+
}
21+
22+
; CHECK-LABEL: fence_release_sys
23+
define void @fence_release_sys() local_unnamed_addr {
24+
; SM60: membar.sys
25+
; SM70: fence.acq_rel.sys
26+
fence release
27+
ret void
28+
}
29+
30+
; CHECK-LABEL: fence_acquire_sys
31+
define void @fence_acquire_sys() local_unnamed_addr {
32+
; SM60: membar.sys
33+
; SM70: fence.acq_rel.sys
34+
fence acquire
35+
ret void
36+
}

0 commit comments

Comments
 (0)