@@ -3908,3 +3908,31 @@ def : Pat <
3908
3908
(V2I32toI64
3909
3909
(INT_NVVM_PRMT (I64toI32H Int64Regs:$a), (i32 0), (i32 0x0123)),
3910
3910
(INT_NVVM_PRMT (I64toI32L Int64Regs:$a), (i32 0), (i32 0x0123)))>;
3911
+
3912
+
3913
+ ////////////////////////////////////////////////////////////////////////////////
3914
+ // PTX Fence instructions
3915
+ ////////////////////////////////////////////////////////////////////////////////
3916
+
3917
// Fence instruction that prints as "fence.sc.sys;".
// It takes no operands (empty outs/ins) and carries no selection pattern of
// its own ([]); it is only reached through explicit Pat<> records.
// Gated on both hasPTX<60> and hasSM<70>.
let Predicates = [hasPTX<60>, hasSM<70>] in
def atomic_thread_fence_seq_cst_sys :
  NVPTXInst<(outs), (ins), "fence.sc.sys;", []>;
3920
// Fence instruction that prints as "fence.acq_rel.sys;".
// It takes no operands (empty outs/ins) and carries no selection pattern of
// its own ([]); it is only reached through explicit Pat<> records.
// Gated on both hasPTX<60> and hasSM<70>.
let Predicates = [hasPTX<60>, hasSM<70>] in
def atomic_thread_fence_acq_rel_sys :
  NVPTXInst<(outs), (ins), "fence.acq_rel.sys;", []>;
3923
+
3924
// Select atomic_fence nodes to the PTX fence instructions when both
// hasPTX<60> and hasSM<70> hold.
//   operand 0: ordering -- acquire(4), release(5), acq_rel(6), seq_cst(7)
//   operand 1: scope    -- sys(1)
// acquire, release and acq_rel all map to fence.acq_rel.sys; only seq_cst
// maps to fence.sc.sys. Record order is kept as originally written.
let Predicates = [hasPTX<60>, hasSM<70>] in {
  // acquire(4) sys(1)
  def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_acq_rel_sys)>;
  // release(5) sys(1)
  def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_acq_rel_sys)>;
  // acq_rel(6) sys(1)
  def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>;
  // seq_cst(7) sys(1)
  def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>;
}
3932
+
3933
+
3934
// If PTX<60 or SM<70, we fall back to MEMBAR.
// These patterns carry no predicates and select INT_MEMBAR_SYS for every
// sys(1)-scope atomic_fence ordering, emitted in this order:
//   acquire(4), release(5), acq_rel(6), seq_cst(7)
// NOTE(review): presumably these must stay after the predicated fence
// patterns so those are preferred when available -- confirm against the
// pattern-sorting rules before reordering.
foreach ordering = [4, 5, 6, 7] in
  def : Pat<(atomic_fence (i64 ordering), (i64 1)), (INT_MEMBAR_SYS)>;
0 commit comments