Skip to content

Commit 30ff508

Browse files
authored
[NVPTX] Auto-Upgrade llvm.nvvm.swap.lo.hi.b64 to llvm.fshl (#132098)
After commit 3c8c291, the lowering of 64-bit funnel shifts has been improved to the point where this intrinsic is no longer needed.
1 parent 2d876ed commit 30ff508

File tree

4 files changed

+11
-12
lines changed

4 files changed

+11
-12
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
// * llvm.nvvm.ldg.global.i --> load addrspace(1) !load.invariant
4646
// * llvm.nvvm.ldg.global.f --> ibid.
4747
// * llvm.nvvm.ldg.global.p --> ibid.
48+
// * llvm.nvvm.swap.lo.hi.b64 --> llvm.fshl(x, x, 32)
4849

4950
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
5051
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
@@ -4635,12 +4636,6 @@ def int_nvvm_sust_p_3d_v4i32_trap
46354636
"llvm.nvvm.sust.p.3d.v4i32.trap">,
46364637
ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
46374638

4638-
def int_nvvm_swap_lo_hi_b64
4639-
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
4640-
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
4641-
ClangBuiltin<"__nvvm_swap_lo_hi_b64">;
4642-
4643-
46444639
// Accessing special registers.
46454640

46464641
class PTXReadSRegIntrinsicNB_r32

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1292,7 +1292,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
12921292
if (Name.consume_front("abs."))
12931293
// nvvm.abs.{i,ll}
12941294
Expand = Name == "i" || Name == "ll";
1295-
else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1295+
else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" ||
1296+
Name == "swap.lo.hi.b64")
12961297
Expand = true;
12971298
else if (Name.consume_front("max.") || Name.consume_front("min."))
12981299
// nvvm.{min,max}.{i,ii,ui,ull}
@@ -2370,6 +2371,11 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
23702371
Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
23712372
Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
23722373
{Arg, Arg, ZExtShiftAmt});
2374+
} else if (Name == "swap.lo.hi.b64") {
2375+
Type *Int64Ty = Builder.getInt64Ty();
2376+
Value *Arg = CI->getOperand(0);
2377+
Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2378+
{Arg, Arg, Builder.getInt64(32)});
23732379
} else if ((Name.consume_front("ptr.gen.to.") &&
23742380
(Name.starts_with("local") || Name.starts_with("shared") ||
23752381
Name.starts_with("global") || Name.starts_with("constant"))) ||

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2947,11 +2947,6 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
29472947
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
29482948
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
29492949

2950-
2951-
def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src),
2952-
(V2I32toI64 (I64toI32H $src),
2953-
(I64toI32L $src))> ;
2954-
29552950
//-----------------------------------
29562951
// Texture Intrinsics
29572952
//-----------------------------------

llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ declare double @llvm.nvvm.bitcast.ll2d(i64)
3434
declare i32 @llvm.nvvm.rotate.b32(i32, i32)
3535
declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
3636
declare i64 @llvm.nvvm.rotate.b64(i64, i32)
37+
declare i64 @llvm.nvvm.swap.lo.hi.b64(i64)
3738

3839
declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
3940
declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr)
@@ -166,10 +167,12 @@ define void @rotate(i32 %a, i64 %b) {
166167
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6)
167168
; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7)
168169
; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8)
170+
; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 32)
169171
;
170172
%r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6)
171173
%r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7)
172174
%r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8)
175+
%r4 = call i64 @llvm.nvvm.swap.lo.hi.b64(i64 %b)
173176
ret void
174177
}
175178

0 commit comments

Comments
 (0)