-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[NVPTX] Auto-Upgrade llvm.nvvm.swap.lo.hi.b64 to llvm.fshl #132098
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] Auto-Upgrade llvm.nvvm.swap.lo.hi.b64 to llvm.fshl #132098
Conversation
@llvm/pr-subscribers-llvm-ir Author: Alex MacLean (AlexMaclean) Changes: After 3c8c291, the lowering of 64-bit funnel shifts has been improved to the point where this intrinsic is no longer needed. Full diff: https://github.com/llvm/llvm-project/pull/132098.diff (4 files affected):
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 665db3025903e..80e10f33b770d 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -45,6 +45,7 @@
// * llvm.nvvm.ldg.global.i --> load addrspace(1) !load.invariant
// * llvm.nvvm.ldg.global.f --> ibid.
// * llvm.nvvm.ldg.global.p --> ibid.
+// * llvm.nvvm.swap.lo.hi.b64 --> llvm.fshl(x, x, 32)
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
@@ -4635,12 +4636,6 @@ def int_nvvm_sust_p_3d_v4i32_trap
"llvm.nvvm.sust.p.3d.v4i32.trap">,
ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
-def int_nvvm_swap_lo_hi_b64
- : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
- [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
- ClangBuiltin<"__nvvm_swap_lo_hi_b64">;
-
-
// Accessing special registers.
class PTXReadSRegIntrinsicNB_r32
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7a194219c5cd4..9be307bb071ed 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1292,7 +1292,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
if (Name.consume_front("abs."))
// nvvm.abs.{i,ii}
Expand = Name == "i" || Name == "ll";
- else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
+ else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" ||
+ Name == "swap.lo.hi.b64")
Expand = true;
else if (Name.consume_front("max.") || Name.consume_front("min."))
// nvvm.{min,max}.{i,ii,ui,ull}
@@ -2370,6 +2371,11 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
{Arg, Arg, ZExtShiftAmt});
+ } else if (Name == "swap.lo.hi.b64") {
+ Type *Int64Ty = Builder.getInt64Ty();
+ Value *Arg = CI->getOperand(0);
+ Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
+ {Arg, Arg, Builder.getInt64(32)});
} else if ((Name.consume_front("ptr.gen.to.") &&
(Name.starts_with("local") || Name.starts_with("shared") ||
Name.starts_with("global") || Name.starts_with("constant"))) ||
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 90f56a421b19b..b2e05a567b4fe 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2947,11 +2947,6 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
-
-def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src),
- (V2I32toI64 (I64toI32H $src),
- (I64toI32L $src))> ;
-
//-----------------------------------
// Texture Intrinsics
//-----------------------------------
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 5cc3a30277459..588e79a7428a4 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -34,6 +34,7 @@ declare double @llvm.nvvm.bitcast.ll2d(i64)
declare i32 @llvm.nvvm.rotate.b32(i32, i32)
declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
declare i64 @llvm.nvvm.rotate.b64(i64, i32)
+declare i64 @llvm.nvvm.swap.lo.hi.b64(i64)
declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr)
@@ -166,10 +167,12 @@ define void @rotate(i32 %a, i64 %b) {
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6)
; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7)
; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8)
+; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 32)
;
%r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6)
%r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7)
%r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8)
+ %r4 = call i64 @llvm.nvvm.swap.lo.hi.b64(i64 %b)
ret void
}
|
@llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) Changes: After 3c8c291, the lowering of 64-bit funnel shifts has been improved to the point where this intrinsic is no longer needed. Full diff: https://github.com/llvm/llvm-project/pull/132098.diff (4 files affected):
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 665db3025903e..80e10f33b770d 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -45,6 +45,7 @@
// * llvm.nvvm.ldg.global.i --> load addrspace(1) !load.invariant
// * llvm.nvvm.ldg.global.f --> ibid.
// * llvm.nvvm.ldg.global.p --> ibid.
+// * llvm.nvvm.swap.lo.hi.b64 --> llvm.fshl(x, x, 32)
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
@@ -4635,12 +4636,6 @@ def int_nvvm_sust_p_3d_v4i32_trap
"llvm.nvvm.sust.p.3d.v4i32.trap">,
ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
-def int_nvvm_swap_lo_hi_b64
- : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
- [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
- ClangBuiltin<"__nvvm_swap_lo_hi_b64">;
-
-
// Accessing special registers.
class PTXReadSRegIntrinsicNB_r32
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7a194219c5cd4..9be307bb071ed 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1292,7 +1292,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
if (Name.consume_front("abs."))
// nvvm.abs.{i,ii}
Expand = Name == "i" || Name == "ll";
- else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
+ else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" ||
+ Name == "swap.lo.hi.b64")
Expand = true;
else if (Name.consume_front("max.") || Name.consume_front("min."))
// nvvm.{min,max}.{i,ii,ui,ull}
@@ -2370,6 +2371,11 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
{Arg, Arg, ZExtShiftAmt});
+ } else if (Name == "swap.lo.hi.b64") {
+ Type *Int64Ty = Builder.getInt64Ty();
+ Value *Arg = CI->getOperand(0);
+ Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
+ {Arg, Arg, Builder.getInt64(32)});
} else if ((Name.consume_front("ptr.gen.to.") &&
(Name.starts_with("local") || Name.starts_with("shared") ||
Name.starts_with("global") || Name.starts_with("constant"))) ||
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 90f56a421b19b..b2e05a567b4fe 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2947,11 +2947,6 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
-
-def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src),
- (V2I32toI64 (I64toI32H $src),
- (I64toI32L $src))> ;
-
//-----------------------------------
// Texture Intrinsics
//-----------------------------------
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 5cc3a30277459..588e79a7428a4 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -34,6 +34,7 @@ declare double @llvm.nvvm.bitcast.ll2d(i64)
declare i32 @llvm.nvvm.rotate.b32(i32, i32)
declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
declare i64 @llvm.nvvm.rotate.b64(i64, i32)
+declare i64 @llvm.nvvm.swap.lo.hi.b64(i64)
declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr)
@@ -166,10 +167,12 @@ define void @rotate(i32 %a, i64 %b) {
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6)
; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7)
; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8)
+; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 32)
;
%r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6)
%r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7)
%r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8)
+ %r4 = call i64 @llvm.nvvm.swap.lo.hi.b64(i64 %b)
ret void
}
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/16806. Here is the relevant piece of the build log for reference:
|
After 3c8c291 the lowering of 64-bit funnel shifts has been improved to the point where this intrinsic is no longer needed.