Skip to content

Commit c98bed9

Browse files
vpykhtin authored and searlmc1 committed
Reapply "[AMDGPU] Add InstCombine rule for ballot.i64 intrinsic in wave32 mode." (llvm#80303)
Reapply llvm#71556 with added lit test constraint: `REQUIRES: amdgpu-registered-target`.

This reverts commit 9791e54.

(cherry picked from commit b8025d1)

Change-Id: I03aafda08ca433456f6e82accd6b702a307bfd0b
1 parent 7ddb137 commit c98bed9

File tree

3 files changed

+18
-2
lines changed

3 files changed

+18
-2
lines changed

clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
// REQUIRES: amdgpu-registered-target
12
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -D__AMDGCN_WAVEFRONT_SIZE=32 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
23
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
34
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -915,6 +915,19 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
915915
return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
916916
}
917917
}
918+
if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
919+
// %b64 = call i64 ballot.i64(...)
920+
// =>
921+
// %b32 = call i32 ballot.i32(...)
922+
// %b64 = zext i32 %b32 to i64
923+
Value *Call = IC.Builder.CreateZExt(
924+
IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
925+
{IC.Builder.getInt32Ty()},
926+
{II.getArgOperand(0)}),
927+
II.getType());
928+
Call->takeName(&II);
929+
return IC.replaceInstUsesWith(II, Call);
930+
}
918931
break;
919932
}
920933
case Intrinsic::amdgcn_wqm_vote: {

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2565,7 +2565,8 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent
25652565

25662566
define i64 @ballot_nocombine_64(i1 %i) {
25672567
; CHECK-LABEL: @ballot_nocombine_64(
2568-
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
2568+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
2569+
; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
25692570
; CHECK-NEXT: ret i64 [[B]]
25702571
;
25712572
%b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
@@ -2582,7 +2583,8 @@ define i64 @ballot_zero_64() {
25822583

25832584
define i64 @ballot_one_64() {
25842585
; CHECK-LABEL: @ballot_one_64(
2585-
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
2586+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
2587+
; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
25862588
; CHECK-NEXT: ret i64 [[B]]
25872589
;
25882590
%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)

0 commit comments

Comments
 (0)