Skip to content

[AMDGPU] Add InstCombine rule for ballot.i64 intrinsic in wave32 mode. #71556

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,19 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
}

// CHECK-LABEL: @test_read_exec(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
void test_read_exec(global uint* out) {
*out = __builtin_amdgcn_read_exec();
}

// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]

// CHECK-LABEL: @test_read_exec_lo(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
void test_read_exec_lo(global uint* out) {
*out = __builtin_amdgcn_read_exec_lo();
}

// CHECK-LABEL: @test_read_exec_hi(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
// CHECK: lshr i64 [[A:%.*]], 32
// CHECK: trunc i64 [[B:%.*]] to i32
// CHECK: store i32 0, ptr addrspace(1) %out
void test_read_exec_hi(global uint* out) {
*out = __builtin_amdgcn_read_exec_hi();
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2395,7 +2395,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
isNullConstant(Cond->getOperand(1)) &&
// TODO: make condition below an assert after fixing ballot bitwidth.
// We may encounter ballot.i64 in wave32 mode on -O0.
VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
// %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
// %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,19 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
}
}
if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
// %b64 = call i64 ballot.i64(...)
// =>
// %b32 = call i32 ballot.i32(...)
// %b64 = zext i32 %b32 to i64
Value *Call = IC.Builder.CreateZExt(
IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
{IC.Builder.getInt32Ty()},
{II.getArgOperand(0)}),
II.getType());
Call->takeName(&II);
return IC.replaceInstUsesWith(II, Call);
}
break;
}
case Intrinsic::amdgcn_wqm_vote: {
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2599,7 +2599,8 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent

define i64 @ballot_nocombine_64(i1 %i) {
; CHECK-LABEL: @ballot_nocombine_64(
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: ret i64 [[B]]
;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
Expand All @@ -2616,7 +2617,8 @@ define i64 @ballot_zero_64() {

define i64 @ballot_one_64() {
; CHECK-LABEL: @ballot_one_64(
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: ret i64 [[B]]
;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
Expand Down