-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Revert "[AMDGPU] Add InstCombine rule for ballot.i64 intrinsic in wave32 mode." #78429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-clang Author: Valery Pykhtin (vpykhtin) ChangesReverts llvm/llvm-project#71556 Failures: Full diff: https://github.com/llvm/llvm-project/pull/78429.diff 4 Files Affected:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a0e27ce22fe7d9..43553131f63c54 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -24,11 +24,13 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
}
// CHECK-LABEL: @test_read_exec(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
void test_read_exec(global uint* out) {
*out = __builtin_amdgcn_read_exec();
}
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
// CHECK-LABEL: @test_read_exec_lo(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
void test_read_exec_lo(global uint* out) {
@@ -36,7 +38,9 @@ void test_read_exec_lo(global uint* out) {
}
// CHECK-LABEL: @test_read_exec_hi(
-// CHECK: store i32 0, ptr addrspace(1) %out
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
void test_read_exec_hi(global uint* out) {
*out = __builtin_amdgcn_read_exec_hi();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 84d4b3d2b151da..af5bcc32818105 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2382,7 +2382,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
isNullConstant(Cond->getOperand(1)) &&
- // We may encounter ballot.i64 in wave32 mode on -O0.
+ // TODO: make condition below an assert after fixing ballot bitwidth.
VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
// %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
// %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 3c2351673be5c7..898289019c7189 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -990,19 +990,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
}
}
- if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
- // %b64 = call i64 ballot.i64(...)
- // =>
- // %b32 = call i32 ballot.i32(...)
- // %b64 = zext i32 %b32 to i64
- Value *Call = IC.Builder.CreateZExt(
- IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
- {IC.Builder.getInt32Ty()},
- {II.getArgOperand(0)}),
- II.getType());
- Call->takeName(&II);
- return IC.replaceInstUsesWith(II, Call);
- }
break;
}
case Intrinsic::amdgcn_wqm_vote: {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 94c32e3cbe99f7..804283cc20cd6a 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2599,8 +2599,7 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent
define i64 @ballot_nocombine_64(i1 %i) {
; CHECK-LABEL: @ballot_nocombine_64(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
-; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
; CHECK-NEXT: ret i64 [[B]]
;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
@@ -2617,8 +2616,7 @@ define i64 @ballot_zero_64() {
define i64 @ballot_one_64() {
; CHECK-LABEL: @ballot_one_64(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
-; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; CHECK-NEXT: ret i64 [[B]]
;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
|
@llvm/pr-subscribers-backend-amdgpu Author: Valery Pykhtin (vpykhtin) ChangesReverts llvm/llvm-project#71556 Failures: Full diff: https://github.com/llvm/llvm-project/pull/78429.diff 4 Files Affected:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a0e27ce22fe7d9..43553131f63c54 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -24,11 +24,13 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
}
// CHECK-LABEL: @test_read_exec(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
void test_read_exec(global uint* out) {
*out = __builtin_amdgcn_read_exec();
}
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
// CHECK-LABEL: @test_read_exec_lo(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
void test_read_exec_lo(global uint* out) {
@@ -36,7 +38,9 @@ void test_read_exec_lo(global uint* out) {
}
// CHECK-LABEL: @test_read_exec_hi(
-// CHECK: store i32 0, ptr addrspace(1) %out
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
void test_read_exec_hi(global uint* out) {
*out = __builtin_amdgcn_read_exec_hi();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 84d4b3d2b151da..af5bcc32818105 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2382,7 +2382,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
isNullConstant(Cond->getOperand(1)) &&
- // We may encounter ballot.i64 in wave32 mode on -O0.
+ // TODO: make condition below an assert after fixing ballot bitwidth.
VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
// %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
// %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 3c2351673be5c7..898289019c7189 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -990,19 +990,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
}
}
- if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
- // %b64 = call i64 ballot.i64(...)
- // =>
- // %b32 = call i32 ballot.i32(...)
- // %b64 = zext i32 %b32 to i64
- Value *Call = IC.Builder.CreateZExt(
- IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
- {IC.Builder.getInt32Ty()},
- {II.getArgOperand(0)}),
- II.getType());
- Call->takeName(&II);
- return IC.replaceInstUsesWith(II, Call);
- }
break;
}
case Intrinsic::amdgcn_wqm_vote: {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 94c32e3cbe99f7..804283cc20cd6a 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2599,8 +2599,7 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent
define i64 @ballot_nocombine_64(i1 %i) {
; CHECK-LABEL: @ballot_nocombine_64(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
-; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
; CHECK-NEXT: ret i64 [[B]]
;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
@@ -2617,8 +2616,7 @@ define i64 @ballot_zero_64() {
define i64 @ballot_one_64() {
; CHECK-LABEL: @ballot_one_64(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
-; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; CHECK-NEXT: ret i64 [[B]]
;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
|
…ve32 mode." (llvm#78429) This reverts commit 9791e54. Added lit test constraint: REQUIRES: amdgpu-registered-target
Reverts #71556
Failures:
https://lab.llvm.org/buildbot/#/builders/188/builds/40541
https://lab.llvm.org/buildbot/#/builders/91/builds/21847
https://lab.llvm.org/buildbot/#/builders/98/builds/31671
https://lab.llvm.org/buildbot/#/builders/139/builds/57289