Skip to content

Revert "[AMDGPU] Add InstCombine rule for ballot.i64 intrinsic in wave32 mode." #78429

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit on Jan 17, 2024

Conversation

@llvmbot added the labels clang (Clang issues not falling into any other category), backend:AMDGPU, and llvm:transforms on Jan 17, 2024
@llvmbot
Copy link
Member

llvmbot commented Jan 17, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-clang

Author: Valery Pykhtin (vpykhtin)

Changes

Reverts llvm/llvm-project#71556

Failures:
https://lab.llvm.org/buildbot/#/builders/188/builds/40541
https://lab.llvm.org/buildbot/#/builders/91/builds/21847
https://lab.llvm.org/buildbot/#/builders/98/builds/31671
https://lab.llvm.org/buildbot/#/builders/139/builds/57289


Full diff: https://github.com/llvm/llvm-project/pull/78429.diff

4 Files Affected:

  • (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl (+6-2)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+1-1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (-13)
  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (+2-4)
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a0e27ce22fe7d9..43553131f63c54 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -24,11 +24,13 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
 }
 
 // CHECK-LABEL: @test_read_exec(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
 void test_read_exec(global uint* out) {
   *out = __builtin_amdgcn_read_exec();
 }
 
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_read_exec_lo(
 // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
 void test_read_exec_lo(global uint* out) {
@@ -36,7 +38,9 @@ void test_read_exec_lo(global uint* out) {
 }
 
 // CHECK-LABEL: @test_read_exec_hi(
-// CHECK: store i32 0, ptr addrspace(1) %out
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
 void test_read_exec_hi(global uint* out) {
   *out = __builtin_amdgcn_read_exec_hi();
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 84d4b3d2b151da..af5bcc32818105 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2382,7 +2382,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
     auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
     if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
         isNullConstant(Cond->getOperand(1)) &&
-        // We may encounter ballot.i64 in wave32 mode on -O0.
+        // TODO: make condition below an assert after fixing ballot bitwidth.
         VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
       // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
       // %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 3c2351673be5c7..898289019c7189 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -990,19 +990,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
         return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
       }
     }
-    if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
-      // %b64 = call i64 ballot.i64(...)
-      // =>
-      // %b32 = call i32 ballot.i32(...)
-      // %b64 = zext i32 %b32 to i64
-      Value *Call = IC.Builder.CreateZExt(
-          IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
-                                     {IC.Builder.getInt32Ty()},
-                                     {II.getArgOperand(0)}),
-          II.getType());
-      Call->takeName(&II);
-      return IC.replaceInstUsesWith(II, Call);
-    }
     break;
   }
   case Intrinsic::amdgcn_wqm_vote: {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 94c32e3cbe99f7..804283cc20cd6a 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2599,8 +2599,7 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent
 
 define i64 @ballot_nocombine_64(i1 %i) {
 ; CHECK-LABEL: @ballot_nocombine_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
-; CHECK-NEXT:    [[B:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
 ; CHECK-NEXT:    ret i64 [[B]]
 ;
   %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
@@ -2617,8 +2616,7 @@ define i64 @ballot_zero_64() {
 
 define i64 @ballot_one_64() {
 ; CHECK-LABEL: @ballot_one_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
-; CHECK-NEXT:    [[B:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
 ; CHECK-NEXT:    ret i64 [[B]]
 ;
   %b = call i64 @llvm.amdgcn.ballot.i64(i1 1)

@llvmbot
Copy link
Member

llvmbot commented Jan 17, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Valery Pykhtin (vpykhtin)

Changes

Reverts llvm/llvm-project#71556

Failures:
https://lab.llvm.org/buildbot/#/builders/188/builds/40541
https://lab.llvm.org/buildbot/#/builders/91/builds/21847
https://lab.llvm.org/buildbot/#/builders/98/builds/31671
https://lab.llvm.org/buildbot/#/builders/139/builds/57289


Full diff: https://github.com/llvm/llvm-project/pull/78429.diff

4 Files Affected:

  • (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl (+6-2)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+1-1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (-13)
  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (+2-4)
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a0e27ce22fe7d9..43553131f63c54 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -24,11 +24,13 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
 }
 
 // CHECK-LABEL: @test_read_exec(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
 void test_read_exec(global uint* out) {
   *out = __builtin_amdgcn_read_exec();
 }
 
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_read_exec_lo(
 // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
 void test_read_exec_lo(global uint* out) {
@@ -36,7 +38,9 @@ void test_read_exec_lo(global uint* out) {
 }
 
 // CHECK-LABEL: @test_read_exec_hi(
-// CHECK: store i32 0, ptr addrspace(1) %out
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
 void test_read_exec_hi(global uint* out) {
   *out = __builtin_amdgcn_read_exec_hi();
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 84d4b3d2b151da..af5bcc32818105 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2382,7 +2382,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
     auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
     if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
         isNullConstant(Cond->getOperand(1)) &&
-        // We may encounter ballot.i64 in wave32 mode on -O0.
+        // TODO: make condition below an assert after fixing ballot bitwidth.
         VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
       // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
       // %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 3c2351673be5c7..898289019c7189 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -990,19 +990,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
         return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
       }
     }
-    if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
-      // %b64 = call i64 ballot.i64(...)
-      // =>
-      // %b32 = call i32 ballot.i32(...)
-      // %b64 = zext i32 %b32 to i64
-      Value *Call = IC.Builder.CreateZExt(
-          IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
-                                     {IC.Builder.getInt32Ty()},
-                                     {II.getArgOperand(0)}),
-          II.getType());
-      Call->takeName(&II);
-      return IC.replaceInstUsesWith(II, Call);
-    }
     break;
   }
   case Intrinsic::amdgcn_wqm_vote: {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 94c32e3cbe99f7..804283cc20cd6a 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2599,8 +2599,7 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent
 
 define i64 @ballot_nocombine_64(i1 %i) {
 ; CHECK-LABEL: @ballot_nocombine_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
-; CHECK-NEXT:    [[B:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
 ; CHECK-NEXT:    ret i64 [[B]]
 ;
   %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
@@ -2617,8 +2616,7 @@ define i64 @ballot_zero_64() {
 
 define i64 @ballot_one_64() {
 ; CHECK-LABEL: @ballot_one_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
-; CHECK-NEXT:    [[B:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
 ; CHECK-NEXT:    ret i64 [[B]]
 ;
   %b = call i64 @llvm.amdgcn.ballot.i64(i1 1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU, clang (Clang issues not falling into any other category), llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants