AMDGPU: Change bitop3 intrinsic operand to i32 #118647

arsenm · 2024-12-04T14:47:28Z

No description provided.

arsenm · 2024-12-04T14:47:49Z

AMDGPU: Simplify definition of bitop3 operand. NFC. #118648
AMDGPU: Change bitop3 intrinsic operand to i32 #118647 👈 (View in Graphite)
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2024-12-04T14:48:09Z

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/118647.diff

4 Files Affected:

(modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+1-1)
(modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1-1)
(modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll (+16-16)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 112c26d20db14e..92418b9104ad14 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -774,7 +774,7 @@ def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<
 
 def int_amdgcn_bitop3 :
   DefaultAttrsIntrinsic<[llvm_anyint_ty],
-                        [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty],
+                        [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
                         [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;
 
 } // TargetPrefix = "amdgcn"
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index d8eb9d155315a6..fc8c12a674e466 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1271,7 +1271,7 @@ def ByteSel : NamedIntOperand<"byte_sel"> {
   let Validator = "isUInt<2>";
 }
 
-def BitOp3 : CustomOperand<i8, 1, "BitOp3">;
+def BitOp3 : CustomOperand<i32, 1, "BitOp3">;
 def bitop3_0 : DefaultOperand<BitOp3, 0>;
 
 class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index ff9376e635af96..a00785bf29c77a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1291,28 +1291,28 @@ let SubtargetPredicate = isGFX12Plus in {
 let SubtargetPredicate = HasBitOp3Insts  in {
   let isReMaterializable = 1 in {
     defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
-                                  VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i8]>>,
+                                  VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i32]>>,
                                                       VOP3_OPSEL>>;
     defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
-                                  VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i8]>, VOP3_REGULAR>>;
+                                  VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>;
   }
   def : GCNPat<
-    (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)),
+    (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
     (i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
   >;
 
   def : GCNPat<
-    (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)),
+    (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
     (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
   >;
 
   def : GCNPat<
-    (i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)),
+    (i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
     (i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
   >;
 
   def : GCNPat<
-    (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)),
+    (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
     (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
   >;
 } // End SubtargetPredicate = HasBitOp3Insts
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll
index ff2f4db0d7a5f9..b6232cbc384967 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll
@@ -2,15 +2,15 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-SDAG %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-GISEL %s
 
-declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i8)
-declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i8)
+declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i32)
+declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i32)
 
 define amdgpu_ps float @bitop3_b32_vvv(i32 %a, i32 %b, i32 %c) {
 ; GCN-LABEL: bitop3_b32_vvv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0xf
 ; GCN-NEXT:    ; return to shader part epilog
-  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 15)
+  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 15)
   %ret_cast = bitcast i32 %ret to float
   ret float %ret_cast
 }
@@ -20,7 +20,7 @@ define amdgpu_ps float @bitop3_b32_svv(i32 inreg %a, i32 %b, i32 %c) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    v_bitop3_b32 v0, s0, v0, v1 bitop3:0x10
 ; GCN-NEXT:    ; return to shader part epilog
-  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 16)
+  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 16)
   %ret_cast = bitcast i32 %ret to float
   ret float %ret_cast
 }
@@ -31,7 +31,7 @@ define amdgpu_ps float @bitop3_b32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_bitop3_b32 v0, s0, v1, v0 bitop3:0x11
 ; GCN-NEXT:    ; return to shader part epilog
-  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 17)
+  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 17)
   %ret_cast = bitcast i32 %ret to float
   ret float %ret_cast
 }
@@ -43,7 +43,7 @@ define amdgpu_ps float @bitop3_b32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c)
 ; GCN-NEXT:    v_mov_b32_e32 v1, s2
 ; GCN-NEXT:    v_bitop3_b32 v0, s0, v0, v1 bitop3:0x12
 ; GCN-NEXT:    ; return to shader part epilog
-  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 18)
+  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 18)
   %ret_cast = bitcast i32 %ret to float
   ret float %ret_cast
 }
@@ -60,7 +60,7 @@ define amdgpu_ps float @bitop3_b32_vvi(i32 %a, i32 %b) {
 ; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3e8
 ; GFX950-GISEL-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0x13
 ; GFX950-GISEL-NEXT:    ; return to shader part epilog
-  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i8 19)
+  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i32 19)
   %ret_cast = bitcast i32 %ret to float
   ret float %ret_cast
 }
@@ -79,7 +79,7 @@ define amdgpu_ps float @bitop3_b32_vii(i32 %a) {
 ; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3e8
 ; GFX950-GISEL-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0x14
 ; GFX950-GISEL-NEXT:    ; return to shader part epilog
-  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i8 20)
+  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i32 20)
   %ret_cast = bitcast i32 %ret to float
   ret float %ret_cast
 }
@@ -102,7 +102,7 @@ define amdgpu_ps float @bitop3_b32_iii() {
 ; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3e8
 ; GFX950-GISEL-NEXT:    v_bitop3_b32 v0, v0, v1, v2 bitop3:0x15
 ; GFX950-GISEL-NEXT:    ; return to shader part epilog
-  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i8 21)
+  %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i32 21)
   %ret_cast = bitcast i32 %ret to float
   ret float %ret_cast
 }
@@ -112,7 +112,7 @@ define amdgpu_ps half @bitop3_b16_vvv(i16 %a, i16 %b, i16 %c) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    v_bitop3_b16 v0, v0, v1, v2 bitop3:0xf
 ; GCN-NEXT:    ; return to shader part epilog
-  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 15)
+  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 15)
   %ret_cast = bitcast i16 %ret to half
   ret half %ret_cast
 }
@@ -122,7 +122,7 @@ define amdgpu_ps half @bitop3_b16_svv(i16 inreg %a, i16 %b, i16 %c) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    v_bitop3_b16 v0, s0, v0, v1 bitop3:0x10
 ; GCN-NEXT:    ; return to shader part epilog
-  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 16)
+  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 16)
   %ret_cast = bitcast i16 %ret to half
   ret half %ret_cast
 }
@@ -133,7 +133,7 @@ define amdgpu_ps half @bitop3_b16_ssv(i16 inreg %a, i16 inreg %b, i16 %c) {
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_bitop3_b16 v0, s0, v1, v0 bitop3:0x11
 ; GCN-NEXT:    ; return to shader part epilog
-  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 17)
+  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 17)
   %ret_cast = bitcast i16 %ret to half
   ret half %ret_cast
 }
@@ -145,7 +145,7 @@ define amdgpu_ps half @bitop3_b16_sss(i16 inreg %a, i16 inreg %b, i16 inreg %c)
 ; GCN-NEXT:    v_mov_b32_e32 v1, s2
 ; GCN-NEXT:    v_bitop3_b16 v0, s0, v0, v1 bitop3:0x12
 ; GCN-NEXT:    ; return to shader part epilog
-  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 18)
+  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 18)
   %ret_cast = bitcast i16 %ret to half
   ret half %ret_cast
 }
@@ -162,7 +162,7 @@ define amdgpu_ps half @bitop3_b16_vvi(i16 %a, i16 %b) {
 ; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3e8
 ; GFX950-GISEL-NEXT:    v_bitop3_b16 v0, v0, v1, v2 bitop3:0x13
 ; GFX950-GISEL-NEXT:    ; return to shader part epilog
-  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i8 19)
+  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i32 19)
   %ret_cast = bitcast i16 %ret to half
   ret half %ret_cast
 }
@@ -181,7 +181,7 @@ define amdgpu_ps half @bitop3_b16_vii(i16 %a) {
 ; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3e8
 ; GFX950-GISEL-NEXT:    v_bitop3_b16 v0, v0, v1, v2 bitop3:0x14
 ; GFX950-GISEL-NEXT:    ; return to shader part epilog
-  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i8 20)
+  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i32 20)
   %ret_cast = bitcast i16 %ret to half
   ret half %ret_cast
 }
@@ -203,7 +203,7 @@ define amdgpu_ps half @bitop3_b16_iii() {
 ; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3e8
 ; GFX950-GISEL-NEXT:    v_bitop3_b16 v0, v0, v1, v2 bitop3:0x15
 ; GFX950-GISEL-NEXT:    ; return to shader part epilog
-  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i8 21)
+  %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i32 21)
   %ret_cast = bitcast i16 %ret to half
   ret half %ret_cast
 }

shiltian · 2024-12-04T14:56:24Z

Why is this so?

arsenm · 2024-12-04T15:42:54Z

> Why is this so?

This showed up in a merge commit after the original patch was committed; it is what's in the branch. I'm guessing this was done to avoid the work of fixing up the type after 335bc3c.

arsenm · 2024-12-04T20:41:33Z

Merge activity

Dec 4, 3:41 PM EST: A user started a stack merge that includes this pull request via Graphite.
Dec 4, 3:43 PM EST: Graphite rebased this pull request as part of a merge.
Dec 4, 3:44 PM EST: A user merged this pull request with Graphite.

arsenm added the backend:AMDGPU label Dec 4, 2024 — with Graphite App

arsenm mentioned this pull request Dec 4, 2024

AMDGPU: Simplify definition of bitop3 operand. NFC. #118648

Merged

arsenm requested review from jayfoad, kosarev, mariusz-sikora-at-amd, pravinjagtap, rampitec, shiltian and srpande December 4, 2024 14:48

arsenm marked this pull request as ready for review December 4, 2024 14:48

llvmbot added the llvm:ir label Dec 4, 2024

shiltian approved these changes Dec 4, 2024

View reviewed changes

llvmbot added the clang (Clang issues not falling into any other category) and clang:frontend (Language frontend issues, e.g. anything involving "Sema") labels Dec 4, 2024

arsenm added 2 commits December 4, 2024 20:42

AMDGPU: Change bitop3 intrinsic operand to i32

adeacd4

Fix clang builtins

be4f050

arsenm force-pushed the users/arsenm/gfx950/change-bitop3-immarg-to-i32 branch from 3a94843 to be4f050 Compare December 4, 2024 20:42

arsenm merged commit e0f5253 into main Dec 4, 2024
5 of 7 checks passed

arsenm deleted the users/arsenm/gfx950/change-bitop3-immarg-to-i32 branch December 4, 2024 20:44

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

AMDGPU: Change bitop3 intrinsic operand to i32 #118647

AMDGPU: Change bitop3 intrinsic operand to i32 #118647

Uh oh!

arsenm commented Dec 4, 2024

Uh oh!

arsenm commented Dec 4, 2024 •

edited

Loading

Uh oh!

llvmbot commented Dec 4, 2024 •

edited

Loading

Uh oh!

shiltian commented Dec 4, 2024

Uh oh!

arsenm commented Dec 4, 2024

Uh oh!

arsenm commented Dec 4, 2024 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

AMDGPU: Change bitop3 intrinsic operand to i32 #118647

AMDGPU: Change bitop3 intrinsic operand to i32 #118647

Uh oh!

Conversation

arsenm commented Dec 4, 2024

Uh oh!

arsenm commented Dec 4, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Dec 4, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

shiltian commented Dec 4, 2024

Uh oh!

arsenm commented Dec 4, 2024

Uh oh!

arsenm commented Dec 4, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Merge activity

Uh oh!

Uh oh!

Uh oh!

arsenm commented Dec 4, 2024 •

edited

Loading

llvmbot commented Dec 4, 2024 •

edited

Loading

arsenm commented Dec 4, 2024 •

edited

Loading