-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][True16][CodeGen] Update and/or/xor codegen pattern for i16 #121835
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][True16][CodeGen] Update and/or/xor codegen pattern for i16 #121835
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) Changes: In the true16 flow, remove the and/or/xor patterns that match i16 to 32-bit instructions. Full diff: https://github.com/llvm/llvm-project/pull/121835.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index ca4a0fa706c301..6bbf19179b7f6c 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1261,23 +1261,39 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
$src)
>;
-foreach vt = [i16, v2i16] in {
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
def : GCNPat <
- (and vt:$src0, vt:$src1),
+ (and i16:$src0, i16:$src1),
(V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
def : GCNPat <
- (or vt:$src0, vt:$src1),
+ (or i16:$src0, i16:$src1),
(V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
def : GCNPat <
- (xor vt:$src0, vt:$src1),
+ (xor i16:$src0, i16:$src1),
(V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
}
+def : GCNPat <
+ (and v2i16:$src0, v2i16:$src1),
+ (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
+def : GCNPat <
+ (or v2i16:$src0, v2i16:$src1),
+ (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
+def : GCNPat <
+ (xor v2i16:$src0, v2i16:$src1),
+ (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
let Predicates = [Has16BitInsts, isGFX8GFX9] in {
// Undo sub x, c -> add x, -c canonicalization since c is more likely
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index 2775de29368fbb..572793e1c5d711 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -42,12 +42,10 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
; GFX11-TRUE16-LABEL: v_uaddsat_i8:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_u16 v0.l, 0xff, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 775602ab80cde0..75866e33da23a8 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -39,9 +39,8 @@ define i8 @v_usubsat_i8(i8 %lhs, i8 %rhs) {
; GFX11-TRUE16-LABEL: v_usubsat_i8:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, v0.h clamp
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there any tests for the `xor` and `or` patterns that could be exposed by a true16 test RUN line?
There must be |
Hi Joe. There are t16 and/xor/or tests already enabled in https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/AMDGPU/bf16.ll#L17186. This patch additionally removes the 32-bit patterns for t16 |
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/11918. Here is the relevant piece of the build log for reference:
|
In true16 flow, remove and/or/xor 32bit patterns for i16