Skip to content

Commit 703e9e9

Browse files
authored
[AMDGPU][True16][CodeGen] true16 codegen for bswap (#122849)
Add true16 codegen patterns for bswap.
1 parent 8688a31 commit 703e9e9

File tree

2 files changed

+32
-8
lines changed

2 files changed

+32
-8
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3041,6 +3041,8 @@ def : GCNPat <
30413041

30423042
// Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24)
30433043
// The 12s emit 0s.
3044+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3045+
let True16Predicate = p in {
30443046
def : GCNPat <
30453047
(i16 (bswap i16:$a)),
30463048
(V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
@@ -3050,6 +3052,19 @@ def : GCNPat <
30503052
(i32 (zext (bswap i16:$a))),
30513053
(V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
30523054
>;
3055+
}
3056+
3057+
let True16Predicate = UseRealTrue16Insts in {
3058+
def : GCNPat <
3059+
(i16 (bswap i16:$a)),
3060+
(EXTRACT_SUBREG (V_PERM_B32_e64 (i32 0), (COPY VGPR_16:$a), (S_MOV_B32 (i32 0x0c0c0001))), lo16)
3061+
>;
3062+
3063+
def : GCNPat <
3064+
(i32 (zext (bswap i16:$a))),
3065+
(V_PERM_B32_e64 (i32 0), (COPY VGPR_16:$a), (S_MOV_B32 (i32 0x0c0c0001)))
3066+
>;
3067+
}
30533068

30543069
// Magic number: 1 | (0 << 8) | (3 << 16) | (2 << 24)
30553070
def : GCNPat <

llvm/test/CodeGen/AMDGPU/bswap.ll

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=amdgcn-- -verify-machineinstrs | FileCheck %s --check-prefix=SI
33
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI
4-
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=GFX11
4+
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16
5+
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16
56

67
declare i16 @llvm.bswap.i16(i16) nounwind readnone
78
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) nounwind readnone
@@ -490,13 +491,21 @@ define float @missing_truncate_promote_bswap(i32 %arg) {
490491
; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
491492
; VI-NEXT: s_setpc_b64 s[30:31]
492493
;
493-
; GFX11-LABEL: missing_truncate_promote_bswap:
494-
; GFX11: ; %bb.0: ; %bb
495-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496-
; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
497-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
498-
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
499-
; GFX11-NEXT: s_setpc_b64 s[30:31]
494+
; GFX11-REAL16-LABEL: missing_truncate_promote_bswap:
495+
; GFX11-REAL16: ; %bb.0: ; %bb
496+
; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
497+
; GFX11-REAL16-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
498+
; GFX11-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
499+
; GFX11-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
500+
; GFX11-REAL16-NEXT: s_setpc_b64 s[30:31]
501+
;
502+
; GFX11-FAKE16-LABEL: missing_truncate_promote_bswap:
503+
; GFX11-FAKE16: ; %bb.0: ; %bb
504+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
505+
; GFX11-FAKE16-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
506+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
507+
; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
508+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
500509
bb:
501510
%tmp = trunc i32 %arg to i16
502511
%tmp1 = call i16 @llvm.bswap.i16(i16 %tmp)

0 commit comments

Comments
 (0)