Skip to content

Commit 24a18aa

Browse files
authored
[AMDGPU] Simplify printing row/bank_mask modifiers. (#100575)
And fix a codegen test to use mask values that fit their encoding fields. Part of <#62629>.
1 parent a27f816 commit 24a18aa

File tree

3 files changed

+14
-24
lines changed

3 files changed

+14
-24
lines changed

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -47,12 +47,6 @@ void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
4747
printAnnotation(OS, Annot);
4848
}
4949

50-
void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
51-
const MCSubtargetInfo &STI,
52-
raw_ostream &O) {
53-
O << formatHex(MI->getOperand(OpNo).getImm() & 0xf);
54-
}
55-
5650
void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
5751
const MCSubtargetInfo &STI,
5852
raw_ostream &O) {
@@ -1149,15 +1143,13 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
11491143
void AMDGPUInstPrinter::printDppRowMask(const MCInst *MI, unsigned OpNo,
11501144
const MCSubtargetInfo &STI,
11511145
raw_ostream &O) {
1152-
O << " row_mask:";
1153-
printU4ImmOperand(MI, OpNo, STI, O);
1146+
O << " row_mask:" << formatHex(MI->getOperand(OpNo).getImm());
11541147
}
11551148

11561149
void AMDGPUInstPrinter::printDppBankMask(const MCInst *MI, unsigned OpNo,
11571150
const MCSubtargetInfo &STI,
11581151
raw_ostream &O) {
1159-
O << " bank_mask:";
1160-
printU4ImmOperand(MI, OpNo, STI, O);
1152+
O << " bank_mask:" << formatHex(MI->getOperand(OpNo).getImm());
11611153
}
11621154

11631155
void AMDGPUInstPrinter::printDppBoundCtrl(const MCInst *MI, unsigned OpNo,

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,6 @@ class AMDGPUInstPrinter : public MCInstPrinter {
3636
const MCRegisterInfo &MRI);
3737

3838
private:
39-
void printU4ImmOperand(const MCInst *MI, unsigned OpNo,
40-
const MCSubtargetInfo &STI, raw_ostream &O);
4139
void printU16ImmOperand(const MCInst *MI, unsigned OpNo,
4240
const MCSubtargetInfo &STI, raw_ostream &O);
4341
void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,7 @@ define amdgpu_kernel void @dpp_test_f32_imm_comb4(ptr addrspace(1) %out, float %
287287
; GFX8-NOOPT: s_nop 1
288288
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
289289
define amdgpu_kernel void @dpp_test_f32_imm_comb5(ptr addrspace(1) %out, float %in1, float %in2) {
290-
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 63, i32 62, i32 61, i1 true)
290+
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 63, i32 14, i32 13, i1 true)
291291
store float %tmp0, ptr addrspace(1) %out
292292
ret void
293293
}
@@ -300,7 +300,7 @@ define amdgpu_kernel void @dpp_test_f32_imm_comb5(ptr addrspace(1) %out, float %
300300
; GFX8-NOOPT: s_nop 1
301301
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
302302
define amdgpu_kernel void @dpp_test_f32_imm_comb6(ptr addrspace(1) %out, float %in1, float %in2) {
303-
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 63, i32 63, i32 63, i1 true)
303+
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 63, i32 15, i32 15, i1 true)
304304
store float %tmp0, ptr addrspace(1) %out
305305
ret void
306306
}
@@ -314,7 +314,7 @@ define amdgpu_kernel void @dpp_test_f32_imm_comb6(ptr addrspace(1) %out, float %
314314
; GFX8-NOOPT: s_nop 1
315315
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
316316
define amdgpu_kernel void @dpp_test_f32_imm_comb7(ptr addrspace(1) %out, float %in1, float %in2) {
317-
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 64, i32 64, i32 64, i1 true)
317+
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 64, i32 0, i32 0, i1 true)
318318
store float %tmp0, ptr addrspace(1) %out
319319
ret void
320320
}
@@ -327,7 +327,7 @@ define amdgpu_kernel void @dpp_test_f32_imm_comb7(ptr addrspace(1) %out, float %
327327
; GFX8-NOOPT: s_nop 1
328328
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
329329
define amdgpu_kernel void @dpp_test_f32_imm_comb8(ptr addrspace(1) %out, float %in1, float %in2) {
330-
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 31, i32 63, i32 128, i1 true)
330+
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 31, i32 15, i32 0, i1 true)
331331
store float %tmp0, ptr addrspace(1) %out
332332
ret void
333333
}
@@ -405,7 +405,7 @@ define amdgpu_kernel void @dpp_test_v2i16_imm_comb4(ptr addrspace(1) %out, <2 x
405405
; GFX8-NOOPT: s_nop 1
406406
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
407407
define amdgpu_kernel void @dpp_test_v2i16_imm_comb5(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
408-
%tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 63, i32 62, i32 61, i1 true)
408+
%tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 63, i32 14, i32 13, i1 true)
409409
store <2 x i16> %tmp0, ptr addrspace(1) %out
410410
ret void
411411
}
@@ -418,7 +418,7 @@ define amdgpu_kernel void @dpp_test_v2i16_imm_comb5(ptr addrspace(1) %out, <2 x
418418
; GFX8-NOOPT: s_nop 1
419419
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
420420
define amdgpu_kernel void @dpp_test_v2i16_imm_comb6(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
421-
%tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 63, i32 63, i32 63, i1 true)
421+
%tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 63, i32 15, i32 15, i1 true)
422422
store <2 x i16> %tmp0, ptr addrspace(1) %out
423423
ret void
424424
}
@@ -431,7 +431,7 @@ define amdgpu_kernel void @dpp_test_v2i16_imm_comb6(ptr addrspace(1) %out, <2 x
431431
; GFX8-NOOPT: s_nop 1
432432
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
433433
define amdgpu_kernel void @dpp_test_v2i16_imm_comb7(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
434-
%tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 64, i32 64, i32 64, i1 true)
434+
%tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 64, i32 0, i32 0, i1 true)
435435
store <2 x i16> %tmp0, ptr addrspace(1) %out
436436
ret void
437437
}
@@ -444,7 +444,7 @@ define amdgpu_kernel void @dpp_test_v2i16_imm_comb7(ptr addrspace(1) %out, <2 x
444444
; GFX8-NOOPT: s_nop 1
445445
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
446446
define amdgpu_kernel void @dpp_test_v2i16_imm_comb8(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
447-
%tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 31, i32 63, i32 128, i1 true)
447+
%tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 31, i32 15, i32 0, i1 true)
448448
store <2 x i16> %tmp0, ptr addrspace(1) %out
449449
ret void
450450
}
@@ -522,7 +522,7 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb4(ptr addrspace(1) %out, <2 x
522522
; GFX8-NOOPT: s_nop 1
523523
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
524524
define amdgpu_kernel void @dpp_test_v2f16_imm_comb5(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
525-
%tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 63, i32 62, i32 61, i1 true)
525+
%tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 63, i32 14, i32 13, i1 true)
526526
store <2 x half> %tmp0, ptr addrspace(1) %out
527527
ret void
528528
}
@@ -535,7 +535,7 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb5(ptr addrspace(1) %out, <2 x
535535
; GFX8-NOOPT: s_nop 1
536536
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
537537
define amdgpu_kernel void @dpp_test_v2f16_imm_comb6(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
538-
%tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 63, i32 63, i32 63, i1 true)
538+
%tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 63, i32 15, i32 15, i1 true)
539539
store <2 x half> %tmp0, ptr addrspace(1) %out
540540
ret void
541541
}
@@ -548,7 +548,7 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb6(ptr addrspace(1) %out, <2 x
548548
; GFX8-NOOPT: s_nop 1
549549
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
550550
define amdgpu_kernel void @dpp_test_v2f16_imm_comb7(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
551-
%tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 64, i32 64, i32 64, i1 true)
551+
%tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 64, i32 0, i32 0, i1 true)
552552
store <2 x half> %tmp0, ptr addrspace(1) %out
553553
ret void
554554
}
@@ -561,7 +561,7 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb7(ptr addrspace(1) %out, <2 x
561561
; GFX8-NOOPT: s_nop 1
562562
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
563563
define amdgpu_kernel void @dpp_test_v2f16_imm_comb8(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
564-
%tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 31, i32 63, i32 128, i1 true)
564+
%tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 31, i32 15, i32 0, i1 true)
565565
store <2 x half> %tmp0, ptr addrspace(1) %out
566566
ret void
567567
}

0 commit comments

Comments
 (0)