@@ -287,7 +287,7 @@ define amdgpu_kernel void @dpp_test_f32_imm_comb4(ptr addrspace(1) %out, float %
287
287
; GFX8-NOOPT: s_nop 1
288
288
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
289
289
; Kernel dpp_test_f32_imm_comb5 calls llvm.amdgcn.update.dpp.f32 on %in1 and
; %in2 with constant control operands and stores the float result through %out.
; The changed line replaces the fourth and fifth call operands 62 and 61 with
; 14 and 13, i.e. their low four bits (62 & 15 = 14 = 0xe, 61 & 15 = 13 = 0xd),
; matching the row_mask 0xe / bank_mask 0xd of the expected v_mov_b32_dpp line
; that precedes this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_f32_imm_comb5 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
290
- %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 63 , i32 62 , i32 61 , i1 true )
290
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 63 , i32 14 , i32 13 , i1 true )
291
291
store float %tmp0 , ptr addrspace (1 ) %out
292
292
ret void
293
293
}
@@ -300,7 +300,7 @@ define amdgpu_kernel void @dpp_test_f32_imm_comb5(ptr addrspace(1) %out, float %
300
300
; GFX8-NOOPT: s_nop 1
301
301
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
302
302
; Kernel dpp_test_f32_imm_comb6 calls llvm.amdgcn.update.dpp.f32 on %in1 and
; %in2 with constant control operands and stores the float result through %out.
; The changed line replaces the fourth and fifth call operands 63 and 63 with
; 15 and 15, i.e. their low four bits (63 & 15 = 15 = 0xf), matching the
; row_mask 0xf / bank_mask 0xf of the expected v_mov_b32_dpp line that precedes
; this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_f32_imm_comb6 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
303
- %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 63 , i32 63 , i32 63 , i1 true )
303
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 63 , i32 15 , i32 15 , i1 true )
304
304
store float %tmp0 , ptr addrspace (1 ) %out
305
305
ret void
306
306
}
@@ -314,7 +314,7 @@ define amdgpu_kernel void @dpp_test_f32_imm_comb6(ptr addrspace(1) %out, float %
314
314
; GFX8-NOOPT: s_nop 1
315
315
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
316
316
; Kernel dpp_test_f32_imm_comb7 calls llvm.amdgcn.update.dpp.f32 on %in1 and
; %in2 with constant control operands and stores the float result through %out.
; The changed line keeps the third operand (64) and replaces the fourth and
; fifth call operands 64 and 64 with 0 and 0, i.e. their low four bits
; (64 & 15 = 0), matching the row_mask 0x0 / bank_mask 0x0 of the expected
; v_mov_b32_dpp line that precedes this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_f32_imm_comb7 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
317
- %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 64 , i32 64 , i32 64 , i1 true )
317
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 64 , i32 0 , i32 0 , i1 true )
318
318
store float %tmp0 , ptr addrspace (1 ) %out
319
319
ret void
320
320
}
@@ -327,7 +327,7 @@ define amdgpu_kernel void @dpp_test_f32_imm_comb7(ptr addrspace(1) %out, float %
327
327
; GFX8-NOOPT: s_nop 1
328
328
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
329
329
; Kernel dpp_test_f32_imm_comb8 calls llvm.amdgcn.update.dpp.f32 on %in1 and
; %in2 with constant control operands and stores the float result through %out.
; The changed line keeps the third operand (31) and replaces the fourth and
; fifth call operands 63 and 128 with 15 and 0, i.e. their low four bits
; (63 & 15 = 15 = 0xf, 128 & 15 = 0), matching the row_mask 0xf / bank_mask 0x0
; of the expected v_mov_b32_dpp line that precedes this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_f32_imm_comb8 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
330
- %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 31 , i32 63 , i32 128 , i1 true )
330
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 31 , i32 15 , i32 0 , i1 true )
331
331
store float %tmp0 , ptr addrspace (1 ) %out
332
332
ret void
333
333
}
@@ -405,7 +405,7 @@ define amdgpu_kernel void @dpp_test_v2i16_imm_comb4(ptr addrspace(1) %out, <2 x
405
405
; GFX8-NOOPT: s_nop 1
406
406
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
407
407
; Kernel dpp_test_v2i16_imm_comb5 calls llvm.amdgcn.update.dpp.v2i16 on the
; <2 x i16> values %in1 and %in2 with constant control operands and stores the
; result through %out.
; The changed line replaces the fourth and fifth call operands 62 and 61 with
; 14 and 13, i.e. their low four bits (62 & 15 = 14 = 0xe, 61 & 15 = 13 = 0xd),
; matching the row_mask 0xe / bank_mask 0xd of the expected v_mov_b32_dpp line
; that precedes this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_v2i16_imm_comb5 (ptr addrspace (1 ) %out , <2 x i16 > %in1 , <2 x i16 > %in2 ) {
408
- %tmp0 = call <2 x i16 > @llvm.amdgcn.update.dpp.v2i16 (<2 x i16 > %in1 , <2 x i16 > %in2 , i32 63 , i32 62 , i32 61 , i1 true )
408
+ %tmp0 = call <2 x i16 > @llvm.amdgcn.update.dpp.v2i16 (<2 x i16 > %in1 , <2 x i16 > %in2 , i32 63 , i32 14 , i32 13 , i1 true )
409
409
store <2 x i16 > %tmp0 , ptr addrspace (1 ) %out
410
410
ret void
411
411
}
@@ -418,7 +418,7 @@ define amdgpu_kernel void @dpp_test_v2i16_imm_comb5(ptr addrspace(1) %out, <2 x
418
418
; GFX8-NOOPT: s_nop 1
419
419
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
420
420
; Kernel dpp_test_v2i16_imm_comb6 calls llvm.amdgcn.update.dpp.v2i16 on the
; <2 x i16> values %in1 and %in2 with constant control operands and stores the
; result through %out.
; The changed line replaces the fourth and fifth call operands 63 and 63 with
; 15 and 15, i.e. their low four bits (63 & 15 = 15 = 0xf), matching the
; row_mask 0xf / bank_mask 0xf of the expected v_mov_b32_dpp line that precedes
; this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_v2i16_imm_comb6 (ptr addrspace (1 ) %out , <2 x i16 > %in1 , <2 x i16 > %in2 ) {
421
- %tmp0 = call <2 x i16 > @llvm.amdgcn.update.dpp.v2i16 (<2 x i16 > %in1 , <2 x i16 > %in2 , i32 63 , i32 63 , i32 63 , i1 true )
421
+ %tmp0 = call <2 x i16 > @llvm.amdgcn.update.dpp.v2i16 (<2 x i16 > %in1 , <2 x i16 > %in2 , i32 63 , i32 15 , i32 15 , i1 true )
422
422
store <2 x i16 > %tmp0 , ptr addrspace (1 ) %out
423
423
ret void
424
424
}
@@ -431,7 +431,7 @@ define amdgpu_kernel void @dpp_test_v2i16_imm_comb6(ptr addrspace(1) %out, <2 x
431
431
; GFX8-NOOPT: s_nop 1
432
432
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
433
433
; Kernel dpp_test_v2i16_imm_comb7 calls llvm.amdgcn.update.dpp.v2i16 on the
; <2 x i16> values %in1 and %in2 with constant control operands and stores the
; result through %out.
; The changed line keeps the third operand (64) and replaces the fourth and
; fifth call operands 64 and 64 with 0 and 0, i.e. their low four bits
; (64 & 15 = 0), matching the row_mask 0x0 / bank_mask 0x0 of the expected
; v_mov_b32_dpp line that precedes this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_v2i16_imm_comb7 (ptr addrspace (1 ) %out , <2 x i16 > %in1 , <2 x i16 > %in2 ) {
434
- %tmp0 = call <2 x i16 > @llvm.amdgcn.update.dpp.v2i16 (<2 x i16 > %in1 , <2 x i16 > %in2 , i32 64 , i32 64 , i32 64 , i1 true )
434
+ %tmp0 = call <2 x i16 > @llvm.amdgcn.update.dpp.v2i16 (<2 x i16 > %in1 , <2 x i16 > %in2 , i32 64 , i32 0 , i32 0 , i1 true )
435
435
store <2 x i16 > %tmp0 , ptr addrspace (1 ) %out
436
436
ret void
437
437
}
@@ -444,7 +444,7 @@ define amdgpu_kernel void @dpp_test_v2i16_imm_comb7(ptr addrspace(1) %out, <2 x
444
444
; GFX8-NOOPT: s_nop 1
445
445
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
446
446
; Kernel dpp_test_v2i16_imm_comb8 calls llvm.amdgcn.update.dpp.v2i16 on the
; <2 x i16> values %in1 and %in2 with constant control operands and stores the
; result through %out.
; The changed line keeps the third operand (31) and replaces the fourth and
; fifth call operands 63 and 128 with 15 and 0, i.e. their low four bits
; (63 & 15 = 15 = 0xf, 128 & 15 = 0), matching the row_mask 0xf / bank_mask 0x0
; of the expected v_mov_b32_dpp line that precedes this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_v2i16_imm_comb8 (ptr addrspace (1 ) %out , <2 x i16 > %in1 , <2 x i16 > %in2 ) {
447
- %tmp0 = call <2 x i16 > @llvm.amdgcn.update.dpp.v2i16 (<2 x i16 > %in1 , <2 x i16 > %in2 , i32 31 , i32 63 , i32 128 , i1 true )
447
+ %tmp0 = call <2 x i16 > @llvm.amdgcn.update.dpp.v2i16 (<2 x i16 > %in1 , <2 x i16 > %in2 , i32 31 , i32 15 , i32 0 , i1 true )
448
448
store <2 x i16 > %tmp0 , ptr addrspace (1 ) %out
449
449
ret void
450
450
}
@@ -522,7 +522,7 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb4(ptr addrspace(1) %out, <2 x
522
522
; GFX8-NOOPT: s_nop 1
523
523
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
524
524
; Kernel dpp_test_v2f16_imm_comb5 calls llvm.amdgcn.update.dpp.v2f16 on the
; <2 x half> values %in1 and %in2 with constant control operands and stores the
; result through %out.
; The changed line replaces the fourth and fifth call operands 62 and 61 with
; 14 and 13, i.e. their low four bits (62 & 15 = 14 = 0xe, 61 & 15 = 13 = 0xd),
; matching the row_mask 0xe / bank_mask 0xd of the expected v_mov_b32_dpp line
; that precedes this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_v2f16_imm_comb5 (ptr addrspace (1 ) %out , <2 x half > %in1 , <2 x half > %in2 ) {
525
- %tmp0 = call <2 x half > @llvm.amdgcn.update.dpp.v2f16 (<2 x half > %in1 , <2 x half > %in2 , i32 63 , i32 62 , i32 61 , i1 true )
525
+ %tmp0 = call <2 x half > @llvm.amdgcn.update.dpp.v2f16 (<2 x half > %in1 , <2 x half > %in2 , i32 63 , i32 14 , i32 13 , i1 true )
526
526
store <2 x half > %tmp0 , ptr addrspace (1 ) %out
527
527
ret void
528
528
}
@@ -535,7 +535,7 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb5(ptr addrspace(1) %out, <2 x
535
535
; GFX8-NOOPT: s_nop 1
536
536
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
537
537
; Kernel dpp_test_v2f16_imm_comb6 calls llvm.amdgcn.update.dpp.v2f16 on the
; <2 x half> values %in1 and %in2 with constant control operands and stores the
; result through %out.
; The changed line replaces the fourth and fifth call operands 63 and 63 with
; 15 and 15, i.e. their low four bits (63 & 15 = 15 = 0xf), matching the
; row_mask 0xf / bank_mask 0xf of the expected v_mov_b32_dpp line that precedes
; this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_v2f16_imm_comb6 (ptr addrspace (1 ) %out , <2 x half > %in1 , <2 x half > %in2 ) {
538
- %tmp0 = call <2 x half > @llvm.amdgcn.update.dpp.v2f16 (<2 x half > %in1 , <2 x half > %in2 , i32 63 , i32 63 , i32 63 , i1 true )
538
+ %tmp0 = call <2 x half > @llvm.amdgcn.update.dpp.v2f16 (<2 x half > %in1 , <2 x half > %in2 , i32 63 , i32 15 , i32 15 , i1 true )
539
539
store <2 x half > %tmp0 , ptr addrspace (1 ) %out
540
540
ret void
541
541
}
@@ -548,7 +548,7 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb6(ptr addrspace(1) %out, <2 x
548
548
; GFX8-NOOPT: s_nop 1
549
549
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
550
550
; Kernel dpp_test_v2f16_imm_comb7 calls llvm.amdgcn.update.dpp.v2f16 on the
; <2 x half> values %in1 and %in2 with constant control operands and stores the
; result through %out.
; The changed line keeps the third operand (64) and replaces the fourth and
; fifth call operands 64 and 64 with 0 and 0, i.e. their low four bits
; (64 & 15 = 0), matching the row_mask 0x0 / bank_mask 0x0 of the expected
; v_mov_b32_dpp line that precedes this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_v2f16_imm_comb7 (ptr addrspace (1 ) %out , <2 x half > %in1 , <2 x half > %in2 ) {
551
- %tmp0 = call <2 x half > @llvm.amdgcn.update.dpp.v2f16 (<2 x half > %in1 , <2 x half > %in2 , i32 64 , i32 64 , i32 64 , i1 true )
551
+ %tmp0 = call <2 x half > @llvm.amdgcn.update.dpp.v2f16 (<2 x half > %in1 , <2 x half > %in2 , i32 64 , i32 0 , i32 0 , i1 true )
552
552
store <2 x half > %tmp0 , ptr addrspace (1 ) %out
553
553
ret void
554
554
}
@@ -561,7 +561,7 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb7(ptr addrspace(1) %out, <2 x
561
561
; GFX8-NOOPT: s_nop 1
562
562
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
563
563
; Kernel dpp_test_v2f16_imm_comb8 calls llvm.amdgcn.update.dpp.v2f16 on the
; <2 x half> values %in1 and %in2 with constant control operands and stores the
; result through %out.
; The changed line keeps the third operand (31) and replaces the fourth and
; fifth call operands 63 and 128 with 15 and 0, i.e. their low four bits
; (63 & 15 = 15 = 0xf, 128 & 15 = 0), matching the row_mask 0xf / bank_mask 0x0
; of the expected v_mov_b32_dpp line that precedes this function.
; NOTE(review) presumably the old constants exceeded the range accepted for the
; mask operands - confirm against the intrinsic definition.
define amdgpu_kernel void @dpp_test_v2f16_imm_comb8 (ptr addrspace (1 ) %out , <2 x half > %in1 , <2 x half > %in2 ) {
564
- %tmp0 = call <2 x half > @llvm.amdgcn.update.dpp.v2f16 (<2 x half > %in1 , <2 x half > %in2 , i32 31 , i32 63 , i32 128 , i1 true )
564
+ %tmp0 = call <2 x half > @llvm.amdgcn.update.dpp.v2f16 (<2 x half > %in1 , <2 x half > %in2 , i32 31 , i32 15 , i32 0 , i1 true )
565
565
store <2 x half > %tmp0 , ptr addrspace (1 ) %out
566
566
ret void
567
567
}
0 commit comments