Skip to content

Commit 3f23227

Browse files
committed
rebase tests
1 parent ed55fef commit 3f23227

File tree

2 files changed

+132
-138
lines changed

2 files changed

+132
-138
lines changed

llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,16 +55,17 @@ define amdgpu_kernel void @extract_vector_elt_v2i8(ptr addrspace(1) %out, <2 x i
5555
; VI-NEXT: s_load_dword s2, s[6:7], 0x8
5656
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
5757
; VI-NEXT: s_waitcnt lgkmcnt(0)
58-
; VI-NEXT: v_mov_b32_e32 v2, s2
58+
; VI-NEXT: s_lshr_b32 s3, s2, 8
5959
; VI-NEXT: v_mov_b32_e32 v0, s0
60-
; VI-NEXT: v_lshrrev_b16_e64 v3, 8, s2
61-
; VI-NEXT: s_add_u32 s2, s0, 1
6260
; VI-NEXT: v_mov_b32_e32 v1, s1
63-
; VI-NEXT: s_addc_u32 s3, s1, 0
64-
; VI-NEXT: flat_store_byte v[0:1], v3
61+
; VI-NEXT: s_add_u32 s0, s0, 1
62+
; VI-NEXT: v_mov_b32_e32 v2, s3
63+
; VI-NEXT: s_addc_u32 s1, s1, 0
64+
; VI-NEXT: flat_store_byte v[0:1], v2
6565
; VI-NEXT: s_waitcnt vmcnt(0)
66-
; VI-NEXT: v_mov_b32_e32 v0, s2
67-
; VI-NEXT: v_mov_b32_e32 v1, s3
66+
; VI-NEXT: v_mov_b32_e32 v0, s0
67+
; VI-NEXT: v_mov_b32_e32 v1, s1
68+
; VI-NEXT: v_mov_b32_e32 v2, s2
6869
; VI-NEXT: flat_store_byte v[0:1], v2
6970
; VI-NEXT: s_waitcnt vmcnt(0)
7071
; VI-NEXT: s_endpgm
@@ -371,10 +372,11 @@ define amdgpu_kernel void @dynamic_extract_vector_elt_v2i8(ptr addrspace(1) %out
371372
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
372373
; VI-NEXT: s_waitcnt lgkmcnt(0)
373374
; VI-NEXT: s_lshl_b32 s2, s2, 3
374-
; VI-NEXT: v_mov_b32_e32 v0, s3
375-
; VI-NEXT: v_lshrrev_b16_e32 v2, s2, v0
375+
; VI-NEXT: s_and_b32 s3, s3, 0xffff
376+
; VI-NEXT: s_lshr_b32 s2, s3, s2
376377
; VI-NEXT: v_mov_b32_e32 v0, s0
377378
; VI-NEXT: v_mov_b32_e32 v1, s1
379+
; VI-NEXT: v_mov_b32_e32 v2, s2
378380
; VI-NEXT: flat_store_byte v[0:1], v2
379381
; VI-NEXT: s_waitcnt vmcnt(0)
380382
; VI-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll

Lines changed: 121 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,19 @@ define amdgpu_kernel void @s_abs_v2i16(ptr addrspace(1) %out, <2 x i16> %val) #0
2222
; VI-NEXT: s_load_dword s4, s[2:3], 0x2c
2323
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
2424
; VI-NEXT: s_waitcnt lgkmcnt(0)
25-
; VI-NEXT: s_lshr_b32 s2, s4, 16
26-
; VI-NEXT: s_sub_i32 s3, 0, s4
25+
; VI-NEXT: s_sub_i32 s2, 0, s4
26+
; VI-NEXT: s_lshr_b32 s3, s4, 16
2727
; VI-NEXT: s_ashr_i32 s5, s4, 16
28+
; VI-NEXT: s_sub_i32 s3, 0, s3
29+
; VI-NEXT: s_sext_i32_i16 s2, s2
2830
; VI-NEXT: s_sext_i32_i16 s4, s4
29-
; VI-NEXT: s_sub_i32 s2, 0, s2
3031
; VI-NEXT: s_sext_i32_i16 s3, s3
31-
; VI-NEXT: s_sext_i32_i16 s2, s2
32-
; VI-NEXT: s_max_i32 s3, s4, s3
33-
; VI-NEXT: s_max_i32 s2, s5, s2
34-
; VI-NEXT: s_add_i32 s3, s3, 2
35-
; VI-NEXT: s_lshl_b32 s2, s2, 16
36-
; VI-NEXT: s_and_b32 s3, s3, 0xffff
37-
; VI-NEXT: s_or_b32 s2, s2, s3
32+
; VI-NEXT: s_max_i32 s2, s4, s2
33+
; VI-NEXT: s_max_i32 s3, s5, s3
34+
; VI-NEXT: s_add_i32 s2, s2, 2
35+
; VI-NEXT: s_lshl_b32 s3, s3, 16
36+
; VI-NEXT: s_and_b32 s2, s2, 0xffff
37+
; VI-NEXT: s_or_b32 s2, s3, s2
3838
; VI-NEXT: s_add_i32 s2, s2, 0x20000
3939
; VI-NEXT: v_mov_b32_e32 v0, s0
4040
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -171,19 +171,19 @@ define amdgpu_kernel void @s_abs_v2i16_2(ptr addrspace(1) %out, <2 x i16> %val)
171171
; VI-NEXT: s_load_dword s4, s[2:3], 0x2c
172172
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
173173
; VI-NEXT: s_waitcnt lgkmcnt(0)
174-
; VI-NEXT: s_lshr_b32 s2, s4, 16
175-
; VI-NEXT: s_sub_i32 s3, 0, s4
174+
; VI-NEXT: s_sub_i32 s2, 0, s4
175+
; VI-NEXT: s_lshr_b32 s3, s4, 16
176176
; VI-NEXT: s_ashr_i32 s5, s4, 16
177+
; VI-NEXT: s_sub_i32 s3, 0, s3
178+
; VI-NEXT: s_sext_i32_i16 s2, s2
177179
; VI-NEXT: s_sext_i32_i16 s4, s4
178-
; VI-NEXT: s_sub_i32 s2, 0, s2
179180
; VI-NEXT: s_sext_i32_i16 s3, s3
180-
; VI-NEXT: s_sext_i32_i16 s2, s2
181-
; VI-NEXT: s_max_i32 s3, s4, s3
182-
; VI-NEXT: s_max_i32 s2, s5, s2
183-
; VI-NEXT: s_add_i32 s3, s3, 2
184-
; VI-NEXT: s_lshl_b32 s2, s2, 16
185-
; VI-NEXT: s_and_b32 s3, s3, 0xffff
186-
; VI-NEXT: s_or_b32 s2, s2, s3
181+
; VI-NEXT: s_max_i32 s2, s4, s2
182+
; VI-NEXT: s_max_i32 s3, s5, s3
183+
; VI-NEXT: s_add_i32 s2, s2, 2
184+
; VI-NEXT: s_lshl_b32 s3, s3, 16
185+
; VI-NEXT: s_and_b32 s2, s2, 0xffff
186+
; VI-NEXT: s_or_b32 s2, s3, s2
187187
; VI-NEXT: s_add_i32 s2, s2, 0x20000
188188
; VI-NEXT: v_mov_b32_e32 v0, s0
189189
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -331,31 +331,31 @@ define amdgpu_kernel void @s_abs_v4i16(ptr addrspace(1) %out, <4 x i16> %val) #0
331331
; VI: ; %bb.0:
332332
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
333333
; VI-NEXT: s_waitcnt lgkmcnt(0)
334-
; VI-NEXT: s_lshr_b32 s4, s2, 16
335-
; VI-NEXT: s_lshr_b32 s5, s3, 16
336-
; VI-NEXT: s_sub_i32 s6, 0, s3
337-
; VI-NEXT: s_sub_i32 s7, 0, s2
338-
; VI-NEXT: s_sub_i32 s5, 0, s5
339-
; VI-NEXT: s_sub_i32 s4, 0, s4
334+
; VI-NEXT: s_lshr_b32 s7, s2, 16
335+
; VI-NEXT: s_sub_i32 s7, 0, s7
336+
; VI-NEXT: s_sub_i32 s4, 0, s3
337+
; VI-NEXT: s_lshr_b32 s6, s3, 16
340338
; VI-NEXT: s_ashr_i32 s8, s2, 16
341-
; VI-NEXT: s_ashr_i32 s9, s3, 16
342-
; VI-NEXT: s_sext_i32_i16 s2, s2
343-
; VI-NEXT: s_sext_i32_i16 s3, s3
344339
; VI-NEXT: s_sext_i32_i16 s7, s7
345-
; VI-NEXT: s_sext_i32_i16 s6, s6
340+
; VI-NEXT: s_sub_i32 s5, 0, s2
341+
; VI-NEXT: s_sub_i32 s6, 0, s6
342+
; VI-NEXT: s_max_i32 s7, s8, s7
343+
; VI-NEXT: s_ashr_i32 s8, s3, 16
346344
; VI-NEXT: s_sext_i32_i16 s4, s4
345+
; VI-NEXT: s_sext_i32_i16 s3, s3
346+
; VI-NEXT: s_sext_i32_i16 s6, s6
347347
; VI-NEXT: s_sext_i32_i16 s5, s5
348-
; VI-NEXT: s_max_i32 s3, s3, s6
349-
; VI-NEXT: s_max_i32 s2, s2, s7
350-
; VI-NEXT: s_max_i32 s5, s9, s5
351-
; VI-NEXT: s_max_i32 s4, s8, s4
352-
; VI-NEXT: s_add_i32 s2, s2, 2
348+
; VI-NEXT: s_sext_i32_i16 s2, s2
349+
; VI-NEXT: s_max_i32 s3, s3, s4
350+
; VI-NEXT: s_max_i32 s6, s8, s6
351+
; VI-NEXT: s_max_i32 s2, s2, s5
353352
; VI-NEXT: s_add_i32 s3, s3, 2
354-
; VI-NEXT: s_lshl_b32 s4, s4, 16
355-
; VI-NEXT: s_lshl_b32 s5, s5, 16
353+
; VI-NEXT: s_lshl_b32 s4, s6, 16
356354
; VI-NEXT: s_and_b32 s3, s3, 0xffff
355+
; VI-NEXT: s_add_i32 s2, s2, 2
356+
; VI-NEXT: s_or_b32 s3, s4, s3
357+
; VI-NEXT: s_lshl_b32 s4, s7, 16
357358
; VI-NEXT: s_and_b32 s2, s2, 0xffff
358-
; VI-NEXT: s_or_b32 s3, s5, s3
359359
; VI-NEXT: s_or_b32 s2, s4, s2
360360
; VI-NEXT: s_add_i32 s3, s3, 0x20000
361361
; VI-NEXT: s_add_i32 s2, s2, 0x20000
@@ -559,21 +559,21 @@ define amdgpu_kernel void @s_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
559559
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
560560
; VI-NEXT: s_waitcnt lgkmcnt(0)
561561
; VI-NEXT: v_mov_b32_e32 v0, s4
562-
; VI-NEXT: s_ashr_i32 s2, s0, 16
563-
; VI-NEXT: s_sext_i32_i16 s0, s0
564-
; VI-NEXT: s_ashr_i32 s3, s1, 16
562+
; VI-NEXT: s_ashr_i32 s2, s1, 16
563+
; VI-NEXT: s_ashr_i32 s3, s0, 16
565564
; VI-NEXT: s_sext_i32_i16 s1, s1
565+
; VI-NEXT: s_sext_i32_i16 s0, s0
566566
; VI-NEXT: v_mov_b32_e32 v1, s5
567-
; VI-NEXT: s_max_i32 s4, s2, s3
567+
; VI-NEXT: s_max_i32 s4, s3, s2
568568
; VI-NEXT: s_max_i32 s5, s0, s1
569569
; VI-NEXT: s_lshl_b32 s4, s4, 16
570570
; VI-NEXT: s_and_b32 s5, s5, 0xffff
571-
; VI-NEXT: s_min_i32 s2, s2, s3
571+
; VI-NEXT: s_min_i32 s2, s3, s2
572572
; VI-NEXT: s_min_i32 s0, s0, s1
573573
; VI-NEXT: s_or_b32 s4, s5, s4
574-
; VI-NEXT: s_lshl_b32 s1, s2, 16
574+
; VI-NEXT: s_lshl_b32 s2, s2, 16
575575
; VI-NEXT: s_and_b32 s0, s0, 0xffff
576-
; VI-NEXT: s_or_b32 s0, s0, s1
576+
; VI-NEXT: s_or_b32 s0, s0, s2
577577
; VI-NEXT: v_mov_b32_e32 v4, s4
578578
; VI-NEXT: v_mov_b32_e32 v2, s6
579579
; VI-NEXT: v_mov_b32_e32 v3, s7
@@ -661,12 +661,12 @@ define amdgpu_kernel void @v_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
661661
; VI-NEXT: v_mov_b32_e32 v1, s1
662662
; VI-NEXT: v_mov_b32_e32 v2, s2
663663
; VI-NEXT: v_mov_b32_e32 v3, s3
664-
; VI-NEXT: v_max_i32_sdwa v6, sext(v4), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
665-
; VI-NEXT: v_max_i32_sdwa v7, sext(v4), sext(v5) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
666-
; VI-NEXT: v_min_i32_sdwa v8, sext(v4), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
667-
; VI-NEXT: v_min_i32_sdwa v4, sext(v4), sext(v5) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
668-
; VI-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
669-
; VI-NEXT: v_or_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
664+
; VI-NEXT: v_max_i32_sdwa v6, sext(v4), sext(v5) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
665+
; VI-NEXT: v_max_i32_sdwa v7, sext(v4), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
666+
; VI-NEXT: v_min_i32_sdwa v8, sext(v4), sext(v5) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
667+
; VI-NEXT: v_min_i32_sdwa v4, sext(v4), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
668+
; VI-NEXT: v_or_b32_sdwa v5, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
669+
; VI-NEXT: v_or_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
670670
; VI-NEXT: flat_store_dword v[0:1], v5
671671
; VI-NEXT: s_waitcnt vmcnt(0)
672672
; VI-NEXT: flat_store_dword v[2:3], v4
@@ -748,37 +748,37 @@ define amdgpu_kernel void @s_min_max_v4i16(ptr addrspace(1) %out0, ptr addrspace
748748
; VI-NEXT: s_waitcnt lgkmcnt(0)
749749
; VI-NEXT: v_mov_b32_e32 v0, s0
750750
; VI-NEXT: v_mov_b32_e32 v1, s1
751-
; VI-NEXT: v_mov_b32_e32 v2, s2
752751
; VI-NEXT: v_mov_b32_e32 v3, s3
753-
; VI-NEXT: s_ashr_i32 s0, s5, 16
754-
; VI-NEXT: s_ashr_i32 s1, s4, 16
755-
; VI-NEXT: s_sext_i32_i16 s2, s5
756-
; VI-NEXT: s_sext_i32_i16 s3, s4
757-
; VI-NEXT: s_ashr_i32 s4, s7, 16
758-
; VI-NEXT: s_ashr_i32 s5, s6, 16
759-
; VI-NEXT: s_sext_i32_i16 s7, s7
752+
; VI-NEXT: s_ashr_i32 s0, s7, 16
753+
; VI-NEXT: s_ashr_i32 s1, s5, 16
754+
; VI-NEXT: s_sext_i32_i16 s3, s7
755+
; VI-NEXT: s_sext_i32_i16 s5, s5
756+
; VI-NEXT: v_mov_b32_e32 v2, s2
757+
; VI-NEXT: s_max_i32 s2, s1, s0
758+
; VI-NEXT: s_max_i32 s7, s5, s3
759+
; VI-NEXT: s_lshl_b32 s2, s2, 16
760+
; VI-NEXT: s_and_b32 s7, s7, 0xffff
761+
; VI-NEXT: s_or_b32 s2, s7, s2
762+
; VI-NEXT: s_ashr_i32 s7, s6, 16
763+
; VI-NEXT: s_ashr_i32 s8, s4, 16
760764
; VI-NEXT: s_sext_i32_i16 s6, s6
761-
; VI-NEXT: s_max_i32 s8, s1, s5
762-
; VI-NEXT: s_max_i32 s9, s0, s4
763-
; VI-NEXT: s_max_i32 s10, s3, s6
764-
; VI-NEXT: s_max_i32 s11, s2, s7
765-
; VI-NEXT: s_min_i32 s0, s0, s4
766-
; VI-NEXT: s_min_i32 s2, s2, s7
765+
; VI-NEXT: s_sext_i32_i16 s4, s4
766+
; VI-NEXT: s_min_i32 s0, s1, s0
767+
; VI-NEXT: s_min_i32 s1, s5, s3
768+
; VI-NEXT: s_max_i32 s9, s8, s7
769+
; VI-NEXT: s_max_i32 s10, s4, s6
770+
; VI-NEXT: s_lshl_b32 s0, s0, 16
771+
; VI-NEXT: s_and_b32 s1, s1, 0xffff
767772
; VI-NEXT: s_lshl_b32 s9, s9, 16
768-
; VI-NEXT: s_and_b32 s11, s11, 0xffff
769-
; VI-NEXT: s_lshl_b32 s8, s8, 16
770773
; VI-NEXT: s_and_b32 s10, s10, 0xffff
771-
; VI-NEXT: s_min_i32 s1, s1, s5
772-
; VI-NEXT: s_min_i32 s3, s3, s6
773-
; VI-NEXT: s_lshl_b32 s0, s0, 16
774-
; VI-NEXT: s_and_b32 s2, s2, 0xffff
775-
; VI-NEXT: s_or_b32 s9, s11, s9
776-
; VI-NEXT: s_or_b32 s8, s10, s8
777-
; VI-NEXT: s_or_b32 s0, s2, s0
774+
; VI-NEXT: v_mov_b32_e32 v5, s2
775+
; VI-NEXT: s_or_b32 s0, s1, s0
776+
; VI-NEXT: s_min_i32 s1, s8, s7
777+
; VI-NEXT: s_min_i32 s2, s4, s6
778+
; VI-NEXT: s_or_b32 s9, s10, s9
778779
; VI-NEXT: s_lshl_b32 s1, s1, 16
779-
; VI-NEXT: s_and_b32 s2, s3, 0xffff
780-
; VI-NEXT: v_mov_b32_e32 v4, s8
781-
; VI-NEXT: v_mov_b32_e32 v5, s9
780+
; VI-NEXT: s_and_b32 s2, s2, 0xffff
781+
; VI-NEXT: v_mov_b32_e32 v4, s9
782782
; VI-NEXT: s_or_b32 s1, s2, s1
783783
; VI-NEXT: v_mov_b32_e32 v6, s1
784784
; VI-NEXT: v_mov_b32_e32 v7, s0
@@ -861,26 +861,26 @@ define amdgpu_kernel void @v_min_max_v2i16_user(ptr addrspace(1) %out0, ptr addr
861861
; GFX9-NEXT: global_load_dword v2, v0, s[10:11] glc
862862
; GFX9-NEXT: s_waitcnt vmcnt(0)
863863
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
864+
; GFX9-NEXT: v_cmp_gt_i32_sdwa vcc, sext(v1), sext(v2) src0_sel:WORD_0 src1_sel:WORD_0
865+
; GFX9-NEXT: v_cmp_gt_i32_sdwa s[0:1], sext(v1), sext(v2) src0_sel:WORD_1 src1_sel:WORD_1
864866
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
865-
; GFX9-NEXT: v_cmp_gt_i16_e32 vcc, v1, v2
866867
; GFX9-NEXT: v_cndmask_b32_e32 v5, v2, v1, vcc
867-
; GFX9-NEXT: v_cmp_gt_i16_e64 s[0:1], v3, v4
868868
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
869+
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
869870
; GFX9-NEXT: v_cndmask_b32_e64 v6, v4, v3, s[0:1]
870-
; GFX9-NEXT: v_and_b32_e32 v5, 0xffff, v5
871871
; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1]
872+
; GFX9-NEXT: v_and_b32_e32 v4, 0xffff, v5
873+
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
872874
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
873-
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[0:1]
874-
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
875-
; GFX9-NEXT: v_lshl_or_b32 v5, v6, 16, v5
875+
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2
876+
; GFX9-NEXT: v_lshl_or_b32 v4, v6, 16, v4
876877
; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
877-
; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v4
878-
; GFX9-NEXT: global_store_dword v0, v5, s[4:5]
878+
; GFX9-NEXT: v_or_b32_e32 v2, v5, v2
879+
; GFX9-NEXT: global_store_dword v0, v4, s[4:5]
879880
; GFX9-NEXT: s_waitcnt vmcnt(0)
880881
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
881882
; GFX9-NEXT: s_waitcnt vmcnt(0)
882-
; GFX9-NEXT: v_or_b32_e32 v0, v2, v3
883-
; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
883+
; GFX9-NEXT: v_and_b32_e32 v0, 3, v2
884884
; GFX9-NEXT: global_store_byte v[0:1], v0, off
885885
; GFX9-NEXT: s_waitcnt vmcnt(0)
886886
; GFX9-NEXT: s_endpgm
@@ -899,42 +899,34 @@ define amdgpu_kernel void @v_min_max_v2i16_user(ptr addrspace(1) %out0, ptr addr
899899
; VI-NEXT: s_waitcnt vmcnt(0)
900900
; VI-NEXT: v_mov_b32_e32 v0, s0
901901
; VI-NEXT: v_mov_b32_e32 v1, s1
902-
; VI-NEXT: v_mov_b32_e32 v3, s3
903902
; VI-NEXT: v_mov_b32_e32 v2, s2
904-
; VI-NEXT: v_readfirstlane_b32 s0, v4
905-
; VI-NEXT: v_readfirstlane_b32 s1, v5
906-
; VI-NEXT: s_ashr_i32 s3, s0, 16
907-
; VI-NEXT: s_ashr_i32 s5, s1, 16
908-
; VI-NEXT: s_cmp_gt_i32 s3, s5
909-
; VI-NEXT: s_sext_i32_i16 s2, s0
910-
; VI-NEXT: s_sext_i32_i16 s4, s1
911-
; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
912-
; VI-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[0:1]
913-
; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec
914-
; VI-NEXT: s_cselect_b32 s0, s3, s5
915-
; VI-NEXT: s_cselect_b32 s3, s5, s3
916-
; VI-NEXT: s_lshl_b32 s5, s0, 16
917-
; VI-NEXT: s_cmp_gt_i32 s2, s4
918-
; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
919-
; VI-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1]
920-
; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec
921-
; VI-NEXT: s_cselect_b32 s0, s2, s4
922-
; VI-NEXT: s_cselect_b32 s1, s4, s2
923-
; VI-NEXT: s_and_b32 s0, s0, 0xffff
924-
; VI-NEXT: v_lshlrev_b16_e32 v4, 1, v4
925-
; VI-NEXT: s_lshl_b32 s2, s3, 16
926-
; VI-NEXT: s_and_b32 s1, s1, 0xffff
927-
; VI-NEXT: s_or_b32 s0, s0, s5
928-
; VI-NEXT: v_or_b32_e32 v4, v5, v4
929-
; VI-NEXT: s_or_b32 s1, s1, s2
930-
; VI-NEXT: v_mov_b32_e32 v5, s0
931-
; VI-NEXT: v_and_b32_e32 v4, 3, v4
932-
; VI-NEXT: v_mov_b32_e32 v6, s1
933-
; VI-NEXT: flat_store_dword v[0:1], v5
903+
; VI-NEXT: v_mov_b32_e32 v3, s3
904+
; VI-NEXT: v_ashrrev_i32_e32 v10, 16, v4
905+
; VI-NEXT: v_ashrrev_i32_e32 v11, 16, v5
906+
; VI-NEXT: v_bfe_i32 v6, v4, 0, 16
907+
; VI-NEXT: v_bfe_i32 v7, v5, 0, 16
908+
; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v4
909+
; VI-NEXT: v_lshrrev_b32_e32 v9, 16, v5
910+
; VI-NEXT: v_cmp_gt_i32_e32 vcc, v10, v11
911+
; VI-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
912+
; VI-NEXT: v_cmp_gt_i32_e64 s[0:1], v6, v7
913+
; VI-NEXT: v_cndmask_b32_e64 v6, v5, v4, s[0:1]
914+
; VI-NEXT: v_cndmask_b32_e32 v7, v8, v9, vcc
915+
; VI-NEXT: v_lshlrev_b32_e32 v8, 16, v10
916+
; VI-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[0:1]
917+
; VI-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
918+
; VI-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[0:1]
919+
; VI-NEXT: v_or_b32_sdwa v6, v6, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
920+
; VI-NEXT: v_lshlrev_b32_e32 v5, 1, v5
921+
; VI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
922+
; VI-NEXT: flat_store_dword v[0:1], v6
934923
; VI-NEXT: s_waitcnt vmcnt(0)
935-
; VI-NEXT: flat_store_dword v[2:3], v6
924+
; VI-NEXT: v_or_b32_e32 v0, v9, v5
925+
; VI-NEXT: v_or_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
926+
; VI-NEXT: v_and_b32_e32 v0, 3, v0
927+
; VI-NEXT: flat_store_dword v[2:3], v4
936928
; VI-NEXT: s_waitcnt vmcnt(0)
937-
; VI-NEXT: flat_store_byte v[0:1], v4
929+
; VI-NEXT: flat_store_byte v[0:1], v0
938930
; VI-NEXT: s_waitcnt vmcnt(0)
939931
; VI-NEXT: s_endpgm
940932
;
@@ -1021,19 +1013,19 @@ define amdgpu_kernel void @u_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
10211013
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
10221014
; VI-NEXT: s_waitcnt lgkmcnt(0)
10231015
; VI-NEXT: v_mov_b32_e32 v1, s5
1024-
; VI-NEXT: s_lshr_b32 s2, s0, 16
1025-
; VI-NEXT: s_lshr_b32 s3, s1, 16
1026-
; VI-NEXT: s_and_b32 s0, s0, 0xffff
1027-
; VI-NEXT: s_and_b32 s1, s1, 0xffff
1028-
; VI-NEXT: s_max_u32 s5, s2, s3
1016+
; VI-NEXT: s_and_b32 s2, s1, 0xffff
1017+
; VI-NEXT: s_and_b32 s3, s0, 0xffff
1018+
; VI-NEXT: s_lshr_b32 s1, s1, 16
1019+
; VI-NEXT: s_lshr_b32 s0, s0, 16
1020+
; VI-NEXT: s_max_u32 s5, s0, s1
10291021
; VI-NEXT: v_mov_b32_e32 v0, s4
1030-
; VI-NEXT: s_max_u32 s4, s0, s1
1022+
; VI-NEXT: s_max_u32 s4, s3, s2
10311023
; VI-NEXT: s_lshl_b32 s5, s5, 16
10321024
; VI-NEXT: s_min_u32 s0, s0, s1
1033-
; VI-NEXT: s_min_u32 s1, s2, s3
10341025
; VI-NEXT: s_or_b32 s4, s4, s5
1035-
; VI-NEXT: s_lshl_b32 s1, s1, 16
1036-
; VI-NEXT: s_or_b32 s0, s0, s1
1026+
; VI-NEXT: s_min_u32 s2, s3, s2
1027+
; VI-NEXT: s_lshl_b32 s0, s0, 16
1028+
; VI-NEXT: s_or_b32 s0, s2, s0
10371029
; VI-NEXT: v_mov_b32_e32 v4, s4
10381030
; VI-NEXT: v_mov_b32_e32 v2, s6
10391031
; VI-NEXT: v_mov_b32_e32 v3, s7

0 commit comments

Comments
 (0)