Skip to content

Commit 9d14c13

Browse files
committed
fix tests
1 parent 7bc7ca2 commit 9d14c13

File tree

2 files changed

+138
-132
lines changed

2 files changed

+138
-132
lines changed

llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -55,17 +55,16 @@ define amdgpu_kernel void @extract_vector_elt_v2i8(ptr addrspace(1) %out, <2 x i
5555
; VI-NEXT: s_load_dword s2, s[6:7], 0x8
5656
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
5757
; VI-NEXT: s_waitcnt lgkmcnt(0)
58-
; VI-NEXT: s_lshr_b32 s3, s2, 8
58+
; VI-NEXT: v_mov_b32_e32 v2, s2
5959
; VI-NEXT: v_mov_b32_e32 v0, s0
60+
; VI-NEXT: v_lshrrev_b16_e64 v3, 8, s2
61+
; VI-NEXT: s_add_u32 s2, s0, 1
6062
; VI-NEXT: v_mov_b32_e32 v1, s1
61-
; VI-NEXT: s_add_u32 s0, s0, 1
62-
; VI-NEXT: v_mov_b32_e32 v2, s3
63-
; VI-NEXT: s_addc_u32 s1, s1, 0
64-
; VI-NEXT: flat_store_byte v[0:1], v2
63+
; VI-NEXT: s_addc_u32 s3, s1, 0
64+
; VI-NEXT: flat_store_byte v[0:1], v3
6565
; VI-NEXT: s_waitcnt vmcnt(0)
66-
; VI-NEXT: v_mov_b32_e32 v0, s0
67-
; VI-NEXT: v_mov_b32_e32 v1, s1
68-
; VI-NEXT: v_mov_b32_e32 v2, s2
66+
; VI-NEXT: v_mov_b32_e32 v0, s2
67+
; VI-NEXT: v_mov_b32_e32 v1, s3
6968
; VI-NEXT: flat_store_byte v[0:1], v2
7069
; VI-NEXT: s_waitcnt vmcnt(0)
7170
; VI-NEXT: s_endpgm
@@ -372,11 +371,10 @@ define amdgpu_kernel void @dynamic_extract_vector_elt_v2i8(ptr addrspace(1) %out
372371
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
373372
; VI-NEXT: s_waitcnt lgkmcnt(0)
374373
; VI-NEXT: s_lshl_b32 s2, s2, 3
375-
; VI-NEXT: s_and_b32 s3, s3, 0xffff
376-
; VI-NEXT: s_lshr_b32 s2, s3, s2
374+
; VI-NEXT: v_mov_b32_e32 v0, s3
375+
; VI-NEXT: v_lshrrev_b16_e32 v2, s2, v0
377376
; VI-NEXT: v_mov_b32_e32 v0, s0
378377
; VI-NEXT: v_mov_b32_e32 v1, s1
379-
; VI-NEXT: v_mov_b32_e32 v2, s2
380378
; VI-NEXT: flat_store_byte v[0:1], v2
381379
; VI-NEXT: s_waitcnt vmcnt(0)
382380
; VI-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll

Lines changed: 129 additions & 121 deletions
Original file line number | Diff line number | Diff line change
@@ -22,19 +22,19 @@ define amdgpu_kernel void @s_abs_v2i16(ptr addrspace(1) %out, <2 x i16> %val) #0
2222
; VI-NEXT: s_load_dword s4, s[2:3], 0x2c
2323
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
2424
; VI-NEXT: s_waitcnt lgkmcnt(0)
25-
; VI-NEXT: s_sub_i32 s2, 0, s4
26-
; VI-NEXT: s_lshr_b32 s3, s4, 16
25+
; VI-NEXT: s_lshr_b32 s2, s4, 16
26+
; VI-NEXT: s_sub_i32 s3, 0, s4
2727
; VI-NEXT: s_ashr_i32 s5, s4, 16
28-
; VI-NEXT: s_sub_i32 s3, 0, s3
29-
; VI-NEXT: s_sext_i32_i16 s2, s2
3028
; VI-NEXT: s_sext_i32_i16 s4, s4
29+
; VI-NEXT: s_sub_i32 s2, 0, s2
3130
; VI-NEXT: s_sext_i32_i16 s3, s3
32-
; VI-NEXT: s_max_i32 s2, s4, s2
33-
; VI-NEXT: s_max_i32 s3, s5, s3
34-
; VI-NEXT: s_add_i32 s2, s2, 2
35-
; VI-NEXT: s_lshl_b32 s3, s3, 16
36-
; VI-NEXT: s_and_b32 s2, s2, 0xffff
37-
; VI-NEXT: s_or_b32 s2, s3, s2
31+
; VI-NEXT: s_sext_i32_i16 s2, s2
32+
; VI-NEXT: s_max_i32 s3, s4, s3
33+
; VI-NEXT: s_max_i32 s2, s5, s2
34+
; VI-NEXT: s_add_i32 s3, s3, 2
35+
; VI-NEXT: s_lshl_b32 s2, s2, 16
36+
; VI-NEXT: s_and_b32 s3, s3, 0xffff
37+
; VI-NEXT: s_or_b32 s2, s2, s3
3838
; VI-NEXT: s_add_i32 s2, s2, 0x20000
3939
; VI-NEXT: v_mov_b32_e32 v0, s0
4040
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -171,19 +171,19 @@ define amdgpu_kernel void @s_abs_v2i16_2(ptr addrspace(1) %out, <2 x i16> %val)
171171
; VI-NEXT: s_load_dword s4, s[2:3], 0x2c
172172
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
173173
; VI-NEXT: s_waitcnt lgkmcnt(0)
174-
; VI-NEXT: s_sub_i32 s2, 0, s4
175-
; VI-NEXT: s_lshr_b32 s3, s4, 16
174+
; VI-NEXT: s_lshr_b32 s2, s4, 16
175+
; VI-NEXT: s_sub_i32 s3, 0, s4
176176
; VI-NEXT: s_ashr_i32 s5, s4, 16
177-
; VI-NEXT: s_sub_i32 s3, 0, s3
178-
; VI-NEXT: s_sext_i32_i16 s2, s2
179177
; VI-NEXT: s_sext_i32_i16 s4, s4
178+
; VI-NEXT: s_sub_i32 s2, 0, s2
180179
; VI-NEXT: s_sext_i32_i16 s3, s3
181-
; VI-NEXT: s_max_i32 s2, s4, s2
182-
; VI-NEXT: s_max_i32 s3, s5, s3
183-
; VI-NEXT: s_add_i32 s2, s2, 2
184-
; VI-NEXT: s_lshl_b32 s3, s3, 16
185-
; VI-NEXT: s_and_b32 s2, s2, 0xffff
186-
; VI-NEXT: s_or_b32 s2, s3, s2
180+
; VI-NEXT: s_sext_i32_i16 s2, s2
181+
; VI-NEXT: s_max_i32 s3, s4, s3
182+
; VI-NEXT: s_max_i32 s2, s5, s2
183+
; VI-NEXT: s_add_i32 s3, s3, 2
184+
; VI-NEXT: s_lshl_b32 s2, s2, 16
185+
; VI-NEXT: s_and_b32 s3, s3, 0xffff
186+
; VI-NEXT: s_or_b32 s2, s2, s3
187187
; VI-NEXT: s_add_i32 s2, s2, 0x20000
188188
; VI-NEXT: v_mov_b32_e32 v0, s0
189189
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -331,31 +331,31 @@ define amdgpu_kernel void @s_abs_v4i16(ptr addrspace(1) %out, <4 x i16> %val) #0
331331
; VI: ; %bb.0:
332332
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
333333
; VI-NEXT: s_waitcnt lgkmcnt(0)
334-
; VI-NEXT: s_lshr_b32 s7, s2, 16
335-
; VI-NEXT: s_sub_i32 s7, 0, s7
336-
; VI-NEXT: s_sub_i32 s4, 0, s3
337-
; VI-NEXT: s_lshr_b32 s6, s3, 16
334+
; VI-NEXT: s_lshr_b32 s4, s2, 16
335+
; VI-NEXT: s_lshr_b32 s5, s3, 16
336+
; VI-NEXT: s_sub_i32 s6, 0, s3
337+
; VI-NEXT: s_sub_i32 s7, 0, s2
338+
; VI-NEXT: s_sub_i32 s5, 0, s5
339+
; VI-NEXT: s_sub_i32 s4, 0, s4
338340
; VI-NEXT: s_ashr_i32 s8, s2, 16
339-
; VI-NEXT: s_sext_i32_i16 s7, s7
340-
; VI-NEXT: s_sub_i32 s5, 0, s2
341-
; VI-NEXT: s_sub_i32 s6, 0, s6
342-
; VI-NEXT: s_max_i32 s7, s8, s7
343-
; VI-NEXT: s_ashr_i32 s8, s3, 16
344-
; VI-NEXT: s_sext_i32_i16 s4, s4
341+
; VI-NEXT: s_ashr_i32 s9, s3, 16
342+
; VI-NEXT: s_sext_i32_i16 s2, s2
345343
; VI-NEXT: s_sext_i32_i16 s3, s3
344+
; VI-NEXT: s_sext_i32_i16 s7, s7
346345
; VI-NEXT: s_sext_i32_i16 s6, s6
346+
; VI-NEXT: s_sext_i32_i16 s4, s4
347347
; VI-NEXT: s_sext_i32_i16 s5, s5
348-
; VI-NEXT: s_sext_i32_i16 s2, s2
349-
; VI-NEXT: s_max_i32 s3, s3, s4
350-
; VI-NEXT: s_max_i32 s6, s8, s6
351-
; VI-NEXT: s_max_i32 s2, s2, s5
348+
; VI-NEXT: s_max_i32 s3, s3, s6
349+
; VI-NEXT: s_max_i32 s2, s2, s7
350+
; VI-NEXT: s_max_i32 s5, s9, s5
351+
; VI-NEXT: s_max_i32 s4, s8, s4
352+
; VI-NEXT: s_add_i32 s2, s2, 2
352353
; VI-NEXT: s_add_i32 s3, s3, 2
353-
; VI-NEXT: s_lshl_b32 s4, s6, 16
354+
; VI-NEXT: s_lshl_b32 s4, s4, 16
355+
; VI-NEXT: s_lshl_b32 s5, s5, 16
354356
; VI-NEXT: s_and_b32 s3, s3, 0xffff
355-
; VI-NEXT: s_add_i32 s2, s2, 2
356-
; VI-NEXT: s_or_b32 s3, s4, s3
357-
; VI-NEXT: s_lshl_b32 s4, s7, 16
358357
; VI-NEXT: s_and_b32 s2, s2, 0xffff
358+
; VI-NEXT: s_or_b32 s3, s5, s3
359359
; VI-NEXT: s_or_b32 s2, s4, s2
360360
; VI-NEXT: s_add_i32 s3, s3, 0x20000
361361
; VI-NEXT: s_add_i32 s2, s2, 0x20000
@@ -559,21 +559,21 @@ define amdgpu_kernel void @s_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
559559
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
560560
; VI-NEXT: s_waitcnt lgkmcnt(0)
561561
; VI-NEXT: v_mov_b32_e32 v0, s4
562-
; VI-NEXT: s_ashr_i32 s2, s1, 16
563-
; VI-NEXT: s_ashr_i32 s3, s0, 16
564-
; VI-NEXT: s_sext_i32_i16 s1, s1
562+
; VI-NEXT: s_ashr_i32 s2, s0, 16
565563
; VI-NEXT: s_sext_i32_i16 s0, s0
564+
; VI-NEXT: s_ashr_i32 s3, s1, 16
565+
; VI-NEXT: s_sext_i32_i16 s1, s1
566566
; VI-NEXT: v_mov_b32_e32 v1, s5
567-
; VI-NEXT: s_max_i32 s4, s3, s2
567+
; VI-NEXT: s_max_i32 s4, s2, s3
568568
; VI-NEXT: s_max_i32 s5, s0, s1
569569
; VI-NEXT: s_lshl_b32 s4, s4, 16
570570
; VI-NEXT: s_and_b32 s5, s5, 0xffff
571-
; VI-NEXT: s_min_i32 s2, s3, s2
571+
; VI-NEXT: s_min_i32 s2, s2, s3
572572
; VI-NEXT: s_min_i32 s0, s0, s1
573573
; VI-NEXT: s_or_b32 s4, s5, s4
574-
; VI-NEXT: s_lshl_b32 s2, s2, 16
574+
; VI-NEXT: s_lshl_b32 s1, s2, 16
575575
; VI-NEXT: s_and_b32 s0, s0, 0xffff
576-
; VI-NEXT: s_or_b32 s0, s0, s2
576+
; VI-NEXT: s_or_b32 s0, s0, s1
577577
; VI-NEXT: v_mov_b32_e32 v4, s4
578578
; VI-NEXT: v_mov_b32_e32 v2, s6
579579
; VI-NEXT: v_mov_b32_e32 v3, s7
@@ -661,12 +661,12 @@ define amdgpu_kernel void @v_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
661661
; VI-NEXT: v_mov_b32_e32 v1, s1
662662
; VI-NEXT: v_mov_b32_e32 v2, s2
663663
; VI-NEXT: v_mov_b32_e32 v3, s3
664-
; VI-NEXT: v_max_i32_sdwa v6, sext(v4), sext(v5) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
665-
; VI-NEXT: v_max_i32_sdwa v7, sext(v4), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
666-
; VI-NEXT: v_min_i32_sdwa v8, sext(v4), sext(v5) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
667-
; VI-NEXT: v_min_i32_sdwa v4, sext(v4), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
668-
; VI-NEXT: v_or_b32_sdwa v5, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
669-
; VI-NEXT: v_or_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
664+
; VI-NEXT: v_max_i32_sdwa v6, sext(v4), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
665+
; VI-NEXT: v_max_i32_sdwa v7, sext(v4), sext(v5) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
666+
; VI-NEXT: v_min_i32_sdwa v8, sext(v4), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
667+
; VI-NEXT: v_min_i32_sdwa v4, sext(v4), sext(v5) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
668+
; VI-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
669+
; VI-NEXT: v_or_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
670670
; VI-NEXT: flat_store_dword v[0:1], v5
671671
; VI-NEXT: s_waitcnt vmcnt(0)
672672
; VI-NEXT: flat_store_dword v[2:3], v4
@@ -748,37 +748,37 @@ define amdgpu_kernel void @s_min_max_v4i16(ptr addrspace(1) %out0, ptr addrspace
748748
; VI-NEXT: s_waitcnt lgkmcnt(0)
749749
; VI-NEXT: v_mov_b32_e32 v0, s0
750750
; VI-NEXT: v_mov_b32_e32 v1, s1
751-
; VI-NEXT: v_mov_b32_e32 v3, s3
752-
; VI-NEXT: s_ashr_i32 s0, s7, 16
753-
; VI-NEXT: s_ashr_i32 s1, s5, 16
754-
; VI-NEXT: s_sext_i32_i16 s3, s7
755-
; VI-NEXT: s_sext_i32_i16 s5, s5
756751
; VI-NEXT: v_mov_b32_e32 v2, s2
757-
; VI-NEXT: s_max_i32 s2, s1, s0
758-
; VI-NEXT: s_max_i32 s7, s5, s3
759-
; VI-NEXT: s_lshl_b32 s2, s2, 16
760-
; VI-NEXT: s_and_b32 s7, s7, 0xffff
761-
; VI-NEXT: s_or_b32 s2, s7, s2
762-
; VI-NEXT: s_ashr_i32 s7, s6, 16
763-
; VI-NEXT: s_ashr_i32 s8, s4, 16
752+
; VI-NEXT: v_mov_b32_e32 v3, s3
753+
; VI-NEXT: s_ashr_i32 s0, s5, 16
754+
; VI-NEXT: s_ashr_i32 s1, s4, 16
755+
; VI-NEXT: s_sext_i32_i16 s2, s5
756+
; VI-NEXT: s_sext_i32_i16 s3, s4
757+
; VI-NEXT: s_ashr_i32 s4, s7, 16
758+
; VI-NEXT: s_ashr_i32 s5, s6, 16
759+
; VI-NEXT: s_sext_i32_i16 s7, s7
764760
; VI-NEXT: s_sext_i32_i16 s6, s6
765-
; VI-NEXT: s_sext_i32_i16 s4, s4
766-
; VI-NEXT: s_min_i32 s0, s1, s0
767-
; VI-NEXT: s_min_i32 s1, s5, s3
768-
; VI-NEXT: s_max_i32 s9, s8, s7
769-
; VI-NEXT: s_max_i32 s10, s4, s6
770-
; VI-NEXT: s_lshl_b32 s0, s0, 16
771-
; VI-NEXT: s_and_b32 s1, s1, 0xffff
761+
; VI-NEXT: s_max_i32 s8, s1, s5
762+
; VI-NEXT: s_max_i32 s9, s0, s4
763+
; VI-NEXT: s_max_i32 s10, s3, s6
764+
; VI-NEXT: s_max_i32 s11, s2, s7
765+
; VI-NEXT: s_min_i32 s0, s0, s4
766+
; VI-NEXT: s_min_i32 s2, s2, s7
772767
; VI-NEXT: s_lshl_b32 s9, s9, 16
768+
; VI-NEXT: s_and_b32 s11, s11, 0xffff
769+
; VI-NEXT: s_lshl_b32 s8, s8, 16
773770
; VI-NEXT: s_and_b32 s10, s10, 0xffff
774-
; VI-NEXT: v_mov_b32_e32 v5, s2
775-
; VI-NEXT: s_or_b32 s0, s1, s0
776-
; VI-NEXT: s_min_i32 s1, s8, s7
777-
; VI-NEXT: s_min_i32 s2, s4, s6
778-
; VI-NEXT: s_or_b32 s9, s10, s9
779-
; VI-NEXT: s_lshl_b32 s1, s1, 16
771+
; VI-NEXT: s_min_i32 s1, s1, s5
772+
; VI-NEXT: s_min_i32 s3, s3, s6
773+
; VI-NEXT: s_lshl_b32 s0, s0, 16
780774
; VI-NEXT: s_and_b32 s2, s2, 0xffff
781-
; VI-NEXT: v_mov_b32_e32 v4, s9
775+
; VI-NEXT: s_or_b32 s9, s11, s9
776+
; VI-NEXT: s_or_b32 s8, s10, s8
777+
; VI-NEXT: s_or_b32 s0, s2, s0
778+
; VI-NEXT: s_lshl_b32 s1, s1, 16
779+
; VI-NEXT: s_and_b32 s2, s3, 0xffff
780+
; VI-NEXT: v_mov_b32_e32 v4, s8
781+
; VI-NEXT: v_mov_b32_e32 v5, s9
782782
; VI-NEXT: s_or_b32 s1, s2, s1
783783
; VI-NEXT: v_mov_b32_e32 v6, s1
784784
; VI-NEXT: v_mov_b32_e32 v7, s0
@@ -861,26 +861,26 @@ define amdgpu_kernel void @v_min_max_v2i16_user(ptr addrspace(1) %out0, ptr addr
861861
; GFX9-NEXT: global_load_dword v2, v0, s[10:11] glc
862862
; GFX9-NEXT: s_waitcnt vmcnt(0)
863863
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
864-
; GFX9-NEXT: v_cmp_gt_i32_sdwa vcc, sext(v1), sext(v2) src0_sel:WORD_0 src1_sel:WORD_0
865-
; GFX9-NEXT: v_cmp_gt_i32_sdwa s[0:1], sext(v1), sext(v2) src0_sel:WORD_1 src1_sel:WORD_1
866864
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
865+
; GFX9-NEXT: v_cmp_gt_i16_e32 vcc, v1, v2
867866
; GFX9-NEXT: v_cndmask_b32_e32 v5, v2, v1, vcc
867+
; GFX9-NEXT: v_cmp_gt_i16_e64 s[0:1], v3, v4
868868
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
869-
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
870869
; GFX9-NEXT: v_cndmask_b32_e64 v6, v4, v3, s[0:1]
870+
; GFX9-NEXT: v_and_b32_e32 v5, 0xffff, v5
871871
; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1]
872-
; GFX9-NEXT: v_and_b32_e32 v4, 0xffff, v5
873-
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
874872
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
875-
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2
876-
; GFX9-NEXT: v_lshl_or_b32 v4, v6, 16, v4
873+
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[0:1]
874+
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
875+
; GFX9-NEXT: v_lshl_or_b32 v5, v6, 16, v5
877876
; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
878-
; GFX9-NEXT: v_or_b32_e32 v2, v5, v2
879-
; GFX9-NEXT: global_store_dword v0, v4, s[4:5]
877+
; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v4
878+
; GFX9-NEXT: global_store_dword v0, v5, s[4:5]
880879
; GFX9-NEXT: s_waitcnt vmcnt(0)
881880
; GFX9-NEXT: global_store_dword v0, v1, s[6:7]
882881
; GFX9-NEXT: s_waitcnt vmcnt(0)
883-
; GFX9-NEXT: v_and_b32_e32 v0, 3, v2
882+
; GFX9-NEXT: v_or_b32_e32 v0, v2, v3
883+
; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
884884
; GFX9-NEXT: global_store_byte v[0:1], v0, off
885885
; GFX9-NEXT: s_waitcnt vmcnt(0)
886886
; GFX9-NEXT: s_endpgm
@@ -899,34 +899,42 @@ define amdgpu_kernel void @v_min_max_v2i16_user(ptr addrspace(1) %out0, ptr addr
899899
; VI-NEXT: s_waitcnt vmcnt(0)
900900
; VI-NEXT: v_mov_b32_e32 v0, s0
901901
; VI-NEXT: v_mov_b32_e32 v1, s1
902-
; VI-NEXT: v_mov_b32_e32 v2, s2
903902
; VI-NEXT: v_mov_b32_e32 v3, s3
904-
; VI-NEXT: v_ashrrev_i32_e32 v10, 16, v4
905-
; VI-NEXT: v_ashrrev_i32_e32 v11, 16, v5
906-
; VI-NEXT: v_bfe_i32 v6, v4, 0, 16
907-
; VI-NEXT: v_bfe_i32 v7, v5, 0, 16
908-
; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v4
909-
; VI-NEXT: v_lshrrev_b32_e32 v9, 16, v5
910-
; VI-NEXT: v_cmp_gt_i32_e32 vcc, v10, v11
911-
; VI-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
912-
; VI-NEXT: v_cmp_gt_i32_e64 s[0:1], v6, v7
913-
; VI-NEXT: v_cndmask_b32_e64 v6, v5, v4, s[0:1]
914-
; VI-NEXT: v_cndmask_b32_e32 v7, v8, v9, vcc
915-
; VI-NEXT: v_lshlrev_b32_e32 v8, 16, v10
916-
; VI-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[0:1]
917-
; VI-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
918-
; VI-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[0:1]
919-
; VI-NEXT: v_or_b32_sdwa v6, v6, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
920-
; VI-NEXT: v_lshlrev_b32_e32 v5, 1, v5
921-
; VI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
922-
; VI-NEXT: flat_store_dword v[0:1], v6
903+
; VI-NEXT: v_mov_b32_e32 v2, s2
904+
; VI-NEXT: v_readfirstlane_b32 s0, v4
905+
; VI-NEXT: v_readfirstlane_b32 s1, v5
906+
; VI-NEXT: s_ashr_i32 s3, s0, 16
907+
; VI-NEXT: s_ashr_i32 s5, s1, 16
908+
; VI-NEXT: s_cmp_gt_i32 s3, s5
909+
; VI-NEXT: s_sext_i32_i16 s2, s0
910+
; VI-NEXT: s_sext_i32_i16 s4, s1
911+
; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
912+
; VI-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[0:1]
913+
; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec
914+
; VI-NEXT: s_cselect_b32 s0, s3, s5
915+
; VI-NEXT: s_cselect_b32 s3, s5, s3
916+
; VI-NEXT: s_lshl_b32 s5, s0, 16
917+
; VI-NEXT: s_cmp_gt_i32 s2, s4
918+
; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
919+
; VI-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1]
920+
; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec
921+
; VI-NEXT: s_cselect_b32 s0, s2, s4
922+
; VI-NEXT: s_cselect_b32 s1, s4, s2
923+
; VI-NEXT: s_and_b32 s0, s0, 0xffff
924+
; VI-NEXT: v_lshlrev_b16_e32 v4, 1, v4
925+
; VI-NEXT: s_lshl_b32 s2, s3, 16
926+
; VI-NEXT: s_and_b32 s1, s1, 0xffff
927+
; VI-NEXT: s_or_b32 s0, s0, s5
928+
; VI-NEXT: v_or_b32_e32 v4, v5, v4
929+
; VI-NEXT: s_or_b32 s1, s1, s2
930+
; VI-NEXT: v_mov_b32_e32 v5, s0
931+
; VI-NEXT: v_and_b32_e32 v4, 3, v4
932+
; VI-NEXT: v_mov_b32_e32 v6, s1
933+
; VI-NEXT: flat_store_dword v[0:1], v5
923934
; VI-NEXT: s_waitcnt vmcnt(0)
924-
; VI-NEXT: v_or_b32_e32 v0, v9, v5
925-
; VI-NEXT: v_or_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
926-
; VI-NEXT: v_and_b32_e32 v0, 3, v0
927-
; VI-NEXT: flat_store_dword v[2:3], v4
935+
; VI-NEXT: flat_store_dword v[2:3], v6
928936
; VI-NEXT: s_waitcnt vmcnt(0)
929-
; VI-NEXT: flat_store_byte v[0:1], v0
937+
; VI-NEXT: flat_store_byte v[0:1], v4
930938
; VI-NEXT: s_waitcnt vmcnt(0)
931939
; VI-NEXT: s_endpgm
932940
;
@@ -1013,19 +1021,19 @@ define amdgpu_kernel void @u_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
10131021
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
10141022
; VI-NEXT: s_waitcnt lgkmcnt(0)
10151023
; VI-NEXT: v_mov_b32_e32 v1, s5
1016-
; VI-NEXT: s_and_b32 s2, s1, 0xffff
1017-
; VI-NEXT: s_and_b32 s3, s0, 0xffff
1018-
; VI-NEXT: s_lshr_b32 s1, s1, 16
1019-
; VI-NEXT: s_lshr_b32 s0, s0, 16
1020-
; VI-NEXT: s_max_u32 s5, s0, s1
1024+
; VI-NEXT: s_lshr_b32 s2, s0, 16
1025+
; VI-NEXT: s_lshr_b32 s3, s1, 16
1026+
; VI-NEXT: s_and_b32 s0, s0, 0xffff
1027+
; VI-NEXT: s_and_b32 s1, s1, 0xffff
1028+
; VI-NEXT: s_max_u32 s5, s2, s3
10211029
; VI-NEXT: v_mov_b32_e32 v0, s4
1022-
; VI-NEXT: s_max_u32 s4, s3, s2
1030+
; VI-NEXT: s_max_u32 s4, s0, s1
10231031
; VI-NEXT: s_lshl_b32 s5, s5, 16
10241032
; VI-NEXT: s_min_u32 s0, s0, s1
1033+
; VI-NEXT: s_min_u32 s1, s2, s3
10251034
; VI-NEXT: s_or_b32 s4, s4, s5
1026-
; VI-NEXT: s_min_u32 s2, s3, s2
1027-
; VI-NEXT: s_lshl_b32 s0, s0, 16
1028-
; VI-NEXT: s_or_b32 s0, s2, s0
1035+
; VI-NEXT: s_lshl_b32 s1, s1, 16
1036+
; VI-NEXT: s_or_b32 s0, s0, s1
10291037
; VI-NEXT: v_mov_b32_e32 v4, s4
10301038
; VI-NEXT: v_mov_b32_e32 v2, s6
10311039
; VI-NEXT: v_mov_b32_e32 v3, s7

0 commit comments

Comments
 (0)