@@ -5,16 +5,12 @@ define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i3
5
5
; GFX950: ; %bb.0:
6
6
; GFX950-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
7
7
; GFX950-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
8
- ; GFX950-NEXT: v_mov_b32_e32 v1, 0xffffff80
9
- ; GFX950-NEXT: v_mov_b32_e32 v2, 0x7f
10
8
; GFX950-NEXT: v_mov_b32_e32 v0, 0
11
9
; GFX950-NEXT: s_waitcnt lgkmcnt(0)
12
- ; GFX950-NEXT: s_ashr_i32 s1, s1, s2
13
- ; GFX950-NEXT: s_ashr_i32 s0, s0, s2
14
- ; GFX950-NEXT: v_med3_i32 v3, s0, v1, v2
15
- ; GFX950-NEXT: v_med3_i32 v1, s1, v1, v2
16
- ; GFX950-NEXT: v_lshlrev_b32_e32 v1, 8, v1
17
- ; GFX950-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
10
+ ; GFX950-NEXT: s_and_b32 s2, s2, 31
11
+ ; GFX950-NEXT: v_mov_b32_e32 v1, s1
12
+ ; GFX950-NEXT: v_mov_b32_e32 v2, s2
13
+ ; GFX950-NEXT: v_ashr_pk_i8_i32 v1, s0, v1, v2
18
14
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
19
15
; GFX950-NEXT: s_endpgm
20
16
%insert.0 = insertelement <2 x i32 > poison, i32 %src0 , i64 0
@@ -36,15 +32,12 @@ define amdgpu_kernel void @v_ashr_pk_u8_i32(ptr addrspace(1) %out, i32 %src0, i3
36
32
; GFX950: ; %bb.0:
37
33
; GFX950-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
38
34
; GFX950-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
39
- ; GFX950-NEXT: v_mov_b32_e32 v1, 0xff
40
35
; GFX950-NEXT: v_mov_b32_e32 v0, 0
41
36
; GFX950-NEXT: s_waitcnt lgkmcnt(0)
42
- ; GFX950-NEXT: s_ashr_i32 s1, s1, s2
43
- ; GFX950-NEXT: s_ashr_i32 s0, s0, s2
44
- ; GFX950-NEXT: v_med3_i32 v2, s0, 0, v1
45
- ; GFX950-NEXT: v_med3_i32 v1, s1, 0, v1
46
- ; GFX950-NEXT: v_lshlrev_b32_e32 v1, 8, v1
47
- ; GFX950-NEXT: v_or_b32_e32 v1, v2, v1
37
+ ; GFX950-NEXT: s_and_b32 s2, s2, 31
38
+ ; GFX950-NEXT: v_mov_b32_e32 v1, s1
39
+ ; GFX950-NEXT: v_mov_b32_e32 v2, s2
40
+ ; GFX950-NEXT: v_ashr_pk_u8_i32 v1, s0, v1, v2
48
41
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
49
42
; GFX950-NEXT: s_endpgm
50
43
%insert.0 = insertelement <2 x i32 > poison, i32 %src0 , i64 0
0 commit comments