@@ -17,9 +17,8 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
17
17
; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
18
18
; SI-NEXT: s_waitcnt vmcnt(0)
19
19
; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
20
- ; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3
21
- ; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
22
- ; SI-NEXT: v_lshrrev_b32_e32 v2, v3, v2
20
+ ; SI-NEXT: v_and_b32_e32 v3, 31, v3
21
+ ; SI-NEXT: v_bfe_u32 v2, v2, 0, v3
23
22
; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
24
23
; SI-NEXT: s_endpgm
25
24
;
@@ -38,9 +37,8 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
38
37
; VI-NEXT: v_mov_b32_e32 v1, s1
39
38
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
40
39
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
41
- ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
42
- ; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
43
- ; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3
40
+ ; VI-NEXT: v_and_b32_e32 v2, 31, v4
41
+ ; VI-NEXT: v_bfe_u32 v2, v3, 0, v2
44
42
; VI-NEXT: flat_store_dword v[0:1], v2
45
43
; VI-NEXT: s_endpgm
46
44
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x ()
@@ -49,7 +47,8 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
49
47
%out.gep = getelementptr i32 , ptr addrspace (1 ) %out , i32 %id.x
50
48
%src = load volatile i32 , ptr addrspace (1 ) %in0.gep
51
49
%width = load volatile i32 , ptr addrspace (1 ) %in0.gep
52
- %sub = sub i32 32 , %width
50
+ %width5 = and i32 %width , 31
51
+ %sub = sub i32 32 , %width5
53
52
%shl = shl i32 %src , %sub
54
53
%bfe = lshr i32 %shl , %sub
55
54
store i32 %bfe , ptr addrspace (1 ) %out.gep
@@ -72,6 +71,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
72
71
; SI-NEXT: s_waitcnt vmcnt(0)
73
72
; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
74
73
; SI-NEXT: s_mov_b32 s6, -1
74
+ ; SI-NEXT: v_and_b32_e32 v3, 31, v3
75
75
; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3
76
76
; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
77
77
; SI-NEXT: v_lshrrev_b32_e32 v3, v3, v2
@@ -95,7 +95,8 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
95
95
; VI-NEXT: v_mov_b32_e32 v1, s1
96
96
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
97
97
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
98
- ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
98
+ ; VI-NEXT: v_and_b32_e32 v2, 31, v4
99
+ ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v2
99
100
; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
100
101
; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3
101
102
; VI-NEXT: flat_store_dword v[0:1], v2
@@ -108,7 +109,8 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
108
109
%out.gep = getelementptr i32 , ptr addrspace (1 ) %out , i32 %id.x
109
110
%src = load volatile i32 , ptr addrspace (1 ) %in0.gep
110
111
%width = load volatile i32 , ptr addrspace (1 ) %in0.gep
111
- %sub = sub i32 32 , %width
112
+ %width5 = and i32 %width , 31
113
+ %sub = sub i32 32 , %width5
112
114
%shl = shl i32 %src , %sub
113
115
%bfe = lshr i32 %shl , %sub
114
116
store i32 %bfe , ptr addrspace (1 ) %out.gep
@@ -219,9 +221,8 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
219
221
; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
220
222
; SI-NEXT: s_waitcnt vmcnt(0)
221
223
; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
222
- ; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3
223
- ; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
224
- ; SI-NEXT: v_ashrrev_i32_e32 v2, v3, v2
224
+ ; SI-NEXT: v_and_b32_e32 v3, 31, v3
225
+ ; SI-NEXT: v_bfe_i32 v2, v2, 0, v3
225
226
; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
226
227
; SI-NEXT: s_endpgm
227
228
;
@@ -240,9 +241,8 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
240
241
; VI-NEXT: v_mov_b32_e32 v1, s1
241
242
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
242
243
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
243
- ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
244
- ; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
245
- ; VI-NEXT: v_ashrrev_i32_e32 v2, v2, v3
244
+ ; VI-NEXT: v_and_b32_e32 v2, 31, v4
245
+ ; VI-NEXT: v_bfe_i32 v2, v3, 0, v2
246
246
; VI-NEXT: flat_store_dword v[0:1], v2
247
247
; VI-NEXT: s_endpgm
248
248
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x ()
@@ -251,7 +251,8 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
251
251
%out.gep = getelementptr i32 , ptr addrspace (1 ) %out , i32 %id.x
252
252
%src = load volatile i32 , ptr addrspace (1 ) %in0.gep
253
253
%width = load volatile i32 , ptr addrspace (1 ) %in0.gep
254
- %sub = sub i32 32 , %width
254
+ %width5 = and i32 %width , 31
255
+ %sub = sub i32 32 , %width5
255
256
%shl = shl i32 %src , %sub
256
257
%bfe = ashr i32 %shl , %sub
257
258
store i32 %bfe , ptr addrspace (1 ) %out.gep
0 commit comments