@@ -17,8 +17,9 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
17
17
; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
18
18
; SI-NEXT: s_waitcnt vmcnt(0)
19
19
; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
20
- ; SI-NEXT: v_and_b32_e32 v3, 31, v3
21
- ; SI-NEXT: v_bfe_u32 v2, v2, 0, v3
20
+ ; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3
21
+ ; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
22
+ ; SI-NEXT: v_lshrrev_b32_e32 v2, v3, v2
22
23
; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
23
24
; SI-NEXT: s_endpgm
24
25
;
@@ -37,8 +38,9 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
37
38
; VI-NEXT: v_mov_b32_e32 v1, s1
38
39
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
39
40
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
40
- ; VI-NEXT: v_and_b32_e32 v2, 31, v4
41
- ; VI-NEXT: v_bfe_u32 v2, v3, 0, v2
41
+ ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
42
+ ; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
43
+ ; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3
42
44
; VI-NEXT: flat_store_dword v[0:1], v2
43
45
; VI-NEXT: s_endpgm
44
46
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x ()
@@ -47,8 +49,7 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
47
49
%out.gep = getelementptr i32 , ptr addrspace (1 ) %out , i32 %id.x
48
50
%src = load volatile i32 , ptr addrspace (1 ) %in0.gep
49
51
%width = load volatile i32 , ptr addrspace (1 ) %in0.gep
50
- %width5 = and i32 %width , 31
51
- %sub = sub i32 32 , %width5
52
+ %sub = sub i32 32 , %width
52
53
%shl = shl i32 %src , %sub
53
54
%bfe = lshr i32 %shl , %sub
54
55
store i32 %bfe , ptr addrspace (1 ) %out.gep
@@ -71,7 +72,6 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
71
72
; SI-NEXT: s_waitcnt vmcnt(0)
72
73
; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
73
74
; SI-NEXT: s_mov_b32 s6, -1
74
- ; SI-NEXT: v_and_b32_e32 v3, 31, v3
75
75
; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3
76
76
; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
77
77
; SI-NEXT: v_lshrrev_b32_e32 v3, v3, v2
@@ -95,8 +95,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
95
95
; VI-NEXT: v_mov_b32_e32 v1, s1
96
96
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
97
97
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
98
- ; VI-NEXT: v_and_b32_e32 v2, 31, v4
99
- ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v2
98
+ ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
100
99
; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
101
100
; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3
102
101
; VI-NEXT: flat_store_dword v[0:1], v2
@@ -109,8 +108,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
109
108
%out.gep = getelementptr i32 , ptr addrspace (1 ) %out , i32 %id.x
110
109
%src = load volatile i32 , ptr addrspace (1 ) %in0.gep
111
110
%width = load volatile i32 , ptr addrspace (1 ) %in0.gep
112
- %width5 = and i32 %width , 31
113
- %sub = sub i32 32 , %width5
111
+ %sub = sub i32 32 , %width
114
112
%shl = shl i32 %src , %sub
115
113
%bfe = lshr i32 %shl , %sub
116
114
store i32 %bfe , ptr addrspace (1 ) %out.gep
@@ -221,8 +219,9 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
221
219
; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
222
220
; SI-NEXT: s_waitcnt vmcnt(0)
223
221
; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
224
- ; SI-NEXT: v_and_b32_e32 v3, 31, v3
225
- ; SI-NEXT: v_bfe_i32 v2, v2, 0, v3
222
+ ; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3
223
+ ; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
224
+ ; SI-NEXT: v_ashrrev_i32_e32 v2, v3, v2
226
225
; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
227
226
; SI-NEXT: s_endpgm
228
227
;
@@ -241,8 +240,9 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
241
240
; VI-NEXT: v_mov_b32_e32 v1, s1
242
241
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
243
242
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
244
- ; VI-NEXT: v_and_b32_e32 v2, 31, v4
245
- ; VI-NEXT: v_bfe_i32 v2, v3, 0, v2
243
+ ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
244
+ ; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
245
+ ; VI-NEXT: v_ashrrev_i32_e32 v2, v2, v3
246
246
; VI-NEXT: flat_store_dword v[0:1], v2
247
247
; VI-NEXT: s_endpgm
248
248
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x ()
@@ -251,8 +251,7 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
251
251
%out.gep = getelementptr i32 , ptr addrspace (1 ) %out , i32 %id.x
252
252
%src = load volatile i32 , ptr addrspace (1 ) %in0.gep
253
253
%width = load volatile i32 , ptr addrspace (1 ) %in0.gep
254
- %width5 = and i32 %width , 31
255
- %sub = sub i32 32 , %width5
254
+ %sub = sub i32 32 , %width
256
255
%shl = shl i32 %src , %sub
257
256
%bfe = ashr i32 %shl , %sub
258
257
store i32 %bfe , ptr addrspace (1 ) %out.gep
0 commit comments