Skip to content

Commit 53bc645

Browse files
committed
rebase fixes
1 parent cebd356 commit 53bc645

File tree

3 files changed

+96
-209
lines changed

3 files changed

+96
-209
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll

Lines changed: 16 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -197,31 +197,13 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
197197
}
198198

199199
define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
200-
; GFX6-LABEL: abs_sgpr_v2i8:
201-
; GFX6: ; %bb.0:
202-
; GFX6-NEXT: s_sext_i32_i8 s0, s0
203-
; GFX6-NEXT: s_sext_i32_i8 s1, s1
204-
; GFX6-NEXT: s_abs_i32 s0, s0
205-
; GFX6-NEXT: s_abs_i32 s1, s1
206-
; GFX6-NEXT: ; return to shader part epilog
207-
;
208-
; GFX8-LABEL: abs_sgpr_v2i8:
209-
; GFX8: ; %bb.0:
210-
; GFX8-NEXT: s_sext_i32_i8 s0, s0
211-
; GFX8-NEXT: s_sext_i32_i8 s1, s1
212-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
213-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
214-
; GFX8-NEXT: s_abs_i32 s0, s0
215-
; GFX8-NEXT: s_abs_i32 s1, s1
216-
; GFX8-NEXT: ; return to shader part epilog
217-
;
218-
; GFX10-LABEL: abs_sgpr_v2i8:
219-
; GFX10: ; %bb.0:
220-
; GFX10-NEXT: s_sext_i32_i8 s0, s0
221-
; GFX10-NEXT: s_sext_i32_i8 s1, s1
222-
; GFX10-NEXT: s_abs_i32 s0, s0
223-
; GFX10-NEXT: s_abs_i32 s1, s1
224-
; GFX10-NEXT: ; return to shader part epilog
200+
; GFX-LABEL: abs_sgpr_v2i8:
201+
; GFX: ; %bb.0:
202+
; GFX-NEXT: s_sext_i32_i8 s0, s0
203+
; GFX-NEXT: s_sext_i32_i8 s1, s1
204+
; GFX-NEXT: s_abs_i32 s0, s0
205+
; GFX-NEXT: s_abs_i32 s1, s1
206+
; GFX-NEXT: ; return to shader part epilog
225207
%res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
226208
ret <2 x i8> %res
227209
}
@@ -266,38 +248,15 @@ define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
266248
}
267249

268250
define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
269-
; GFX6-LABEL: abs_sgpr_v3i8:
270-
; GFX6: ; %bb.0:
271-
; GFX6-NEXT: s_sext_i32_i8 s0, s0
272-
; GFX6-NEXT: s_sext_i32_i8 s1, s1
273-
; GFX6-NEXT: s_sext_i32_i8 s2, s2
274-
; GFX6-NEXT: s_abs_i32 s0, s0
275-
; GFX6-NEXT: s_abs_i32 s1, s1
276-
; GFX6-NEXT: s_abs_i32 s2, s2
277-
; GFX6-NEXT: ; return to shader part epilog
278-
;
279-
; GFX8-LABEL: abs_sgpr_v3i8:
280-
; GFX8: ; %bb.0:
281-
; GFX8-NEXT: s_sext_i32_i8 s0, s0
282-
; GFX8-NEXT: s_sext_i32_i8 s1, s1
283-
; GFX8-NEXT: s_sext_i32_i8 s2, s2
284-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
285-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
286-
; GFX8-NEXT: s_sext_i32_i16 s2, s2
287-
; GFX8-NEXT: s_abs_i32 s0, s0
288-
; GFX8-NEXT: s_abs_i32 s1, s1
289-
; GFX8-NEXT: s_abs_i32 s2, s2
290-
; GFX8-NEXT: ; return to shader part epilog
291-
;
292-
; GFX10-LABEL: abs_sgpr_v3i8:
293-
; GFX10: ; %bb.0:
294-
; GFX10-NEXT: s_sext_i32_i8 s0, s0
295-
; GFX10-NEXT: s_sext_i32_i8 s1, s1
296-
; GFX10-NEXT: s_sext_i32_i8 s2, s2
297-
; GFX10-NEXT: s_abs_i32 s0, s0
298-
; GFX10-NEXT: s_abs_i32 s1, s1
299-
; GFX10-NEXT: s_abs_i32 s2, s2
300-
; GFX10-NEXT: ; return to shader part epilog
251+
; GFX-LABEL: abs_sgpr_v3i8:
252+
; GFX: ; %bb.0:
253+
; GFX-NEXT: s_sext_i32_i8 s0, s0
254+
; GFX-NEXT: s_sext_i32_i8 s1, s1
255+
; GFX-NEXT: s_sext_i32_i8 s2, s2
256+
; GFX-NEXT: s_abs_i32 s0, s0
257+
; GFX-NEXT: s_abs_i32 s1, s1
258+
; GFX-NEXT: s_abs_i32 s2, s2
259+
; GFX-NEXT: ; return to shader part epilog
301260
%res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
302261
ret <3 x i8> %res
303262
}

llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll

Lines changed: 40 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,11 @@ define i8 @test_vector_reduce_smax_v2i8(<2 x i8> %v) {
3434
; GFX7-GISEL-NEXT: v_max_i32_e32 v0, v0, v1
3535
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
3636
;
37-
; GFX8-SDAG-LABEL: test_vector_reduce_smax_v2i8:
38-
; GFX8-SDAG: ; %bb.0: ; %entry
39-
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40-
; GFX8-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
41-
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
42-
;
43-
; GFX8-GISEL-LABEL: test_vector_reduce_smax_v2i8:
44-
; GFX8-GISEL: ; %bb.0: ; %entry
45-
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
47-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
48-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
49-
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
37+
; GFX8-LABEL: test_vector_reduce_smax_v2i8:
38+
; GFX8: ; %bb.0: ; %entry
39+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40+
; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
41+
; GFX8-NEXT: s_setpc_b64 s[30:31]
5042
;
5143
; GFX9-LABEL: test_vector_reduce_smax_v2i8:
5244
; GFX9: ; %bb.0: ; %entry
@@ -173,11 +165,8 @@ define i8 @test_vector_reduce_smax_v3i8(<3 x i8> %v) {
173165
; GFX8-GISEL-LABEL: test_vector_reduce_smax_v3i8:
174166
; GFX8-GISEL: ; %bb.0: ; %entry
175167
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
177-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
178-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
179-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v2
180-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
168+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
169+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, v0, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
181170
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
182171
;
183172
; GFX9-SDAG-LABEL: test_vector_reduce_smax_v3i8:
@@ -350,23 +339,20 @@ define i8 @test_vector_reduce_smax_v4i8(<4 x i8> %v) {
350339
; GFX8-GISEL-LABEL: test_vector_reduce_smax_v4i8:
351340
; GFX8-GISEL: ; %bb.0: ; %entry
352341
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
354-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2
355-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
356-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
357-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
358-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
359-
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0
360-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
361-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
342+
; GFX8-GISEL-NEXT: s_sext_i32_i8 s4, s4
343+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
344+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
345+
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, s4
346+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
347+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
362348
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v1
363-
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, 0, v1
349+
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, s4, v1
364350
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
365-
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0, v2
351+
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4, v2
366352
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
367353
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
368354
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2
369-
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0, v3
355+
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4, v3
370356
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
371357
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
372358
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3
@@ -675,30 +661,23 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
675661
; GFX8-GISEL-LABEL: test_vector_reduce_smax_v8i8:
676662
; GFX8-GISEL: ; %bb.0: ; %entry
677663
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
679-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4
680-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
681-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
682-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v5
683-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
684-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2
685-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v6
686-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
687-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
688-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v7
689-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
664+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
665+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
666+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v6) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
667+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v7) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
668+
; GFX8-GISEL-NEXT: s_sext_i32_i8 s4, s4
690669
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v2
691670
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, v1, v3
692-
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0, v2
671+
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4, v2
693672
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v1
694-
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, 0, v1
673+
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, s4, v1
695674
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
696-
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0, v2
675+
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4, v2
697676
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
698-
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0, v3
677+
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4, v3
699678
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
700679
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2
701-
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0, v3
680+
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4, v3
702681
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
703682
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
704683
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3
@@ -1135,46 +1114,31 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
11351114
; GFX8-GISEL-LABEL: test_vector_reduce_smax_v16i8:
11361115
; GFX8-GISEL: ; %bb.0: ; %entry
11371116
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1138-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1139-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v8
1140-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1141-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1142-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v9
1143-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1144-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1145-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v10
1146-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1147-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1148-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v11
1149-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1150-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4
1151-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v12
1152-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v4, sext(v4), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1153-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v5, 8, v5
1154-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v13
1155-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v5, sext(v5), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1156-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v6, 8, v6
1157-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v14
1158-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1159-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v7, 8, v7
1160-
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v15
1161-
; GFX8-GISEL-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1117+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1118+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1119+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1120+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1121+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v4, sext(v4), sext(v12) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1122+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v5, sext(v5), sext(v13) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1123+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1124+
; GFX8-GISEL-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
11621125
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v4
11631126
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, v1, v5
11641127
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, v2, v6
11651128
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, v3, v7
1129+
; GFX8-GISEL-NEXT: s_sext_i32_i8 s4, s4
11661130
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v2
11671131
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, v1, v3
1168-
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0, v2
1132+
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4, v2
11691133
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v1
1170-
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, 0, v1
1134+
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, s4, v1
11711135
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
1172-
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0, v2
1136+
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4, v2
11731137
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1174-
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0, v3
1138+
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4, v3
11751139
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
11761140
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2
1177-
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0, v3
1141+
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4, v3
11781142
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
11791143
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
11801144
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3

0 commit comments

Comments
 (0)