@@ -34,19 +34,11 @@ define i8 @test_vector_reduce_smax_v2i8(<2 x i8> %v) {
34
34
; GFX7-GISEL-NEXT: v_max_i32_e32 v0, v0, v1
35
35
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
36
36
;
37
- ; GFX8-SDAG-LABEL: test_vector_reduce_smax_v2i8:
38
- ; GFX8-SDAG: ; %bb.0: ; %entry
39
- ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40
- ; GFX8-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
41
- ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
42
- ;
43
- ; GFX8-GISEL-LABEL: test_vector_reduce_smax_v2i8:
44
- ; GFX8-GISEL: ; %bb.0: ; %entry
45
- ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
47
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
48
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
49
- ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
37
+ ; GFX8-LABEL: test_vector_reduce_smax_v2i8:
38
+ ; GFX8: ; %bb.0: ; %entry
39
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40
+ ; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
41
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
50
42
;
51
43
; GFX9-LABEL: test_vector_reduce_smax_v2i8:
52
44
; GFX9: ; %bb.0: ; %entry
@@ -173,11 +165,8 @@ define i8 @test_vector_reduce_smax_v3i8(<3 x i8> %v) {
173
165
; GFX8-GISEL-LABEL: test_vector_reduce_smax_v3i8:
174
166
; GFX8-GISEL: ; %bb.0: ; %entry
175
167
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
177
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
178
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
179
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v2
180
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
168
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
169
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, v0, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
181
170
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
182
171
;
183
172
; GFX9-SDAG-LABEL: test_vector_reduce_smax_v3i8:
@@ -350,23 +339,20 @@ define i8 @test_vector_reduce_smax_v4i8(<4 x i8> %v) {
350
339
; GFX8-GISEL-LABEL: test_vector_reduce_smax_v4i8:
351
340
; GFX8-GISEL: ; %bb.0: ; %entry
352
341
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
354
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2
355
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
356
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
357
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
358
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
359
- ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0
360
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
361
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
342
+ ; GFX8-GISEL-NEXT: s_sext_i32_i8 s4, s4
343
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
344
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
345
+ ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, s4
346
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
347
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
362
348
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v1
363
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, 0 , v1
349
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, s4 , v1
364
350
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
365
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
351
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
366
352
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
367
353
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
368
354
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2
369
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
355
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
370
356
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
371
357
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
372
358
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3
@@ -675,30 +661,23 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
675
661
; GFX8-GISEL-LABEL: test_vector_reduce_smax_v8i8:
676
662
; GFX8-GISEL: ; %bb.0: ; %entry
677
663
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
679
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4
680
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
681
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
682
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v5
683
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
684
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2
685
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v6
686
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
687
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
688
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v7
689
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
664
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
665
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
666
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v6) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
667
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v7) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
668
+ ; GFX8-GISEL-NEXT: s_sext_i32_i8 s4, s4
690
669
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v2
691
670
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, v1, v3
692
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
671
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
693
672
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v1
694
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, 0 , v1
673
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, s4 , v1
695
674
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
696
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
675
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
697
676
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
698
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
677
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
699
678
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
700
679
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2
701
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
680
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
702
681
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
703
682
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
704
683
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3
@@ -1135,46 +1114,31 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
1135
1114
; GFX8-GISEL-LABEL: test_vector_reduce_smax_v16i8:
1136
1115
; GFX8-GISEL: ; %bb.0: ; %entry
1137
1116
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1138
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1139
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v8
1140
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1141
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1142
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v9
1143
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1144
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1145
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v10
1146
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1147
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1148
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v11
1149
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1150
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4
1151
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v12
1152
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v4, sext(v4), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1153
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v5, 8, v5
1154
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v13
1155
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v5, sext(v5), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1156
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v6, 8, v6
1157
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v14
1158
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1159
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v7, 8, v7
1160
- ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v8, 8, v15
1161
- ; GFX8-GISEL-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
1117
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1118
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1119
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1120
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1121
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v4, sext(v4), sext(v12) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1122
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v5, sext(v5), sext(v13) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1123
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1124
+ ; GFX8-GISEL-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
1162
1125
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v4
1163
1126
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, v1, v5
1164
1127
; GFX8-GISEL-NEXT: v_max_i16_e32 v2, v2, v6
1165
1128
; GFX8-GISEL-NEXT: v_max_i16_e32 v3, v3, v7
1129
+ ; GFX8-GISEL-NEXT: s_sext_i32_i8 s4, s4
1166
1130
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v2
1167
1131
; GFX8-GISEL-NEXT: v_max_i16_e32 v1, v1, v3
1168
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
1132
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
1169
1133
; GFX8-GISEL-NEXT: v_max_i16_e32 v0, v0, v1
1170
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, 0 , v1
1134
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v1, s4 , v1
1171
1135
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
1172
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, 0 , v2
1136
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v2, s4 , v2
1173
1137
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1174
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
1138
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
1175
1139
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1176
1140
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2
1177
- ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, 0 , v3
1141
+ ; GFX8-GISEL-NEXT: v_max_i16_e32 v3, s4 , v3
1178
1142
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1179
1143
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
1180
1144
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3
0 commit comments