@@ -1287,55 +1287,55 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
1287
1287
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1288
1288
; SDAG-GFX1100-TRUE16: ; %bb.0:
1289
1289
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1290
- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1291
- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1292
- ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1293
- ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
1294
- ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v7.l
1295
1290
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l
1296
1291
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
1292
+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
1293
+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
1294
+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v10.l, v3.l
1295
+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v11.l, v5.l
1297
1296
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
1298
- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1]
1299
- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v6, v7, v4 op_sel_hi:[1,1,1]
1297
+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v6, v7, v8 op_sel_hi:[1,1,1] clamp
1298
+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v9, v10, v11 op_sel_hi:[1,1,1] clamp
1300
1299
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1301
- ; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v2, v2 clamp
1302
- ; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 clamp
1300
+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1301
+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1302
+ ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1303
+ ; SDAG-GFX1100-TRUE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
1303
1304
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
1304
1305
;
1305
1306
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1306
1307
; SDAG-GFX1100-FAKE16: ; %bb.0:
1307
1308
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308
- ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1309
- ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
1309
+ ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1310
+ ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1310
1311
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1311
- ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1312
- ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1313
- ; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1314
- ; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1315
- ; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v7, v7 clamp
1312
+ ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1313
+ ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1314
+ ; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1315
+ ; SDAG-GFX1100-FAKE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
1316
1316
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
1317
1317
;
1318
- ; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1319
- ; SDAG-GFX900: ; %bb.0:
1320
- ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321
- ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1322
- ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
1323
- ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1324
- ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1325
- ; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1326
- ; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v7, v7 clamp
1327
- ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1318
+ ; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1319
+ ; GFX900: ; %bb.0:
1320
+ ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321
+ ; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1322
+ ; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1323
+ ; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1324
+ ; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1325
+ ; GFX900-NEXT: v_mov_b32_e32 v0, v6
1326
+ ; GFX900-NEXT: v_mov_b32_e32 v1, v2
1327
+ ; GFX900-NEXT: s_setpc_b64 s[30:31]
1328
1328
;
1329
- ; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1330
- ; SDAG-GFX906: ; %bb.0:
1331
- ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332
- ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1333
- ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
1334
- ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1335
- ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1336
- ; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1337
- ; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v7, v7 clamp
1338
- ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1329
+ ; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1330
+ ; GFX906: ; %bb.0:
1331
+ ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332
+ ; GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1333
+ ; GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1334
+ ; GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1335
+ ; GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1336
+ ; GFX906-NEXT: v_mov_b32_e32 v0, v6
1337
+ ; GFX906-NEXT: v_mov_b32_e32 v1, v2
1338
+ ; GFX906-NEXT: s_setpc_b64 s[30:31]
1339
1339
;
1340
1340
; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1341
1341
; SDAG-VI: ; %bb.0:
@@ -1426,28 +1426,6 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
1426
1426
; GISEL-GFX1100-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
1427
1427
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1428
1428
;
1429
- ; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1430
- ; GISEL-GFX900: ; %bb.0:
1431
- ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432
- ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1433
- ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1434
- ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1435
- ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1436
- ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6
1437
- ; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v2
1438
- ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1439
- ;
1440
- ; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1441
- ; GISEL-GFX906: ; %bb.0:
1442
- ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443
- ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1444
- ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1445
- ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1446
- ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1447
- ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6
1448
- ; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v2
1449
- ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1450
- ;
1451
1429
; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1452
1430
; GISEL-VI: ; %bb.0:
1453
1431
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
0 commit comments