@@ -601,6 +601,34 @@ define <2 x i16> @test_cvt_scalef32_pk_fp8_f16_word1(<2 x i16> %old, <2 x half>
601
601
ret <2 x i16 > %ret
602
602
}
603
603
604
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.fp8.f16: the <2 x half>
; source is the constant splat <4.0, 4.0>. The GCN checks expect the constant
; to appear directly as the operand 4.0 of v_cvt_scalef32_pk_fp8_f16, with no
; separate move instruction before it.
+ define <2 x i16 > @test_cvt_scalef32_pk_fp8_f16_imm1 (<2 x i16 > %old , float %scale ) {
605
+ ; GCN-LABEL: test_cvt_scalef32_pk_fp8_f16_imm1:
606
+ ; GCN: ; %bb.0:
607
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608
+ ; GCN-NEXT: v_cvt_scalef32_pk_fp8_f16 v0, 4.0, v1
609
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
610
+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.fp8.f16 (<2 x i16 > %old , <2 x half > <half 4 .0 , half 4 .0 >, float %scale , i1 false )
611
+ ret <2 x i16 > %ret
612
+ }
613
+
614
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.fp8.f16 with the
; non-splat constant <4.0, 2.0>. The checks expect the packed 32-bit value
; 0x40004400 (0x4400 = half 4.0 in the low 16 bits, 0x4000 = half 2.0 in the
; high 16 bits) to be materialized in a register first and then used as the
; source operand: s_mov_b32 into s0 under the GFX950-SDAG prefix, v_mov_b32
; into v2 under the GFX950-GISEL prefix.
+ define <2 x i16 > @test_cvt_scalef32_pk_fp8_f16_imm2 (<2 x i16 > %old , float %scale ) {
615
+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_pk_fp8_f16_imm2:
616
+ ; GFX950-SDAG: ; %bb.0:
617
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618
+ ; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x40004400
619
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk_fp8_f16 v0, s0, v1
620
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
621
+ ;
622
+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_pk_fp8_f16_imm2:
623
+ ; GFX950-GISEL: ; %bb.0:
624
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
625
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x40004400
626
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk_fp8_f16 v0, v2, v1
627
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
628
+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.fp8.f16 (<2 x i16 > %old , <2 x half > <half 4 .0 , half 2 .0 >, float %scale , i1 false )
629
+ ret <2 x i16 > %ret
630
+ }
631
+
604
632
define <2 x i16 > @test_cvt_scalef32_pk_fp8_bf16_word0 (<2 x i16 > %old , <2 x bfloat> %src , float %scale ) {
605
633
; GCN-LABEL: test_cvt_scalef32_pk_fp8_bf16_word0:
606
634
; GCN: ; %bb.0:
@@ -621,6 +649,27 @@ define <2 x i16> @test_cvt_scalef32_pk_fp8_bf16_word1(<2 x i16> %old, <2 x bfloa
621
649
ret <2 x i16 > %ret
622
650
}
623
651
652
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.fp8.bf16: the
; <2 x bfloat> source is the constant splat <4.0, 4.0>. The GCN checks expect
; the constant to appear directly as the operand 4.0 of
; v_cvt_scalef32_pk_fp8_bf16, with no separate move instruction before it.
+ define <2 x i16 > @test_cvt_scalef32_pk_fp8_bf16_imm1 (<2 x i16 > %old , float %scale ) {
653
+ ; GCN-LABEL: test_cvt_scalef32_pk_fp8_bf16_imm1:
654
+ ; GCN: ; %bb.0:
655
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
656
+ ; GCN-NEXT: v_cvt_scalef32_pk_fp8_bf16 v0, 4.0, v1
657
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
658
+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.fp8.bf16 (<2 x i16 > %old , <2 x bfloat> <bfloat 4 .0 , bfloat 4 .0 >, float %scale , i1 false )
659
+ ret <2 x i16 > %ret
660
+ }
661
+
662
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.fp8.bf16 with the
; non-splat constant <4.0, 2.0>. The checks expect the packed 32-bit value
; 0x40004080 (0x4080 = bfloat 4.0 in the low 16 bits, 0x4000 = bfloat 2.0 in
; the high 16 bits) to be moved into s0 and then used as the source operand.
; NOTE(review): only the common GCN prefix is used here, so presumably every
; RUN configuration produces this same sequence; the RUN lines are not
; visible in this excerpt.
+ define <2 x i16 > @test_cvt_scalef32_pk_fp8_bf16_imm2 (<2 x i16 > %old , float %scale ) {
663
+ ; GCN-LABEL: test_cvt_scalef32_pk_fp8_bf16_imm2:
664
+ ; GCN: ; %bb.0:
665
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666
+ ; GCN-NEXT: s_mov_b32 s0, 0x40004080
667
+ ; GCN-NEXT: v_cvt_scalef32_pk_fp8_bf16 v0, s0, v1
668
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
669
+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.fp8.bf16 (<2 x i16 > %old , <2 x bfloat> <bfloat 4 .0 , bfloat 2 .0 >, float %scale , i1 false )
670
+ ret <2 x i16 > %ret
671
+ }
672
+
624
673
define <2 x i16 > @test_cvt_scalef32_pk_bf8_f16_word0 (<2 x i16 > %old , <2 x half > %src , float %scale ) {
625
674
; GCN-LABEL: test_cvt_scalef32_pk_bf8_f16_word0:
626
675
; GCN: ; %bb.0:
@@ -641,6 +690,34 @@ define <2 x i16> @test_cvt_scalef32_pk_bf8_f16_word1(<2 x i16> %old, <2 x half>
641
690
ret <2 x i16 > %ret
642
691
}
643
692
693
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.bf8.f16: the <2 x half>
; source is the constant splat <4.0, 4.0>. The GCN checks expect the constant
; to appear directly as the operand 4.0 of v_cvt_scalef32_pk_bf8_f16, with no
; separate move instruction before it.
+ define <2 x i16 > @test_cvt_scalef32_pk_bf8_f16_imm1 (<2 x i16 > %old , float %scale ) {
694
+ ; GCN-LABEL: test_cvt_scalef32_pk_bf8_f16_imm1:
695
+ ; GCN: ; %bb.0:
696
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
697
+ ; GCN-NEXT: v_cvt_scalef32_pk_bf8_f16 v0, 4.0, v1
698
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
699
+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.bf8.f16 (<2 x i16 > %old , <2 x half > <half 4 .0 , half 4 .0 >, float %scale , i1 false )
700
+ ret <2 x i16 > %ret
701
+ }
702
+
703
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.bf8.f16 with the
; non-splat constant <4.0, 2.0>. The checks expect the packed 32-bit value
; 0x40004400 (0x4400 = half 4.0 in the low 16 bits, 0x4000 = half 2.0 in the
; high 16 bits) to be materialized in a register first and then used as the
; source operand: s_mov_b32 into s0 under the GFX950-SDAG prefix, v_mov_b32
; into v2 under the GFX950-GISEL prefix.
+ define <2 x i16 > @test_cvt_scalef32_pk_bf8_f16_imm2 (<2 x i16 > %old , float %scale ) {
704
+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_pk_bf8_f16_imm2:
705
+ ; GFX950-SDAG: ; %bb.0:
706
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707
+ ; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x40004400
708
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk_bf8_f16 v0, s0, v1
709
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
710
+ ;
711
+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_pk_bf8_f16_imm2:
712
+ ; GFX950-GISEL: ; %bb.0:
713
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
714
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x40004400
715
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk_bf8_f16 v0, v2, v1
716
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
717
+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.bf8.f16 (<2 x i16 > %old , <2 x half > <half 4 .0 , half 2 .0 >, float %scale , i1 false )
718
+ ret <2 x i16 > %ret
719
+ }
720
+
644
721
define <2 x i16 > @test_cvt_scalef32_pk_bf8_bf16_word0 (<2 x i16 > %old , <2 x bfloat> %src , float %scale ) {
645
722
; GCN-LABEL: test_cvt_scalef32_pk_bf8_bf16_word0:
646
723
; GCN: ; %bb.0:
@@ -661,6 +738,27 @@ define <2 x i16> @test_cvt_scalef32_pk_bf8_bf16_word1(<2 x i16> %old, <2 x bfloa
661
738
ret <2 x i16 > %ret
662
739
}
663
740
741
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.bf8.bf16: the
; <2 x bfloat> source is the constant splat <4.0, 4.0>. The GCN checks expect
; the constant to appear directly as the operand 4.0 of
; v_cvt_scalef32_pk_bf8_bf16, with no separate move instruction before it.
+ define <2 x i16 > @test_cvt_scalef32_pk_bf8_bf16_imm1 (<2 x i16 > %old , float %scale ) {
742
+ ; GCN-LABEL: test_cvt_scalef32_pk_bf8_bf16_imm1:
743
+ ; GCN: ; %bb.0:
744
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745
+ ; GCN-NEXT: v_cvt_scalef32_pk_bf8_bf16 v0, 4.0, v1
746
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
747
+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.bf8.bf16 (<2 x i16 > %old , <2 x bfloat> <bfloat 4 .0 , bfloat 4 .0 >, float %scale , i1 false )
748
+ ret <2 x i16 > %ret
749
+ }
750
+
751
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.bf8.bf16 with the
; non-splat constant <4.0, 2.0>. The checks expect the packed 32-bit value
; 0x40004080 (0x4080 = bfloat 4.0 in the low 16 bits, 0x4000 = bfloat 2.0 in
; the high 16 bits) to be moved into s0 and then used as the source operand.
; NOTE(review): only the common GCN prefix is used here, so presumably every
; RUN configuration produces this same sequence; the RUN lines are not
; visible in this excerpt.
+ define <2 x i16 > @test_cvt_scalef32_pk_bf8_bf16_imm2 (<2 x i16 > %old , float %scale ) {
752
+ ; GCN-LABEL: test_cvt_scalef32_pk_bf8_bf16_imm2:
753
+ ; GCN: ; %bb.0:
754
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755
+ ; GCN-NEXT: s_mov_b32 s0, 0x40004080
756
+ ; GCN-NEXT: v_cvt_scalef32_pk_bf8_bf16 v0, s0, v1
757
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
758
+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.bf8.bf16 (<2 x i16 > %old , <2 x bfloat> <bfloat 4 .0 , bfloat 2 .0 >, float %scale , i1 false )
759
+ ret <2 x i16 > %ret
760
+ }
761
+
664
762
define <2 x float > @test_cvt_scale_f32_fp4_byte0 (i32 %src , float %scale ) {
665
763
; GCN-LABEL: test_cvt_scale_f32_fp4_byte0:
666
764
; GCN: ; %bb.0:
@@ -1236,6 +1334,37 @@ define i32 @test_cvt_scalef32_fp4_f16_byte3(<2 x half> %src0, float %scale, i32
1236
1334
ret i32 %ret
1237
1335
}
1238
1336
1337
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.fp4.f16: the <2 x half>
; source is the constant splat <4.0, 4.0>. The GCN checks expect the constant
; to appear directly as the operand 4.0 of v_cvt_scalef32_pk_fp4_f16. The
; instruction writes v1 and the checks then expect a copy of v1 into v0.
; NOTE(review): presumably %old arrives in v1 and the i32 result is returned
; in v0 -- confirm against the calling convention.
+ define i32 @test_cvt_scalef32_fp4_f16_imm1 (float %scale , i32 %old ) {
1338
+ ; GCN-LABEL: test_cvt_scalef32_fp4_f16_imm1:
1339
+ ; GCN: ; %bb.0:
1340
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1341
+ ; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, 4.0, v0
1342
+ ; GCN-NEXT: v_mov_b32_e32 v0, v1
1343
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1344
+ %ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16 (i32 %old , <2 x half > <half 4 .0 , half 4 .0 >, float %scale , i32 0 )
1345
+ ret i32 %ret
1346
+ }
1347
+
1348
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.fp4.f16 with the
; non-splat constant <4.0, 2.0>. The checks expect the packed 32-bit value
; 0x40004400 (0x4400 = half 4.0 in the low 16 bits, 0x4000 = half 2.0 in the
; high 16 bits) to be materialized in a register first and then used as the
; source operand: s_mov_b32 into s0 under the GFX950-SDAG prefix, v_mov_b32
; into v2 under the GFX950-GISEL prefix. In both cases the conversion writes
; v1 and is followed by a copy of v1 into v0.
+ define i32 @test_cvt_scalef32_fp4_f16_imm2 (float %scale , i32 %old ) {
1349
+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_fp4_f16_imm2:
1350
+ ; GFX950-SDAG: ; %bb.0:
1351
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1352
+ ; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x40004400
1353
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, s0, v0
1354
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, v1
1355
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1356
+ ;
1357
+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_fp4_f16_imm2:
1358
+ ; GFX950-GISEL: ; %bb.0:
1359
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1360
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x40004400
1361
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, v2, v0
1362
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, v1
1363
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
1364
+ %ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16 (i32 %old , <2 x half > <half 4 .0 , half 2 .0 >, float %scale , i32 0 )
1365
+ ret i32 %ret
1366
+ }
1367
+
1239
1368
define i32 @test_cvt_scalef32_fp4_bf16_byte0 (<2 x bfloat> %src0 , float %scale , i32 %old ) {
1240
1369
; GCN-LABEL: test_cvt_scalef32_fp4_bf16_byte0:
1241
1370
; GCN: ; %bb.0:
@@ -1283,6 +1412,29 @@ define i32 @test_cvt_scalef32_fp4_bf16_byte3(<2 x bfloat> %src0, float %scale, i
1283
1412
ret i32 %ret
1284
1413
}
1285
1414
1415
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.fp4.bf16: the
; <2 x bfloat> source is the constant splat <4.0, 4.0>. The GCN checks expect
; the constant to appear directly as the operand 4.0 of
; v_cvt_scalef32_pk_fp4_bf16. The instruction writes v1 and the checks then
; expect a copy of v1 into v0.
; NOTE(review): presumably %old arrives in v1 and the i32 result is returned
; in v0 -- confirm against the calling convention.
+ define i32 @test_cvt_scalef32_fp4_bf16_imm1 (float %scale , i32 %old ) {
1416
+ ; GCN-LABEL: test_cvt_scalef32_fp4_bf16_imm1:
1417
+ ; GCN: ; %bb.0:
1418
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1419
+ ; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v1, 4.0, v0
1420
+ ; GCN-NEXT: v_mov_b32_e32 v0, v1
1421
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1422
+ %ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.bf16 (i32 %old , <2 x bfloat> <bfloat 4 .0 , bfloat 4 .0 >, float %scale , i32 0 )
1423
+ ret i32 %ret
1424
+ }
1425
+
1426
; Immediate-source test for llvm.amdgcn.cvt.scalef32.pk.fp4.bf16 with the
; non-splat constant <4.0, 2.0>. The checks expect the packed 32-bit value
; 0x40004080 (0x4080 = bfloat 4.0 in the low 16 bits, 0x4000 = bfloat 2.0 in
; the high 16 bits) to be moved into s0 and then used as the source operand.
; The conversion writes v1 and is followed by a copy of v1 into v0.
; NOTE(review): only the common GCN prefix is used here, so presumably every
; RUN configuration produces this same sequence; the RUN lines are not
; visible in this excerpt.
+ define i32 @test_cvt_scalef32_fp4_bf16_imm2 (float %scale , i32 %old ) {
1427
+ ; GCN-LABEL: test_cvt_scalef32_fp4_bf16_imm2:
1428
+ ; GCN: ; %bb.0:
1429
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430
+ ; GCN-NEXT: s_mov_b32 s0, 0x40004080
1431
+ ; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v1, s0, v0
1432
+ ; GCN-NEXT: v_mov_b32_e32 v0, v1
1433
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1434
+ %ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.bf16 (i32 %old , <2 x bfloat> <bfloat 4 .0 , bfloat 2 .0 >, float %scale , i32 0 )
1435
+ ret i32 %ret
1436
+ }
1437
+
1286
1438
define amdgpu_ps void @test_scalef32_pk32_fp6_f32_vv_inreg_src (<16 x float > inreg %src , float %scale , ptr addrspace (1 ) %out ) {
1287
1439
; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f32_vv_inreg_src:
1288
1440
; GFX950-SDAG: ; %bb.0:
0 commit comments