@@ -534,3 +534,99 @@ define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
534
534
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32 (float %x , i32 %r , i32 %old , i32 3 )
535
535
ret i32 %ret
536
536
}
537
+
538
+ define float @test_sext_cvt_f32_fp8 (i16 %a ) { ; fp8-to-f32 convert whose i32 source is a sign-extended i16 argument; checks expect a v_bfe_i32 ahead of the convert
539
+ ; GFX940-LABEL: test_sext_cvt_f32_fp8:
540
+ ; GFX940: ; %bb.0:
541
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542
+ ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
543
+ ; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
544
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
545
+ ;
546
+ ; GFX12-LABEL: test_sext_cvt_f32_fp8:
547
+ ; GFX12: ; %bb.0:
548
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
549
+ ; GFX12-NEXT: s_wait_expcnt 0x0
550
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
551
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
552
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
553
+ ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
554
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
555
+ ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[0,1]
556
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
557
+ %a.sext = sext i16 %a to i32 ; widen the i16 argument to the i32 source operand passed to the intrinsic
558
+ %ret = tail call float @llvm.amdgcn.cvt.f32.fp8 (i32 %a.sext , i32 1 ) ; operand i32 1 -- checks expect src0_sel:BYTE_1 (gfx940) and op_sel:[0,1] (gfx12); presumably the source byte index, TODO confirm
559
+ ret float %ret
560
+ }
561
+
562
+ define float @test_sext_cvt_f32_bf8 (i16 %a ) { ; bf8-to-f32 convert whose i32 source is a sign-extended i16 argument; checks expect a v_bfe_i32 ahead of the convert
563
+ ; GFX940-LABEL: test_sext_cvt_f32_bf8:
564
+ ; GFX940: ; %bb.0:
565
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566
+ ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
567
+ ; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
568
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
569
+ ;
570
+ ; GFX12-LABEL: test_sext_cvt_f32_bf8:
571
+ ; GFX12: ; %bb.0:
572
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
573
+ ; GFX12-NEXT: s_wait_expcnt 0x0
574
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
575
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
576
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
577
+ ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
578
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
579
+ ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
580
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
581
+ %a.sext = sext i16 %a to i32 ; widen the i16 argument to the i32 source operand passed to the intrinsic
582
+ %ret = tail call float @llvm.amdgcn.cvt.f32.bf8 (i32 %a.sext , i32 1 ) ; operand i32 1 -- checks expect src0_sel:BYTE_1 (gfx940) and op_sel:[0,1] (gfx12); presumably the source byte index, TODO confirm
583
+ ret float %ret
584
+ }
585
+
586
+ define <2 x float > @test_sext_cvt_pk_f32_bf8_word1 (i16 %a ) { ; packed bf8-to-f32 convert (two floats, returned in v[0:1] per the checks) fed by a sign-extended i16 argument
587
+ ; GFX940-LABEL: test_sext_cvt_pk_f32_bf8_word1:
588
+ ; GFX940: ; %bb.0:
589
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590
+ ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
591
+ ; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
592
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
593
+ ;
594
+ ; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
595
+ ; GFX12: ; %bb.0:
596
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
597
+ ; GFX12-NEXT: s_wait_expcnt 0x0
598
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
599
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
600
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
601
+ ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
602
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
603
+ ; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
604
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
605
+ %a.sext = sext i16 %a to i32 ; widen the i16 argument to the i32 source operand; bits 16..31 become copies of the sign bit
606
+ %ret = tail call <2 x float > @llvm.amdgcn.cvt.pk.f32.bf8 (i32 %a.sext , i1 true ) ; operand i1 true -- checks expect src0_sel:WORD_1 (gfx940) and op_sel:[1,0] (gfx12); presumably selects the upper 16-bit word, TODO confirm
607
+ ret <2 x float > %ret
608
+ }
609
+
610
+ define <2 x float > @test_sext_cvt_pk_f32_fp8_word0 (i16 %a ) { ; packed fp8-to-f32 convert (two floats, returned in v[0:1] per the checks) fed by a sign-extended i16 argument
611
+ ; GFX940-LABEL: test_sext_cvt_pk_f32_fp8_word0:
612
+ ; GFX940: ; %bb.0:
613
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
614
+ ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
615
+ ; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
616
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
617
+ ;
618
+ ; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
619
+ ; GFX12: ; %bb.0:
620
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
621
+ ; GFX12-NEXT: s_wait_expcnt 0x0
622
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
623
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
624
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
625
+ ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
626
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
627
+ ; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
628
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
629
+ %a.sext = sext i16 %a to i32 ; widen the i16 argument to the i32 source operand passed to the intrinsic
630
+ %ret = tail call <2 x float > @llvm.amdgcn.cvt.pk.f32.fp8 (i32 %a.sext , i1 false ) ; operand i1 false -- checks expect the plain _e32 form with no src0_sel/op_sel modifier on both targets
631
+ ret <2 x float > %ret
632
+ }
0 commit comments