@@ -263,7 +263,7 @@ TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940
263
263
TARGET_BUILTIN(__builtin_amdgcn_fdot2, " fV2hV2hfIb" , " nc" , " dot10-insts" )
264
264
TARGET_BUILTIN(__builtin_amdgcn_fdot2_f16_f16, " hV2hV2hh" , " nc" , " dot9-insts" )
265
265
TARGET_BUILTIN(__builtin_amdgcn_fdot2_bf16_bf16, " sV2sV2ss" , " nc" , " dot9-insts" )
266
- TARGET_BUILTIN(__builtin_amdgcn_fdot2_f32_bf16, " fV2sV2sfIb" , " nc" , " dot9 -insts" )
266
+ TARGET_BUILTIN(__builtin_amdgcn_fdot2_f32_bf16, " fV2sV2sfIb" , " nc" , " dot12 -insts" )
267
267
TARGET_BUILTIN(__builtin_amdgcn_sdot2, " SiV2SsV2SsSiIb" , " nc" , " dot2-insts" )
268
268
TARGET_BUILTIN(__builtin_amdgcn_udot2, " UiV2UsV2UsUiIb" , " nc" , " dot2-insts" )
269
269
TARGET_BUILTIN(__builtin_amdgcn_sdot4, " SiSiSiSiIb" , " nc" , " dot1-insts" )
@@ -276,6 +276,7 @@ TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_bf8, "fUiUif", "nc", "dot11-insts")
276
276
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_fp8, " fUiUif" , " nc" , " dot11-insts" )
277
277
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_fp8, " fUiUif" , " nc" , " dot11-insts" )
278
278
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, " fUiUif" , " nc" , " dot11-insts" )
279
+ TARGET_BUILTIN(__builtin_amdgcn_fdot2c_f32_bf16, " fV2yV2yfIb" , " nc" , " dot13-insts" )
279
280
280
281
// ===----------------------------------------------------------------------===//
281
282
// GFX10+ only builtins.
@@ -431,6 +432,50 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-conversion-
431
432
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, " ifiiIi" , " nc" , " fp8-conversion-insts" )
432
433
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, " ifiiIi" , " nc" , " fp8-conversion-insts" )
433
434
435
+ // ===----------------------------------------------------------------------===//
436
+ // GFX950 only builtins.
437
+ // ===----------------------------------------------------------------------===//
438
+ TARGET_BUILTIN(__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4, " V4fV8ZiV8ZiV4fIiIiIiiIii" , " nc" , " gfx950-insts" )
439
+ TARGET_BUILTIN(__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4, " V16fV8ZiV8ZiV16fIiIiIiiIii" , " nc" , " gfx950-insts" )
440
+
441
+ TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, " V4fV8hV8hV4fIiIiIi" , " nc" , " gfx950-insts" )
442
+ TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_bf16, " V4fV8yV8yV4fIiIiIi" , " nc" , " gfx950-insts" )
443
+ TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_f16, " V16fV8hV8hV16fIiIiIi" , " nc" , " gfx950-insts" )
444
+ TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf16, " V16fV8yV8yV16fIiIiIi" , " nc" , " gfx950-insts" )
445
+ TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x64_i8, " V4iV4iV4iV4iIiIiIi" , " nc" , " gfx950-insts" )
446
+ TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x32_i8, " V16iV4iV4iV16iIiIiIi" , " nc" , " gfx950-insts" )
447
+
448
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_f16, " V4fV8hV16hV4fiIiIi" , " nc" , " gfx950-insts" )
449
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_f16, " V16fV8hV16hV16fiIiIi" , " nc" , " gfx950-insts" )
450
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf16, " V4fV8yV16yV4fiIiIi" , " nc" , " gfx950-insts" )
451
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf16, " V16fV8yV16yV16fiIiIi" , " nc" , " gfx950-insts" )
452
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x128_i8, " V4iV4iV8iV4iiIiIi" , " nc" , " gfx950-insts" )
453
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_32x32x64_i8, " V16iV4iV8iV16iiIiIi" , " nc" , " gfx950-insts" )
454
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_bf8_bf8, " V4fV4iV8iV4fiIiIi" , " nc" , " gfx950-insts" )
455
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_bf8_fp8, " V4fV4iV8iV4fiIiIi" , " nc" , " gfx950-insts" )
456
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_bf8, " V4fV4iV8iV4fiIiIi" , " nc" , " gfx950-insts" )
457
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, " V4fV4iV8iV4fiIiIi" , " nc" , " gfx950-insts" )
458
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, " V16fV4iV8iV16fiIiIi" , " nc" , " gfx950-insts" )
459
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, " V16fV4iV8iV16fiIiIi" , " nc" , " gfx950-insts" )
460
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, " V16fV4iV8iV16fiIiIi" , " nc" , " gfx950-insts" )
461
+ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, " V16fV4iV8iV16fiIiIi" , " nc" , " gfx950-insts" )
462
+
463
+ TARGET_BUILTIN(__builtin_amdgcn_permlane16_swap, " V2UiUiUiIbIb" , " nc" , " permlane16-swap" )
464
+ TARGET_BUILTIN(__builtin_amdgcn_permlane32_swap, " V2UiUiUiIbIb" , " nc" , " permlane32-swap" )
465
+
466
+ TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr4_b64_v2i32, " V2iV2i*3" , " nc" , " gfx950-insts" )
467
+ TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr6_b96_v3i32, " V3iV3i*3" , " nc" , " gfx950-insts" )
468
+ TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr8_b64_v2i32, " V2iV2i*3" , " nc" , " gfx950-insts" )
469
+ TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr16_b64_v4i16, " V4sV4s*3" , " nc" , " gfx950-insts" )
470
+ TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr16_b64_v4f16, " V4hV4h*3" , " nc" , " gfx950-insts" )
471
+ TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr16_b64_v4bf16, " V4yV4y*3" , " nc" , " gfx950-insts" )
472
+
473
+ TARGET_BUILTIN(__builtin_amdgcn_ashr_pk_i8_i32, " UsUiUiUi" , " nc" , " ashr-pk-insts" )
474
+ TARGET_BUILTIN(__builtin_amdgcn_ashr_pk_u8_i32, " UsUiUiUi" , " nc" , " ashr-pk-insts" )
475
+
476
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_2xpk16_fp6_f32, " V6UiV16fV16ff" , " nc" , " gfx950-insts" )
477
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_2xpk16_bf6_f32, " V6UiV16fV16ff" , " nc" , " gfx950-insts" )
478
+
434
479
// ===----------------------------------------------------------------------===//
435
480
// GFX12+ only builtins.
436
481
// ===----------------------------------------------------------------------===//
@@ -520,5 +565,60 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs",
520
565
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
521
566
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
522
567
568
+ TARGET_BUILTIN(__builtin_amdgcn_prng_b32, " UiUi" , " nc" , " prng-inst" )
569
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_fp6_f16, " V6UiV32hf" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
570
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf6_f16, " V6UiV32hf" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
571
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_fp6_bf16, " V6UiV32yf" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
572
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf6_bf16, " V6UiV32yf" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
573
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_f16_fp8, " V2hV2hifIiIb" , " nc" , " fp8-cvt-scale-insts" )
574
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_f16_bf8, " V2hV2hifIiIb" , " nc" , " bf8-cvt-scale-insts" )
575
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_f32_fp8, " fifIi" , " nc" , " fp8-cvt-scale-insts" )
576
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_f32_bf8, " fifIi" , " nc" , " bf8-cvt-scale-insts" )
577
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp8_f32, " V2sV2sfffIb" , " nc" , " fp8-cvt-scale-insts" )
578
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf8_f32, " V2sV2sfffIb" , " nc" , " bf8-cvt-scale-insts" )
579
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f32_fp8, " V2fUifIb" , " nc" , " fp8-cvt-scale-insts" )
580
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f32_bf8, " V2fUifIb" , " nc" , " bf8-cvt-scale-insts" )
581
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp8_f16, " V2sV2sV2hfIb" , " nc" , " fp8-cvt-scale-insts" )
582
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp8_bf16, " V2sV2sV2yfIb" , " nc" , " fp8-cvt-scale-insts" )
583
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf8_f16, " V2sV2sV2hfIb" , " nc" , " bf8-cvt-scale-insts" )
584
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf8_bf16, " V2sV2sV2yfIb" , " nc" , " bf8-cvt-scale-insts" )
585
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f32_fp4, " V2fUifIi" , " nc" , " fp4-cvt-scale-insts" )
586
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp4_f32, " UiUifffIi" , " nc" , " fp4-cvt-scale-insts" )
587
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f16_fp4, " V2hUifIi" , " nc" , " fp4-cvt-scale-insts" )
588
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf16_fp4, " V2yUifIi" , " nc" , " fp4-cvt-scale-insts" )
589
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_f32_fp6, " V32fV6Uif" , " nc" , " fp6bf6-cvt-scale-insts" )
590
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_f32_bf6, " V32fV6Uif" , " nc" , " fp6bf6-cvt-scale-insts" )
591
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_f16_fp6, " V32hV6Uif" , " nc" , " fp6bf6-cvt-scale-insts" )
592
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf16_fp6, " V32yV6Uif" , " nc" , " fp6bf6-cvt-scale-insts" )
593
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_f16_bf6, " V32hV6Uif" , " nc" , " fp6bf6-cvt-scale-insts" )
594
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf16_bf6, " V32yV6Uif" , " nc" , " fp6bf6-cvt-scale-insts" )
595
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f16_fp8, " V2hUifIb" , " nc" , " fp8-cvt-scale-insts" )
596
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf16_fp8, " V2yUifIb" , " nc" , " fp8-cvt-scale-insts" )
597
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f16_bf8, " V2hUifIb" , " nc" , " bf8-cvt-scale-insts" )
598
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf16_bf8, " V2yUifIb" , " nc" , " bf8-cvt-scale-insts" )
599
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp4_f16, " UiUiV2hfIi" , " nc" , " fp4-cvt-scale-insts" )
600
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp4_bf16, " UiUiV2yfIi" , " nc" , " fp4-cvt-scale-insts" )
601
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f16, " UiUiV2hUifIi" , " nc" , " fp4-cvt-scale-insts" )
602
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_bf16, " UiUiV2yUifIi" , " nc" , " fp4-cvt-scale-insts" )
603
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32, " UiUiV2fUifIi" , " nc" , " fp4-cvt-scale-insts" )
604
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_bf8_bf16, " iiyUifIi" , " nc" , " bf8-cvt-scale-insts" )
605
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_bf8_f16, " iihUifIi" , " nc" , " bf8-cvt-scale-insts" )
606
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_bf8_f32, " iifUifIi" , " nc" , " bf8-cvt-scale-insts" )
607
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_fp8_bf16, " iiyUifIi" , " nc" , " fp8-cvt-scale-insts" )
608
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_fp8_f16, " iihUifIi" , " nc" , " fp8-cvt-scale-insts" )
609
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_fp8_f32, " iifUifIi" , " nc" , " fp8-cvt-scale-insts" )
610
+
611
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16, " V6UiV32yUif" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
612
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f16, " V6UiV32hUif" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
613
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32, " V6UiV32fUif" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
614
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16, " V6UiV32yUif" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
615
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16, " V6UiV32hUif" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
616
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32, " V6UiV32fUif" , " nc" , " f16bf16-to-fp6bf6-cvt-scale-insts" )
617
+ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b32, " iiiiIUc" , " nc" , " bitop3-insts" )
618
+ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, " ssssIUc" , " nc" , " bitop3-insts" )
619
+
620
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, " V2yV2yfUiIb" , " nc" , " f32-to-f16bf16-cvt-sr-insts" )
621
+ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, " V2hV2hfUiIb" , " nc" , " f32-to-f16bf16-cvt-sr-insts" )
622
+
523
623
#undef BUILTIN
524
624
#undef TARGET_BUILTIN
0 commit comments