@@ -713,6 +713,10 @@ def ROCDL_V2I16Type : FixedVectorOfLengthAndType<[2], [I16]>,
713
713
BuildableType<"::mlir::VectorType::get("
714
714
"{2},$_builder.getI16Type())">;
715
715
716
// Fixed-length vector type holding exactly two f32 elements. The builder
// snippet constructs it as VectorType::get({2}, f32).
def ROCDL_V2F32Type
    : FixedVectorOfLengthAndType<[2], [F32]>,
      BuildableType<"::mlir::VectorType::get({2},$_builder.getF32Type())">;
719
+
716
720
// Fixed-length vector type holding exactly two f16 elements. The builder
// snippet constructs it as VectorType::get({2}, f16).
def ROCDL_V2F16Type
    : FixedVectorOfLengthAndType<[2], [F16]>,
      BuildableType<"::mlir::VectorType::get({2},$_builder.getF16Type())">;
@@ -1005,6 +1009,120 @@ def ROCDL_CvtScaleF32SrBf8F32Op :
1005
1009
}];
1006
1010
}
1007
1011
1012
+ //===---------------------------------------------------------------------===//
1013
+ // 4-bit float scale intrinsics
1014
+ //===---------------------------------------------------------------------===//
1015
// Wraps the `cvt.scalef32.pk.fp4.f32` intrinsic: two scalar f32 sources
// (`src0`, `src1`), an f32 `scale`, and an i32 `old` indexed by `byteSel`.
// NOTE(review): `old` presumably supplies the bytes that are left untouched,
// as the stochastic-rounding fp4 ops in this file document -- confirm.
def ROCDL_CvtScaleF32PkFp4F32Op :
    ROCDL_IntrOp<"cvt.scalef32.pk.fp4.f32", [], [], [Pure], 1>,
    Arguments<(ins I32:$old, F32:$src0, F32:$src1, F32:$scale, I32:$byteSel)> {
  let summary = "Convert f32 to packed fp4 and scale";
  let description = [{
    Convert `src0` and `src1` based on `byteSel` to packed fp4, then scale
    the packed values by the exponent in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src0 `,` $src1 `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1026
+
1027
// Wraps the `cvt.scalef32.pk.fp4.f16` intrinsic; the source is a 2 x f16
// vector.
def ROCDL_CvtScaleF32PkFp4F16Op
    : ROCDL_IntrOp<"cvt.scalef32.pk.fp4.f16", [], [], [Pure], 1>,
      Arguments<(ins
        I32:$old,
        ROCDL_V2F16Type:$src,
        F32:$scale,
        I32:$byteSel)> {
  let summary = "Convert f16 to packed fp4 and scale";
  let description = [{
    Converts `src`, based on `byteSel`, to packed fp4 and then scales the
    packed values by the exponent held in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1038
+
1039
// Wraps the `cvt.scalef32.pk.fp4.bf16` intrinsic; the source operand has type
// ROCDL_V2BF16Type.
def ROCDL_CvtScaleF32PkFp4Bf16Op
    : ROCDL_IntrOp<"cvt.scalef32.pk.fp4.bf16", [], [], [Pure], 1>,
      Arguments<(ins
        I32:$old,
        ROCDL_V2BF16Type:$src,
        F32:$scale,
        I32:$byteSel)> {
  let summary = "Convert bf16 to packed fp4 and scale";
  let description = [{
    Converts `src`, based on `byteSel`, to packed fp4 and then scales the
    packed values by the exponent held in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1050
+
1051
// Wraps the `cvt.scalef32.sr.pk.fp4.f32` intrinsic; the source is a 2 x f32
// vector and `seed` feeds the stochastic rounding.
def ROCDL_CvtScaleF32SrPkFp4F32Op
    : ROCDL_IntrOp<"cvt.scalef32.sr.pk.fp4.f32", [], [], [Pure], 1>,
      Arguments<(ins
        I32:$old,
        ROCDL_V2F32Type:$src,
        I32:$seed,
        F32:$scale,
        I32:$byteSel)> {
  let summary = "Scale and convert f32 to packed fp4 using stochastic rounding";
  let description = [{
    Scales `src` by the exponent in `scale`, then converts it to packed fp4
    with stochastic rounding driven by the seed data in `seed`. The result is
    stored into the `byteSel`th byte of `old`; the other bytes are preserved.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1063
+
1064
// Wraps the `cvt.scalef32.sr.pk.fp4.f16` intrinsic; the source is a 2 x f16
// vector and `seed` feeds the stochastic rounding.
def ROCDL_CvtScaleF32SrPkFp4F16Op
    : ROCDL_IntrOp<"cvt.scalef32.sr.pk.fp4.f16", [], [], [Pure], 1>,
      Arguments<(ins
        I32:$old,
        ROCDL_V2F16Type:$src,
        I32:$seed,
        F32:$scale,
        I32:$byteSel)> {
  let summary = "Scale and convert f16 to packed fp4 using stochastic rounding";
  let description = [{
    Scales `src` by the exponent in `scale`, then converts it to packed fp4
    with stochastic rounding driven by the seed data in `seed`. The result is
    stored into the `byteSel`th byte of `old`; the other bytes are preserved.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1076
+
1077
// Wraps the `cvt.scalef32.sr.pk.fp4.bf16` intrinsic; the source operand has
// type ROCDL_V2BF16Type and `seed` feeds the stochastic rounding.
def ROCDL_CvtScaleF32SrPkFp4Bf16Op
    : ROCDL_IntrOp<"cvt.scalef32.sr.pk.fp4.bf16", [], [], [Pure], 1>,
      Arguments<(ins
        I32:$old,
        ROCDL_V2BF16Type:$src,
        I32:$seed,
        F32:$scale,
        I32:$byteSel)> {
  let summary = "Scale and convert bf16 to packed fp4 using stochastic rounding";
  let description = [{
    Scales `src` by the exponent in `scale`, then converts it to packed fp4
    with stochastic rounding driven by the seed data in `seed`. The result is
    stored into the `byteSel`th byte of `old`; the other bytes are preserved.
  }];
  let assemblyFormat = [{
    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
  }];
}
1089
+
1090
// Wraps the `cvt.scalef32.pk.f32.fp4` intrinsic: an i32 `src` indexed by
// `byteSel`, plus an f32 `scale`.
def ROCDL_CvtScaleF32PkF32Fp4Op
    : ROCDL_IntrOp<"cvt.scalef32.pk.f32.fp4", [], [], [Pure], 1>,
      Arguments<(ins
        I32:$src,
        F32:$scale,
        I32:$byteSel)> {
  let summary = "Convert fp4 to packed f32 and scale";
  let description = [{
    Converts `src`, based on `byteSel`, to packed f32 and then scales the
    packed values by the exponent held in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
  }];
}
1101
+
1102
+
1103
// Wraps the `cvt.scalef32.pk.f16.fp4` intrinsic: an i32 `src` indexed by
// `byteSel`, plus an f32 `scale`.
def ROCDL_CvtScaleF32PkF16Fp4Op
    : ROCDL_IntrOp<"cvt.scalef32.pk.f16.fp4", [], [], [Pure], 1>,
      Arguments<(ins
        I32:$src,
        F32:$scale,
        I32:$byteSel)> {
  let summary = "Convert fp4 to packed f16 and scale";
  let description = [{
    Converts `src`, based on `byteSel`, to packed f16 and then scales the
    packed values by the exponent held in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
  }];
}
1114
+
1115
// Wraps the `cvt.scalef32.pk.bf16.fp4` intrinsic: an i32 `src` indexed by
// `byteSel`, plus an f32 `scale`.
def ROCDL_CvtScaleF32PkBf16Fp4Op
    : ROCDL_IntrOp<"cvt.scalef32.pk.bf16.fp4", [], [], [Pure], 1>,
      Arguments<(ins
        I32:$src,
        F32:$scale,
        I32:$byteSel)> {
  let summary = "Convert fp4 to packed bf16 and scale";
  let description = [{
    Converts `src`, based on `byteSel`, to packed bf16 and then scales the
    packed values by the exponent held in `scale`.
  }];
  let assemblyFormat = [{
    attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
  }];
}
1008
1126
//===---------------------------------------------------------------------===//
1009
1127
// 8-bit float intrinsics
1010
1128
//===---------------------------------------------------------------------===//
0 commit comments