@@ -1153,6 +1153,9 @@ llvm.func @rocdl_4bit_packed_floats(%old: i32, %source0: f32, %source1: f32, %so
1153
1153
// CHECK: call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f32(i32 %10, <2 x float> %3, i32 %6, float 1.000000e+00, i32 0)
1154
1154
// CHECK: call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %11, <2 x half> %4, i32 %6, float 1.000000e+00, i32 0)
1155
1155
// CHECK: call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.bf16(i32 %12, <2 x bfloat> %5, i32 %6, float 1.000000e+00, i32 0)
1156
+ // CHECK: call <2 x float> @llvm.amdgcn.cvt.scalef32.pk.f32.fp4(i32 %0, float 1.000000e+00, i32 0)
1157
+ // CHECK: call <2 x half> @llvm.amdgcn.cvt.scalef32.pk.f16.fp4(i32 %0, float 1.000000e+00, i32 0)
1158
+ // CHECK: call <2 x bfloat> @llvm.amdgcn.cvt.scalef32.pk.bf16.fp4(i32 %0, float 1.000000e+00, i32 0)
1156
1159
%c0 = llvm.mlir.constant (0 : i32 ) : i32
1157
1160
%scale = llvm.mlir.constant (1.0 : f32 ) : f32
1158
1161
%pk1 = rocdl.cvt.scalef32.pk.fp4.f32 %source0 , %source1 , %scale -> %old [%c0 ] : i32
@@ -1161,6 +1164,9 @@ llvm.func @rocdl_4bit_packed_floats(%old: i32, %source0: f32, %source1: f32, %so
1161
1164
%sr1 = rocdl.cvt.scalef32.sr.pk.fp4.f32 %source , %stoch , %scale -> %pk3 [%c0 ] : i32
1162
1165
%sr2 = rocdl.cvt.scalef32.sr.pk.fp4.f16 %source_half , %stoch , %scale -> %sr1 [%c0 ] : i32
1163
1166
%sr3 = rocdl.cvt.scalef32.sr.pk.fp4.bf16 %source_bfloat , %stoch , %scale -> %sr2 [%c0 ] : i32
1167
+ %pk4 = rocdl.cvt.scalef32.pk.f32.fp4 %old [%c0 ], %scale : vector <2 xf32 >
1168
+ %pk5 = rocdl.cvt.scalef32.pk.f16.fp4 %old [%c0 ], %scale : vector <2 xf16 >
1169
+ %pk6 = rocdl.cvt.scalef32.pk.bf16.fp4 %old [%c0 ], %scale : vector <2 xbf16 >
1164
1170
llvm.return %sr3 : i32
1165
1171
}
1166
1172
0 commit comments