@@ -6880,21 +6880,36 @@ Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
 }
 
 llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
-    unsigned IID, bool ExtendLane, llvm::Type *RetTy,
+    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
     SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
 
   const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                              RetTy->getPrimitiveSizeInBits();
   llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
                        Ops[1]->getType()};
-  if (ExtendLane) {
+  if (ExtendLaneArg) {
     auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
     Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                         Builder.getInt64(0));
   }
   return EmitFP8NeonCall(IID, Tys, Ops, E, name);
 }
 
+llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
+    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
+    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
+
+  if (ExtendLaneArg) {
+    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
+    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
+                                        Builder.getInt64(0));
+  }
+  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
+                             RetTy->getPrimitiveSizeInBits();
+  return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
+                         Ops, E, name);
+}
+
 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                             bool neg) {
   int SV = cast<ConstantInt>(V)->getSExtValue();
@@ -12840,7 +12855,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
 
   unsigned Int;
   bool ExtractLow = false;
-  bool ExtendLane = false;
+  bool ExtendLaneArg = false;
   switch (BuiltinID) {
   default: return nullptr;
   case NEON::BI__builtin_neon_vbsl_v:
@@ -14115,24 +14130,24 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                Ops, E, "fdot2");
   case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
   case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
-    ExtendLane = true;
+    ExtendLaneArg = true;
     LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
   case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
     return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
-                               ExtendLane, HalfTy, Ops, E, "fdot2_lane");
+                               ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
   case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
   case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
     return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
                                FloatTy, Ops, E, "fdot4");
   case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
   case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
-    ExtendLane = true;
+    ExtendLaneArg = true;
     LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
   case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
     return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
-                               ExtendLane, FloatTy, Ops, E, "fdot4_lane");
+                               ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
 
   case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
     return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
@@ -14158,7 +14173,42 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
                            {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                            "vmlall");
-
+  case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
+    ExtendLaneArg = true;
+    LLVM_FALLTHROUGH;
+  case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
+    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
+                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
+  case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
+    ExtendLaneArg = true;
+    LLVM_FALLTHROUGH;
+  case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
+    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
+                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
+  case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
+    ExtendLaneArg = true;
+    LLVM_FALLTHROUGH;
+  case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
+    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
+                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
+  case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
+    ExtendLaneArg = true;
+    LLVM_FALLTHROUGH;
+  case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
+    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
+                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
+  case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
+    ExtendLaneArg = true;
+    LLVM_FALLTHROUGH;
+  case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
+    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
+                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
+  case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
+    ExtendLaneArg = true;
+    LLVM_FALLTHROUGH;
+  case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
+    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
+                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
   case NEON::BI__builtin_neon_vamin_f16:
   case NEON::BI__builtin_neon_vaminq_f16:
   case NEON::BI__builtin_neon_vamin_f32:
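For readers following the new lane-indexed FP8 FMLA path, here is a minimal usage sketch of one of the builtins wired up above. The ACLE wrapper name mirrors the builtin spelling, but the exact arm_neon.h declaration, the mfloat8/fpm_t type names, and the required target-feature flags are assumptions for illustration, not taken from this patch.

```cpp
// Sketch only: assumes a toolchain that exposes the FP8 Neon multiply-add
// intrinsics (the exact -march feature string, e.g. "+fp8fma", is a guess).
#include <arm_neon.h>

float16x8_t fmlalb_lane_example(float16x8_t acc, mfloat8x16_t vn,
                                mfloat8x8_t vm, fpm_t fpm) {
  // The _lane (as opposed to _laneq) form passes a 64-bit vm, so codegen goes
  // through EmitFP8NeonFMLACall with ExtendLaneArg = true, which widens vm
  // into a poison <16 x i8> before emitting aarch64_neon_fp8_fmlalb_lane.
  return vmlalbq_lane_f16_mf8_fpm(acc, vn, vm, /*lane=*/0, fpm);
}
```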