Commit c34efe3

[AArch64][Clang] Refactor code to emit SVE & SME builtins (#70662)
This patch removes duplicated code in EmitAArch64SVEBuiltinExpr and EmitAArch64SMEBuiltinExpr by creating a new function, GetAArch64SVEProcessedOperands, which handles splitting multi-vector arguments into their parts using vector extracts. These changes are non-functional.
1 parent 930bc6c commit c34efe3
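
To make the splitting described above concrete, here is a minimal sketch at the ACLE level (the function name is illustrative; the intrinsic signature matches the acle_sve_st2-bfloat.c test further down, and it assumes a compiler with SVE and bf16 support):

#include <arm_sve.h>

// For an svbfloat16x2_t argument, which lowers to <vscale x 16 x bfloat>,
// GetAArch64SVEProcessedOperands computes
//   N = (MinElts * ElementBits) / 128 = (16 * 16) / 128 = 2
// and splits the tuple with two llvm.vector.extract calls of
// <vscale x 8 x bfloat> (at element indices 0 and 8), which are then passed
// directly to llvm.aarch64.sve.st2.nxv8bf16. Predicate vectors use a divisor
// of 16 instead of 128.
void store_two_vectors(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data) {
  svst2_bf16(pg, base, data);
}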

8 files changed (+803, -792 lines)

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 78 additions & 72 deletions

@@ -9617,22 +9617,17 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
   Value *BasePtr = Ops[1];

   // Does the store have an offset?
-  if (Ops.size() > 3)
+  if (Ops.size() > (2 + N))
     BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);

-  Value *Val = Ops.back();
-
   // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
   // need to break up the tuple vector.
   SmallVector<llvm::Value*, 5> Operands;
-  unsigned MinElts = VTy->getElementCount().getKnownMinValue();
-  for (unsigned I = 0; I < N; ++I) {
-    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
-    Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx));
-  }
+  for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
+    Operands.push_back(Ops[I]);
   Operands.append({Predicate, BasePtr});
-
   Function *F = CGM.getIntrinsic(IntID, { VTy });
+
   return Builder.CreateCall(F, Operands);
 }

@@ -9939,26 +9934,24 @@ Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
   return Call;
 }

-Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
-                                                  const CallExpr *E) {
+void CodeGenFunction::GetAArch64SVEProcessedOperands(
+    unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
+    SVETypeFlags TypeFlags) {
   // Find out if any arguments are required to be integer constant expressions.
   unsigned ICEArguments = 0;
   ASTContext::GetBuiltinTypeError Error;
   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
   assert(Error == ASTContext::GE_None && "Should not codegen an error");

-  llvm::Type *Ty = ConvertType(E->getType());
-  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
-      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
-    Value *Val = EmitScalarExpr(E->getArg(0));
-    return EmitSVEReinterpret(Val, Ty);
-  }
+  // Tuple set/get only requires one insert/extract vector, which is
+  // created by EmitSVETupleSetOrGet.
+  bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();

-  llvm::SmallVector<Value *, 4> Ops;
   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    if ((ICEArguments & (1 << i)) == 0)
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    else {
+    bool IsICE = ICEArguments & (1 << i);
+    Value *Arg = EmitScalarExpr(E->getArg(i));
+
+    if (IsICE) {
       // If this is required to be a constant, constant fold it so that we know
       // that the generated intrinsic gets a ConstantInt.
       std::optional<llvm::APSInt> Result =

@@ -9970,12 +9963,49 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
       // immediate requires more than a handful of bits.
       *Result = Result->extOrTrunc(32);
       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
+      continue;
+    }
+
+    if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
+      Ops.push_back(Arg);
+      continue;
+    }
+
+    auto *VTy = cast<ScalableVectorType>(Arg->getType());
+    unsigned MinElts = VTy->getMinNumElements();
+    bool IsPred = VTy->getElementType()->isIntegerTy(1);
+    unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
+
+    if (N == 1) {
+      Ops.push_back(Arg);
+      continue;
+    }
+
+    for (unsigned I = 0; I < N; ++I) {
+      Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
+      auto *NewVTy =
+          ScalableVectorType::get(VTy->getElementType(), MinElts / N);
+      Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
     }
   }
+}
+
+Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
+                                                  const CallExpr *E) {
+  llvm::Type *Ty = ConvertType(E->getType());
+  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
+      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
+    Value *Val = EmitScalarExpr(E->getArg(0));
+    return EmitSVEReinterpret(Val, Ty);
+  }

   auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
                                               AArch64SVEIntrinsicsProvenSorted);
+
+  llvm::SmallVector<Value *, 4> Ops;
   SVETypeFlags TypeFlags(Builtin->TypeModifier);
+  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
+
   if (TypeFlags.isLoad())
     return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
                              TypeFlags.isZExtReturn());

@@ -9989,14 +10019,14 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
     return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isGatherPrefetch())
     return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (TypeFlags.isStructLoad())
-    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (TypeFlags.isStructStore())
-    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isStructLoad())
+    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isStructStore())
+    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
-    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
+    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
   else if (TypeFlags.isTupleCreate())
-    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
+    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
   else if (TypeFlags.isUndef())
     return UndefValue::get(Ty);
   else if (Builtin->LLVMIntrinsic != 0) {

@@ -10248,13 +10278,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   case SVE::BI__builtin_sve_svtbl2_f64: {
     SVETypeFlags TF(Builtin->TypeModifier);
     auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
-    Value *V0 = Builder.CreateExtractVector(VTy, Ops[0],
-                                            ConstantInt::get(CGM.Int64Ty, 0));
-    unsigned MinElts = VTy->getMinNumElements();
-    Value *V1 = Builder.CreateExtractVector(
-        VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts));
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
-    return Builder.CreateCall(F, {V0, V1, Ops[1]});
+    return Builder.CreateCall(F, Ops);
   }

   case SVE::BI__builtin_sve_svset_neonq_s8:

@@ -10312,35 +10337,13 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,

 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
                                                   const CallExpr *E) {
-  // Find out if any arguments are required to be integer constant expressions.
-  unsigned ICEArguments = 0;
-  ASTContext::GetBuiltinTypeError Error;
-  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
-  assert(Error == ASTContext::GE_None && "Should not codegen an error");
-
-  llvm::Type *Ty = ConvertType(E->getType());
-  llvm::SmallVector<Value *, 4> Ops;
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    if ((ICEArguments & (1 << i)) == 0)
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    else {
-      // If this is required to be a constant, constant fold it so that we know
-      // that the generated intrinsic gets a ConstantInt.
-      std::optional<llvm::APSInt> Result =
-          E->getArg(i)->getIntegerConstantExpr(getContext());
-      assert(Result && "Expected argument to be a constant");
-
-      // Immediates for SVE llvm intrinsics are always 32bit. We can safely
-      // truncate because the immediate has been range checked and no valid
-      // immediate requires more than a handful of bits.
-      *Result = Result->extOrTrunc(32);
-      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
-    }
-  }
-
   auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
                                               AArch64SMEIntrinsicsProvenSorted);
+
+  llvm::SmallVector<Value *, 4> Ops;
   SVETypeFlags TypeFlags(Builtin->TypeModifier);
+  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
+
   if (TypeFlags.isLoad() || TypeFlags.isStore())
     return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())

@@ -10353,21 +10356,24 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
            BuiltinID == SME::BI__builtin_sme_svldr_za ||
            BuiltinID == SME::BI__builtin_sme_svstr_za)
     return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (Builtin->LLVMIntrinsic != 0) {
-    // Predicates must match the main datatype.
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
-        if (PredTy->getElementType()->isIntegerTy(1))
-          Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));

-    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
-                                   getSVEOverloadTypes(TypeFlags, Ty, Ops));
-    Value *Call = Builder.CreateCall(F, Ops);
-    return Call;
-  }
+  // Should not happen!
+  if (Builtin->LLVMIntrinsic == 0)
+    return nullptr;

-  /// Should not happen
-  return nullptr;
+  // Predicates must match the main datatype.
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
+      if (PredTy->getElementType()->isIntegerTy(1))
+        Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
+
+  Function *F =
+      TypeFlags.isOverloadNone()
+          ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
+          : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
+  Value *Call = Builder.CreateCall(F, Ops);
+
+  return FormSVEBuiltinResult(Call);
 }

 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
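
A note on the first hunk above: EmitSVEStructStore no longer splits the tuple itself; it assumes GetAArch64SVEProcessedOperands has already done so. A sketch of the operand layout it now consumes for an stN builtin, inferred from that hunk (the predicate handling sits outside the quoted lines):

/* Ops[0]                      governing predicate (converted earlier in the function)
 * Ops[1]                      base pointer
 * Ops[2]                      optional vnum offset, present when Ops.size() > 2 + N
 * Ops[Ops.size() - N .. end]  the N already-extracted part vectors of the tuple */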

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 5 additions & 0 deletions

@@ -4311,6 +4311,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitSMELdrStr(const SVETypeFlags &TypeFlags,
                              llvm::SmallVectorImpl<llvm::Value *> &Ops,
                              unsigned IntID);
+
+  void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E,
+                                      SmallVectorImpl<llvm::Value *> &Ops,
+                                      SVETypeFlags TypeFlags);
+
   llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);

   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c

Lines changed: 18 additions & 18 deletions

@@ -16,18 +16,18 @@
 #endif
 // CHECK-LABEL: @test_svst2_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_svst2_bf16u10__SVBool_tPu6__bf1614svbfloat16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data)

@@ -37,20 +37,20 @@ void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data)

 // CHECK-LABEL: @test_svst2_vnum_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[TMP3]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z20test_svst2_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data)
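
The test bodies are unchanged by this commit and are not shown on this page; only the generated CHECK lines move. For reference, the second test exercises the vnum form of the store; a sketch of an equivalent call (illustrative, not the literal test source):

void store_two_vectors_vnum(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data) {
  // Same as the plain form except that the base is first advanced by `vnum`
  // whole <vscale x 8 x bfloat> vectors; this is the getelementptr ([[TMP3]])
  // in the CHECK lines above.
  svst2_vnum_bf16(pg, base, vnum, data);
}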
