
Commit e2550b7

Revert "[AArch64][Clang] Refactor code to emit SVE & SME builtins (#70662)"
This reverts commit c34efe3.
1 parent 6e8d957 · commit e2550b7

8 files changed: +792 -803 lines changed


clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 72 additions & 78 deletions
@@ -9617,17 +9617,22 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
   Value *BasePtr = Ops[1];
 
   // Does the store have an offset?
-  if (Ops.size() > (2 + N))
+  if (Ops.size() > 3)
     BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
 
+  Value *Val = Ops.back();
+
   // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
   // need to break up the tuple vector.
   SmallVector<llvm::Value*, 5> Operands;
-  for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
-    Operands.push_back(Ops[I]);
+  unsigned MinElts = VTy->getElementCount().getKnownMinValue();
+  for (unsigned I = 0; I < N; ++I) {
+    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
+    Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx));
+  }
   Operands.append({Predicate, BasePtr});
-  Function *F = CGM.getIntrinsic(IntID, { VTy });
 
+  Function *F = CGM.getIntrinsic(IntID, { VTy });
   return Builder.CreateCall(F, Operands);
 }
 
@@ -9934,24 +9939,26 @@ Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
   return Call;
 }
 
-void CodeGenFunction::GetAArch64SVEProcessedOperands(
-    unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
-    SVETypeFlags TypeFlags) {
+Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
+                                                  const CallExpr *E) {
   // Find out if any arguments are required to be integer constant expressions.
   unsigned ICEArguments = 0;
   ASTContext::GetBuiltinTypeError Error;
   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
   assert(Error == ASTContext::GE_None && "Should not codegen an error");
 
-  // Tuple set/get only requires one insert/extract vector, which is
-  // created by EmitSVETupleSetOrGet.
-  bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
+  llvm::Type *Ty = ConvertType(E->getType());
+  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
+      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
+    Value *Val = EmitScalarExpr(E->getArg(0));
+    return EmitSVEReinterpret(Val, Ty);
+  }
 
+  llvm::SmallVector<Value *, 4> Ops;
   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    bool IsICE = ICEArguments & (1 << i);
-    Value *Arg = EmitScalarExpr(E->getArg(i));
-
-    if (IsICE) {
+    if ((ICEArguments & (1 << i)) == 0)
+      Ops.push_back(EmitScalarExpr(E->getArg(i)));
+    else {
       // If this is required to be a constant, constant fold it so that we know
       // that the generated intrinsic gets a ConstantInt.
       std::optional<llvm::APSInt> Result =
@@ -9963,49 +9970,12 @@ void CodeGenFunction::GetAArch64SVEProcessedOperands(
       // immediate requires more than a handful of bits.
       *Result = Result->extOrTrunc(32);
       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
-      continue;
-    }
-
-    if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
-      Ops.push_back(Arg);
-      continue;
-    }
-
-    auto *VTy = cast<ScalableVectorType>(Arg->getType());
-    unsigned MinElts = VTy->getMinNumElements();
-    bool IsPred = VTy->getElementType()->isIntegerTy(1);
-    unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
-
-    if (N == 1) {
-      Ops.push_back(Arg);
-      continue;
-    }
-
-    for (unsigned I = 0; I < N; ++I) {
-      Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
-      auto *NewVTy =
-          ScalableVectorType::get(VTy->getElementType(), MinElts / N);
-      Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
     }
   }
-}
-
-Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
-                                                  const CallExpr *E) {
-  llvm::Type *Ty = ConvertType(E->getType());
-  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
-      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
-    Value *Val = EmitScalarExpr(E->getArg(0));
-    return EmitSVEReinterpret(Val, Ty);
-  }
 
   auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
                                               AArch64SVEIntrinsicsProvenSorted);
-
-  llvm::SmallVector<Value *, 4> Ops;
   SVETypeFlags TypeFlags(Builtin->TypeModifier);
-  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
-
   if (TypeFlags.isLoad())
     return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
                              TypeFlags.isZExtReturn());
@@ -10019,14 +9989,14 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
     return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isGatherPrefetch())
     return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (TypeFlags.isStructLoad())
-    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (TypeFlags.isStructStore())
-    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isStructLoad())
+    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isStructStore())
+    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
-    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
+    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
   else if (TypeFlags.isTupleCreate())
-    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
+    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
   else if (TypeFlags.isUndef())
     return UndefValue::get(Ty);
   else if (Builtin->LLVMIntrinsic != 0) {
@@ -10278,8 +10248,13 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   case SVE::BI__builtin_sve_svtbl2_f64: {
     SVETypeFlags TF(Builtin->TypeModifier);
     auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
+    Value *V0 = Builder.CreateExtractVector(VTy, Ops[0],
+                                            ConstantInt::get(CGM.Int64Ty, 0));
+    unsigned MinElts = VTy->getMinNumElements();
+    Value *V1 = Builder.CreateExtractVector(
+        VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts));
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
-    return Builder.CreateCall(F, Ops);
+    return Builder.CreateCall(F, {V0, V1, Ops[1]});
   }
 
   case SVE::BI__builtin_sve_svset_neonq_s8:
@@ -10337,13 +10312,35 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
 
 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
                                                   const CallExpr *E) {
-  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
-                                              AArch64SMEIntrinsicsProvenSorted);
+  // Find out if any arguments are required to be integer constant expressions.
+  unsigned ICEArguments = 0;
+  ASTContext::GetBuiltinTypeError Error;
+  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+  assert(Error == ASTContext::GE_None && "Should not codegen an error");
 
+  llvm::Type *Ty = ConvertType(E->getType());
   llvm::SmallVector<Value *, 4> Ops;
-  SVETypeFlags TypeFlags(Builtin->TypeModifier);
-  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    if ((ICEArguments & (1 << i)) == 0)
+      Ops.push_back(EmitScalarExpr(E->getArg(i)));
+    else {
+      // If this is required to be a constant, constant fold it so that we know
+      // that the generated intrinsic gets a ConstantInt.
+      std::optional<llvm::APSInt> Result =
+          E->getArg(i)->getIntegerConstantExpr(getContext());
+      assert(Result && "Expected argument to be a constant");
+
+      // Immediates for SVE llvm intrinsics are always 32bit. We can safely
+      // truncate because the immediate has been range checked and no valid
+      // immediate requires more than a handful of bits.
+      *Result = Result->extOrTrunc(32);
+      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
+    }
+  }
 
+  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
+                                              AArch64SMEIntrinsicsProvenSorted);
+  SVETypeFlags TypeFlags(Builtin->TypeModifier);
   if (TypeFlags.isLoad() || TypeFlags.isStore())
     return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
@@ -10356,24 +10353,21 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
            BuiltinID == SME::BI__builtin_sme_svldr_za ||
            BuiltinID == SME::BI__builtin_sme_svstr_za)
     return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (Builtin->LLVMIntrinsic != 0) {
+    // Predicates must match the main datatype.
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
+        if (PredTy->getElementType()->isIntegerTy(1))
+          Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
 
-  // Should not happen!
-  if (Builtin->LLVMIntrinsic == 0)
-    return nullptr;
-
-  // Predicates must match the main datatype.
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-    if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
-      if (PredTy->getElementType()->isIntegerTy(1))
-        Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
-
-  Function *F =
-      TypeFlags.isOverloadNone()
-          ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
-          : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
-  Value *Call = Builder.CreateCall(F, Ops);
+    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
+                                   getSVEOverloadTypes(TypeFlags, Ty, Ops));
+    Value *Call = Builder.CreateCall(F, Ops);
+    return Call;
+  }
 
-  return FormSVEBuiltinResult(Call);
+  /// Should not happen
+  return nullptr;
 }
 
 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
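
For orientation, here is a minimal standalone sketch (with hand-picked element counts that are assumptions for the svst2_bf16 case, not part of the commit) of the index arithmetic the restored EmitSVEStructStore uses when it splits a tuple vector back into the N legal part vectors an st2/3/4 intrinsic expects:

// Standalone illustration only: in Clang, N comes from the builtin and
// MinElts from the part vector's ScalableVectorType, i.e.
// VTy->getElementCount().getKnownMinValue().
#include <cstdio>

int main() {
  unsigned N = 2;       // st2 stores two part vectors
  unsigned MinElts = 8; // <vscale x 8 x bfloat> has 8 elements per vscale unit
  for (unsigned I = 0; I < N; ++I)
    // Each part is pulled out of the wide tuple with llvm.vector.extract
    // at offset I * MinElts, mirroring the loop restored above.
    std::printf("part %u extracted at index %u\n", I, I * MinElts);
  return 0;
}

These offsets (0 and 8 for svbfloat16x2_t) are exactly the i64 indices visible in the updated CHECK lines of the bfloat16 test below.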

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 0 additions & 5 deletions
@@ -4311,11 +4311,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitSMELdrStr(const SVETypeFlags &TypeFlags,
                              llvm::SmallVectorImpl<llvm::Value *> &Ops,
                              unsigned IntID);
-
-  void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E,
-                                      SmallVectorImpl<llvm::Value *> &Ops,
-                                      SVETypeFlags TypeFlags);
-
   llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c

Lines changed: 18 additions & 18 deletions
@@ -16,18 +16,18 @@
 #endif
 // CHECK-LABEL: @test_svst2_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_svst2_bf16u10__SVBool_tPu6__bf1614svbfloat16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data)
@@ -37,20 +37,20 @@ void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data)
 
 // CHECK-LABEL: @test_svst2_vnum_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[TMP3]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z20test_svst2_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[TMP3]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data)
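
For reference, a source-level sketch of the intrinsic calls these CHECK lines exercise. The wrapper names and build flags here are assumptions (the test itself goes through its SVE_ACLE_FUNC macro and its own RUN lines); something like clang -O2 -march=armv8-a+sve+bf16 -S would be needed:

// Each svst2 call lowers to a predicate conversion, two llvm.vector.extract
// calls at indices 0 and 8, and one llvm.aarch64.sve.st2.nxv8bf16 store
// (plus a getelementptr for the vnum form), as checked above.
#include <arm_sve.h>
#include <stdint.h>

void store_pair(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data) {
  svst2_bf16(pg, base, data);            // see test_svst2_bf16 above
}

void store_pair_vnum(svbool_t pg, bfloat16_t *base, int64_t vnum,
                     svbfloat16x2_t data) {
  svst2_vnum_bf16(pg, base, vnum, data); // see test_svst2_vnum_bf16 above
}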
