Skip to content

Commit 0c31ea5

Browse files
[Clang][SME2] Use tuple result of SME builtins directly. (#109423)
I missed a codepath in PR #108008, so SME2/SVE2p1 builtins are still converting their struct return type into a large vector, which causes unnecessary casting via memory.
1 parent 0ef24aa commit 0c31ea5

30 files changed

+1340
-11672
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 25 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9880,6 +9880,22 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
98809880
return C;
98819881
}
98829882

9883+
Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
9884+
llvm::StructType *Ty) {
9885+
if (PredTuple->getType() == Ty)
9886+
return PredTuple;
9887+
9888+
Value *Ret = llvm::PoisonValue::get(Ty);
9889+
for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
9890+
Value *Pred = Builder.CreateExtractValue(PredTuple, I);
9891+
Pred = EmitSVEPredicateCast(
9892+
Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
9893+
Ret = Builder.CreateInsertValue(Ret, Pred, I);
9894+
}
9895+
9896+
return Ret;
9897+
}
9898+
98839899
Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
98849900
SmallVectorImpl<Value *> &Ops,
98859901
unsigned IntID) {
@@ -10386,41 +10402,6 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
1038610402
return Tuple;
1038710403
}
1038810404

10389-
Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10390-
// Multi-vector results should be broken up into a single (wide) result
10391-
// vector.
10392-
auto *StructTy = dyn_cast<StructType>(Call->getType());
10393-
if (!StructTy)
10394-
return Call;
10395-
10396-
auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10397-
if (!VTy)
10398-
return Call;
10399-
unsigned N = StructTy->getNumElements();
10400-
10401-
// We may need to emit a cast to a svbool_t
10402-
bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10403-
unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10404-
10405-
ScalableVectorType *WideVTy =
10406-
ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10407-
Value *Ret = llvm::PoisonValue::get(WideVTy);
10408-
for (unsigned I = 0; I < N; ++I) {
10409-
Value *SRet = Builder.CreateExtractValue(Call, I);
10410-
assert(SRet->getType() == VTy && "Unexpected type for result value");
10411-
Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10412-
10413-
if (IsPredTy)
10414-
SRet = EmitSVEPredicateCast(
10415-
SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10416-
10417-
Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10418-
}
10419-
Call = Ret;
10420-
10421-
return Call;
10422-
}
10423-
1042410405
void CodeGenFunction::GetAArch64SVEProcessedOperands(
1042510406
unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
1042610407
SVETypeFlags TypeFlags) {
@@ -10551,12 +10532,16 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
1055110532
getSVEOverloadTypes(TypeFlags, Ty, Ops));
1055210533
Value *Call = Builder.CreateCall(F, Ops);
1055310534

10535+
if (Call->getType() == Ty)
10536+
return Call;
10537+
1055410538
// Predicate results must be converted to svbool_t.
10555-
if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10556-
if (PredTy->getScalarType()->isIntegerTy(1))
10557-
Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10539+
if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
10540+
return EmitSVEPredicateCast(Call, PredTy);
10541+
if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
10542+
return EmitSVEPredicateTupleCast(Call, PredTupleTy);
1055810543

10559-
return FormSVEBuiltinResult(Call);
10544+
llvm_unreachable("unsupported element count!");
1056010545
}
1056110546

1056210547
switch (BuiltinID) {
@@ -10888,9 +10873,8 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
1088810873
TypeFlags.isOverloadNone()
1088910874
? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
1089010875
: CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10891-
Value *Call = Builder.CreateCall(F, Ops);
1089210876

10893-
return FormSVEBuiltinResult(Call);
10877+
return Builder.CreateCall(F, Ops);
1089410878
}
1089510879

1089610880
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4646,6 +4646,8 @@ class CodeGenFunction : public CodeGenTypeCache {
46464646
unsigned BuiltinID);
46474647
llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred,
46484648
llvm::ScalableVectorType *VTy);
4649+
llvm::Value *EmitSVEPredicateTupleCast(llvm::Value *PredTuple,
4650+
llvm::StructType *Ty);
46494651
llvm::Value *EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
46504652
llvm::SmallVectorImpl<llvm::Value *> &Ops,
46514653
unsigned IntID);
@@ -4670,12 +4672,6 @@ class CodeGenFunction : public CodeGenTypeCache {
46704672
llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags,
46714673
SmallVectorImpl<llvm::Value *> &Ops,
46724674
unsigned IntID);
4673-
/// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider
4674-
/// vector. It extracts the scalable vector from the struct and inserts into
4675-
/// the wider vector. This avoids the error when allocating space in llvm
4676-
/// for struct of scalable vectors if a function returns struct.
4677-
llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);
4678-
46794675
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
46804676

46814677
llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,

0 commit comments

Comments
 (0)