@@ -9652,14 +9652,17 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
   case Intrinsic::aarch64_sve_ld2_sret:
   case Intrinsic::aarch64_sve_ld1_pn_x2:
   case Intrinsic::aarch64_sve_ldnt1_pn_x2:
+  case Intrinsic::aarch64_sve_ld2q_sret:
     N = 2;
     break;
   case Intrinsic::aarch64_sve_ld3_sret:
+  case Intrinsic::aarch64_sve_ld3q_sret:
     N = 3;
     break;
   case Intrinsic::aarch64_sve_ld4_sret:
   case Intrinsic::aarch64_sve_ld1_pn_x4:
   case Intrinsic::aarch64_sve_ldnt1_pn_x4:
+  case Intrinsic::aarch64_sve_ld4q_sret:
     N = 4;
     break;
   default:
@@ -9697,14 +9700,17 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
   case Intrinsic::aarch64_sve_st2:
   case Intrinsic::aarch64_sve_st1_pn_x2:
   case Intrinsic::aarch64_sve_stnt1_pn_x2:
+  case Intrinsic::aarch64_sve_st2q:
     N = 2;
     break;
   case Intrinsic::aarch64_sve_st3:
+  case Intrinsic::aarch64_sve_st3q:
     N = 3;
     break;
   case Intrinsic::aarch64_sve_st4:
   case Intrinsic::aarch64_sve_st1_pn_x4:
   case Intrinsic::aarch64_sve_stnt1_pn_x4:
+  case Intrinsic::aarch64_sve_st4q:
     N = 4;
     break;
   default:
@@ -9780,7 +9786,7 @@ Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
                                           llvm::Type *ReturnTy,
                                           SmallVectorImpl<Value *> &Ops,
-                                          unsigned BuiltinID,
+                                          unsigned IntrinsicID,
                                           bool IsZExtReturn) {
   QualType LangPTy = E->getArg(1)->getType();
   llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
@@ -9789,28 +9795,46 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
   // The vector type that is returned may be different from the
   // eventual type loaded from memory.
   auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
-  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+  llvm::ScalableVectorType *MemoryTy = nullptr;
+  llvm::ScalableVectorType *PredTy = nullptr;
+  bool IsQuadLoad = false;
+  switch (IntrinsicID) {
+  case Intrinsic::aarch64_sve_ld1uwq:
+  case Intrinsic::aarch64_sve_ld1udq:
+    MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
+    PredTy = llvm::ScalableVectorType::get(
+        llvm::Type::getInt1Ty(getLLVMContext()), 1);
+    IsQuadLoad = true;
+    break;
+  default:
+    MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+    PredTy = MemoryTy;
+    break;
+  }

-  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
   Value *BasePtr = Ops[1];

   // Does the load have an offset?
   if (Ops.size() > 2)
     BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);

-  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+  Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
   auto *Load =
       cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
   auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
   CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);

+  if (IsQuadLoad)
+    return Load;
+
   return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
-                     : Builder.CreateSExt(Load, VectorTy);
+                      : Builder.CreateSExt(Load, VectorTy);
 }

 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
                                            SmallVectorImpl<Value *> &Ops,
-                                           unsigned BuiltinID) {
+                                           unsigned IntrinsicID) {
   QualType LangPTy = E->getArg(1)->getType();
   llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
       LangPTy->castAs<PointerType>()->getPointeeType());
@@ -9820,17 +9844,34 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
   auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
   auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);

-  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+  auto PredTy = MemoryTy;
+  auto AddrMemoryTy = MemoryTy;
+  bool IsQuadStore = false;
+
+  switch (IntrinsicID) {
+  case Intrinsic::aarch64_sve_st1uwq:
+  case Intrinsic::aarch64_sve_st1udq:
+    AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
+    PredTy =
+        llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
+    IsQuadStore = true;
+    break;
+  default:
+    break;
+  }
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
   Value *BasePtr = Ops[1];

   // Does the store have an offset?
   if (Ops.size() == 4)
-    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
+    BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);

   // Last value is always the data
-  llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
+  Value *Val =
+      IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);

-  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+  Function *F =
+      CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
   auto *Store =
       cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
   auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());