Skip to content

Commit 409edc6

Browse files
authored
[AArch64][SME] Fix bug on SMELd1St1 (#118109)
Patch [1] updated the intrinsic interface for ld1/st1. According to Arm's documentation, "If the intrinsic also has a vnum argument, the ZA slice number is calculated by adding vnum to slice." However, the current implementation only uses "vnum" to offset the base pointer and does not add it to the ZA slice number; this patch fixes that. [1] ee31ba0
1 parent 3da843b commit 409edc6

File tree

3 files changed

+229
-114
lines changed

3 files changed

+229
-114
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10646,7 +10646,7 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
1064610646
NewOps.push_back(Ops[2]);
1064710647

1064810648
llvm::Value *BasePtr = Ops[3];
10649-
10649+
llvm::Value *RealSlice = Ops[1];
1065010650
// If the intrinsic contains the vnum parameter, multiply it with the vector
1065110651
// size in bytes.
1065210652
if (Ops.size() == 5) {
@@ -10658,10 +10658,13 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
1065810658
Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
1065910659
// The type of the ptr parameter is void *, so use Int8Ty here.
1066010660
BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10661+
RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
10662+
RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
10663+
RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
1066110664
}
1066210665
NewOps.push_back(BasePtr);
1066310666
NewOps.push_back(Ops[0]);
10664-
NewOps.push_back(Ops[1]);
10667+
NewOps.push_back(RealSlice);
1066510668
Function *F = CGM.getIntrinsic(IntID);
1066610669
return Builder.CreateCall(F, NewOps);
1066710670
}

0 commit comments

Comments
 (0)