Skip to content

Commit 823e2a6

Browse files
committed
[SveEmitter] Add builtins for contiguous prefetches
This patch also adds the enum `sv_prfop` for the prefetch operation specifier and checks to ensure the passed enum values are valid. Reviewers: SjoerdMeijer, efriedma, ctetreau Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78674
1 parent 9cd4deb commit 823e2a6

File tree

14 files changed

+654
-7
lines changed

14 files changed

+654
-7
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ namespace clang {
238238
bool isOverloadDefault() const { return !(Flags & OverloadKindMask); }
239239
bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; }
240240
bool isOverloadCvt() const { return Flags & IsOverloadCvt; }
241+
bool isPrefetch() const { return Flags & IsPrefetch; }
241242

242243
uint64_t getBits() const { return Flags; }
243244
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }

clang/include/clang/Basic/arm_sve.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
// -------------------
5959
// prototype: return (arg, arg, ...)
6060
//
61+
// v: void
6162
// x: vector of signed integers
6263
// u: vector of unsigned integers
6364
// d: default
@@ -82,6 +83,7 @@
8283
// M: svfloat32_t
8384
// N: svfloat64_t
8485

86+
// J: Prefetch type (sv_prfop)
8587
// A: pointer to int8_t
8688
// B: pointer to int16_t
8789
// C: pointer to int32_t
@@ -176,6 +178,7 @@ def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type)
176178
def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
177179
def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
178180
def IsByteIndexed : FlagType<0x01000000>;
181+
def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
179182

180183
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
181184
class ImmCheckType<int val> {
@@ -193,6 +196,7 @@ def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt
193196
def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1)
194197
def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270]
195198
def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270]
199+
def ImmCheck0_13 : ImmCheckType<12>; // 0..13
196200

197201
class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
198202
int Arg = arg;
@@ -543,6 +547,21 @@ def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEl
543547
// Store one vector, with no truncation, non-temporal (scalar base, VL displacement)
544548
def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">;
545549

550+
////////////////////////////////////////////////////////////////////////////////
551+
// Prefetches
552+
553+
// Prefetch (Scalar base)
554+
def SVPRFB : MInst<"svprfb", "vPcJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">;
555+
def SVPRFH : MInst<"svprfh", "vPcJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">;
556+
def SVPRFW : MInst<"svprfw", "vPcJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">;
557+
def SVPRFD : MInst<"svprfd", "vPcJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">;
558+
559+
// Prefetch (Scalar base, VL displacement)
560+
def SVPRFB_VNUM : MInst<"svprfb_vnum", "vPclJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">;
561+
def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPclJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">;
562+
def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPclJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">;
563+
def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPclJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">;
564+
546565
////////////////////////////////////////////////////////////////////////////////
547566
// Integer arithmetic
548567

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7568,6 +7568,13 @@ llvm::VectorType *CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
75687568
}
75697569
}
75707570

7571+
constexpr unsigned SVEBitsPerBlock = 128;
7572+
7573+
static llvm::VectorType* getSVEVectorForElementType(llvm::Type *EltTy) {
7574+
unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
7575+
return llvm::VectorType::get(EltTy, { NumElts, true });
7576+
}
7577+
75717578
// Reinterpret the input predicate so that it can be used to correctly isolate
75727579
// the elements of the specified datatype.
75737580
Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
@@ -7707,6 +7714,30 @@ Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags,
77077714
return Builder.CreateCall(F, Ops);
77087715
}
77097716

7717+
Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
7718+
SmallVectorImpl<Value *> &Ops,
7719+
unsigned BuiltinID) {
7720+
auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
7721+
auto *VectorTy = getSVEVectorForElementType(MemEltTy);
7722+
auto *MemoryTy = llvm::VectorType::get(MemEltTy, VectorTy->getElementCount());
7723+
7724+
Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
7725+
Value *BasePtr = Ops[1];
7726+
7727+
// Implement the index operand if not omitted.
7728+
if (Ops.size() > 3) {
7729+
BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo());
7730+
BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
7731+
}
7732+
7733+
// Prefetch intriniscs always expect an i8*
7734+
BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty));
7735+
Value *PrfOp = Ops.back();
7736+
7737+
Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
7738+
return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
7739+
}
7740+
77107741
Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
77117742
llvm::Type *ReturnTy,
77127743
SmallVectorImpl<Value *> &Ops,
@@ -7759,13 +7790,6 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
77597790
return Builder.CreateCall(F, {Val, Predicate, BasePtr});
77607791
}
77617792

7762-
constexpr unsigned SVEBitsPerBlock = 128;
7763-
7764-
static llvm::VectorType* getSVEVectorForElementType(llvm::Type *EltTy) {
7765-
unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
7766-
return llvm::VectorType::get(EltTy, { NumElts, true });
7767-
}
7768-
77697793
// Limit the usage of scalable llvm IR generated by the ACLE by using the
77707794
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
77717795
Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
@@ -7847,6 +7871,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
78477871
return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
78487872
else if (TypeFlags.isScatterStore())
78497873
return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
7874+
else if (TypeFlags.isPrefetch())
7875+
return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
78507876
else if (Builtin->LLVMIntrinsic != 0) {
78517877
if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
78527878
InsertExplicitZeroOperand(Builder, Ty, Ops);

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3927,6 +3927,9 @@ class CodeGenFunction : public CodeGenTypeCache {
39273927
llvm::Value *EmitSVEMaskedStore(const CallExpr *,
39283928
SmallVectorImpl<llvm::Value *> &Ops,
39293929
unsigned BuiltinID);
3930+
llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
3931+
SmallVectorImpl<llvm::Value *> &Ops,
3932+
unsigned BuiltinID);
39303933
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
39313934

39323935
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,

clang/lib/Sema/SemaChecking.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,6 +2042,10 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
20422042
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
20432043
HasError = true;
20442044
break;
2045+
case SVETypeFlags::ImmCheck0_13:
2046+
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
2047+
HasError = true;
2048+
break;
20452049
case SVETypeFlags::ImmCheck1_16:
20462050
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
20472051
HasError = true;
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
2+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
4+
#include <arm_sve.h>
5+
6+
#ifdef SVE_OVERLOADED_FORMS
7+
// A simple used,unused... macro, long enough to represent any SVE builtin.
8+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
9+
#else
10+
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
11+
#endif
12+
13+
void test_svprfb(svbool_t pg, const void *base)
14+
{
15+
// CHECK-LABEL: test_svprfb
16+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 0)
17+
return svprfb(pg, base, SV_PLDL1KEEP);
18+
}
19+
20+
void test_svprfb_1(svbool_t pg, const void *base)
21+
{
22+
// CHECK-LABEL: test_svprfb_1
23+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 1)
24+
return svprfb(pg, base, SV_PLDL1STRM);
25+
}
26+
27+
void test_svprfb_2(svbool_t pg, const void *base)
28+
{
29+
// CHECK-LABEL: test_svprfb_2
30+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 2)
31+
return svprfb(pg, base, SV_PLDL2KEEP);
32+
}
33+
34+
void test_svprfb_3(svbool_t pg, const void *base)
35+
{
36+
// CHECK-LABEL: test_svprfb_3
37+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 3)
38+
return svprfb(pg, base, SV_PLDL2STRM);
39+
}
40+
41+
void test_svprfb_4(svbool_t pg, const void *base)
42+
{
43+
// CHECK-LABEL: test_svprfb_4
44+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 4)
45+
return svprfb(pg, base, SV_PLDL3KEEP);
46+
}
47+
48+
void test_svprfb_5(svbool_t pg, const void *base)
49+
{
50+
// CHECK-LABEL: test_svprfb_5
51+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 5)
52+
return svprfb(pg, base, SV_PLDL3STRM);
53+
}
54+
55+
void test_svprfb_6(svbool_t pg, const void *base)
56+
{
57+
// CHECK-LABEL: test_svprfb_6
58+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 8)
59+
return svprfb(pg, base, SV_PSTL1KEEP);
60+
}
61+
62+
void test_svprfb_7(svbool_t pg, const void *base)
63+
{
64+
// CHECK-LABEL: test_svprfb_7
65+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 9)
66+
return svprfb(pg, base, SV_PSTL1STRM);
67+
}
68+
69+
void test_svprfb_8(svbool_t pg, const void *base)
70+
{
71+
// CHECK-LABEL: test_svprfb_8
72+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 10)
73+
return svprfb(pg, base, SV_PSTL2KEEP);
74+
}
75+
76+
void test_svprfb_9(svbool_t pg, const void *base)
77+
{
78+
// CHECK-LABEL: test_svprfb_9
79+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 11)
80+
return svprfb(pg, base, SV_PSTL2STRM);
81+
}
82+
83+
void test_svprfb_10(svbool_t pg, const void *base)
84+
{
85+
// CHECK-LABEL: test_svprfb_10
86+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 12)
87+
return svprfb(pg, base, SV_PSTL3KEEP);
88+
}
89+
90+
void test_svprfb_11(svbool_t pg, const void *base)
91+
{
92+
// CHECK-LABEL: test_svprfb_11
93+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 13)
94+
return svprfb(pg, base, SV_PSTL3STRM);
95+
}
96+
97+
void test_svprfb_vnum(svbool_t pg, const void *base, int64_t vnum)
98+
{
99+
// CHECK-LABEL: test_svprfb_vnum
100+
// CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
101+
// CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum, i64 0
102+
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]], i32 0)
103+
return svprfb_vnum(pg, base, vnum, SV_PLDL1KEEP);
104+
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
2+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
4+
#include <arm_sve.h>
5+
6+
#ifdef SVE_OVERLOADED_FORMS
7+
// A simple used,unused... macro, long enough to represent any SVE builtin.
8+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
9+
#else
10+
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
11+
#endif
12+
13+
void test_svprfd(svbool_t pg, const void *base)
14+
{
15+
// CHECK-LABEL: test_svprfd
16+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
17+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 0)
18+
return svprfd(pg, base, SV_PLDL1KEEP);
19+
}
20+
21+
void test_svprfd_1(svbool_t pg, const void *base)
22+
{
23+
// CHECK-LABEL: test_svprfd_1
24+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
25+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 1)
26+
return svprfd(pg, base, SV_PLDL1STRM);
27+
}
28+
29+
void test_svprfd_2(svbool_t pg, const void *base)
30+
{
31+
// CHECK-LABEL: test_svprfd_2
32+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
33+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 2)
34+
return svprfd(pg, base, SV_PLDL2KEEP);
35+
}
36+
37+
void test_svprfd_3(svbool_t pg, const void *base)
38+
{
39+
// CHECK-LABEL: test_svprfd_3
40+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
41+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 3)
42+
return svprfd(pg, base, SV_PLDL2STRM);
43+
}
44+
45+
void test_svprfd_4(svbool_t pg, const void *base)
46+
{
47+
// CHECK-LABEL: test_svprfd_4
48+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
49+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 4)
50+
return svprfd(pg, base, SV_PLDL3KEEP);
51+
}
52+
53+
void test_svprfd_5(svbool_t pg, const void *base)
54+
{
55+
// CHECK-LABEL: test_svprfd_5
56+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
57+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 5)
58+
return svprfd(pg, base, SV_PLDL3STRM);
59+
}
60+
61+
void test_svprfd_6(svbool_t pg, const void *base)
62+
{
63+
// CHECK-LABEL: test_svprfd_6
64+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
65+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 8)
66+
return svprfd(pg, base, SV_PSTL1KEEP);
67+
}
68+
69+
void test_svprfd_7(svbool_t pg, const void *base)
70+
{
71+
// CHECK-LABEL: test_svprfd_7
72+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
73+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 9)
74+
return svprfd(pg, base, SV_PSTL1STRM);
75+
}
76+
77+
void test_svprfd_8(svbool_t pg, const void *base)
78+
{
79+
// CHECK-LABEL: test_svprfd_8
80+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
81+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 10)
82+
return svprfd(pg, base, SV_PSTL2KEEP);
83+
}
84+
85+
void test_svprfd_9(svbool_t pg, const void *base)
86+
{
87+
// CHECK-LABEL: test_svprfd_9
88+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
89+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 11)
90+
return svprfd(pg, base, SV_PSTL2STRM);
91+
}
92+
93+
void test_svprfd_10(svbool_t pg, const void *base)
94+
{
95+
// CHECK-LABEL: test_svprfd_10
96+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
97+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 12)
98+
return svprfd(pg, base, SV_PSTL3KEEP);
99+
}
100+
101+
void test_svprfd_11(svbool_t pg, const void *base)
102+
{
103+
// CHECK-LABEL: test_svprfd_11
104+
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
105+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 13)
106+
return svprfd(pg, base, SV_PSTL3STRM);
107+
}
108+
109+
void test_svprfd_vnum(svbool_t pg, const void *base, int64_t vnum)
110+
{
111+
// CHECK-LABEL: test_svprfd_vnum
112+
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
113+
// CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i64>*
114+
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum
115+
// CHECK-DAG: %[[I8_BASE:.*]] = bitcast <vscale x 2 x i64>* %[[GEP]] to i8*
116+
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0)
117+
return svprfd_vnum(pg, base, vnum, SV_PLDL1KEEP);
118+
}

0 commit comments

Comments
 (0)