
Commit 52e4084

[SVE][CodeGen] Vector + immediate addressing mode for masked gather/scatter
This patch extends LowerMGATHER/MSCATTER to make use of the vector + reg/immediate
addressing modes for scalable masked gathers & scatters.

selectGatherScatterAddrMode checks if the base pointer is null, in which case we
can swap the base pointer and the index, e.g.

  getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
  -> getelementptr %offset, <vscale x N x T> %indices

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D93132
1 parent 992fad0 commit 52e4084

8 files changed: +847 -10 lines
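To make the new vector + immediate mode concrete, here is a minimal IR sketch in the style of the tests added below (the function name is illustrative). Each lane's address is a pointer from %bases plus one i32, i.e. a 4-byte offset; 4 is a multiple of the element size and 4 / 4 = 1 <= 31, so the offset folds into the gather's immediate instead of requiring a vector add and a separate zero base register:

define <vscale x 2 x i64> @example_gather_imm(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; Expected codegen with this patch: ld1w { z0.d }, p0/z, [z0.d, #4]
  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)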

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 64 additions & 2 deletions
@@ -3812,6 +3812,8 @@ unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
     return Opcode;
   case AArch64ISD::GLD1_MERGE_ZERO:
     return AArch64ISD::GLD1S_MERGE_ZERO;
+  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+    return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
   case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
     return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
   case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
@@ -3843,6 +3845,60 @@ bool getGatherScatterIndexIsExtended(SDValue Index) {
   return false;
 }
 
+// If the base pointer of a masked gather or scatter is null, we
+// may be able to swap BasePtr & Index and use the vector + register
+// or vector + immediate addressing mode, e.g.
+// VECTOR + REGISTER:
+//    getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
+//    -> getelementptr %offset, <vscale x N x T> %indices
+// VECTOR + IMMEDIATE:
+//    getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
+//    -> getelementptr #x, <vscale x N x T> %indices
+void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
+                                 unsigned &Opcode, bool IsGather,
+                                 SelectionDAG &DAG) {
+  if (!isNullConstant(BasePtr))
+    return;
+
+  ConstantSDNode *Offset = nullptr;
+  if (Index.getOpcode() == ISD::ADD)
+    if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
+      if (isa<ConstantSDNode>(SplatVal))
+        Offset = cast<ConstantSDNode>(SplatVal);
+      else {
+        BasePtr = SplatVal;
+        Index = Index->getOperand(0);
+        return;
+      }
+    }
+
+  unsigned NewOp =
+      IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
+
+  if (!Offset) {
+    std::swap(BasePtr, Index);
+    Opcode = NewOp;
+    return;
+  }
+
+  uint64_t OffsetVal = Offset->getZExtValue();
+  unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
+  auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
+
+  if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
+    // Index is out of range for the immediate addressing mode
+    BasePtr = ConstOffset;
+    Index = Index->getOperand(0);
+    return;
+  }
+
+  // Immediate is in range
+  Opcode = NewOp;
+  BasePtr = Index->getOperand(0);
+  Index = ConstOffset;
+  return;
+}
+
 SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -3892,6 +3948,9 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
     Index = Index.getOperand(0);
 
   unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
+  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+                              /*isGather=*/true, DAG);
+
   if (ResNeedsSignExtend)
     Opcode = getSignExtendedGatherOpcode(Opcode);
 
@@ -3944,9 +4003,12 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
   if (getGatherScatterIndexIsExtended(Index))
     Index = Index.getOperand(0);
 
+  unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
+  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+                              /*isGather=*/false, DAG);
+
   SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
-  return DAG.getNode(getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend), DL,
-                     VTs, Ops);
+  return DAG.getNode(Opcode, DL, VTs, Ops);
 }
 
 // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
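LowerMSCATTER takes the same route through selectGatherScatterAddrMode, with SST1_IMM_PRED as the immediate opcode. No scatter test is shown in this view, so the following is only a hedged sketch assuming behaviour symmetrical to the gather tests below; the function name and the expected instruction in the comment are assumptions, not taken from the patch's test suite:

define void @example_scatter_imm(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; Assumed codegen, mirroring the gather case: st1w { z0.d }, p0, [z1.d, #4]
  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)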

llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll

Lines changed: 22 additions & 8 deletions
@@ -44,12 +44,29 @@ define <vscale x 2 x i64> @masked_sgather_zext(i8* %base, <vscale x 2 x i64> %of
 
 ; Tests that exercise various type legalisation scenarios for ISD::MGATHER.
 
+; Code generate load of an illegal datatype via promotion.
+define <vscale x 2 x i8> @masked_gather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK: ld1sb { z0.d }, p0/z, [z0.d]
+; CHECK: ret
+  %data = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  ret <vscale x 2 x i8> %data
+}
+
+; Code generate load of an illegal datatype via promotion.
+define <vscale x 2 x i16> @masked_gather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK: ld1sh { z0.d }, p0/z, [z0.d]
+; CHECK: ret
+  %data = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  ret <vscale x 2 x i16> %data
+}
+
 ; Code generate load of an illegal datatype via promotion.
 define <vscale x 2 x i32> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_gather_nxv2i32:
-; CHECK-DAG: mov x8, xzr
-; CHECK-DAG: ld1sw { z0.d }, p0/z, [x8, z0.d]
-; CHECK: ret
+; CHECK: ld1sw { z0.d }, p0/z, [z0.d]
+; CHECK: ret
   %data = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
   ret <vscale x 2 x i32> %data
 }
@@ -92,11 +109,10 @@ define <vscale x 32 x i32> @masked_gather_nxv32i32(i32* %base, <vscale x 32 x i3
 define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv4i8:
 ; CHECK: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
 ; CHECK-NEXT: zip2 p2.s, p0.s, p1.s
 ; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
-; CHECK-NEXT: ld1sb { z1.d }, p2/z, [x8, z1.d]
-; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ld1sb { z1.d }, p2/z, [z1.d]
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
@@ -109,8 +125,6 @@ define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vsca
 declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
 declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
 declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-
 declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
-
 declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8(<vscale x 16 x i8*>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 32 x i32> @llvm.masked.gather.nxv32i32(<vscale x 32 x i32*>, i32, <vscale x 32 x i1>, <vscale x 32 x i32>)
Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d, #1]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #2]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 1
+  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d, #4]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
+  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i64*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d, #8]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i64, <vscale x 2 x i64*> %bases, i32 1
+  %vals.zext = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #4]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 2
+  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+  ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #4]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 2
+  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+  ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d, #12]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 3
+  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+  ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d, #32]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 4
+  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+  ret <vscale x 2 x double> %vals
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d, #5]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 5
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [z0.d, #12]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 6
+  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d, #28]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 7
+  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+; Tests where the immediate is out of range
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8_range(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 32
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16_range(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #64
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 32
+  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+  ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16_range(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #64
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 32
+  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+  ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32_range(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #128
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 32
+  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+  ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64_range(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #256
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 32
+  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+  ret <vscale x 2 x double> %vals
+}
+
+declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+attributes #0 = { "target-features"="+sve,+bf16" }
