Skip to content

Commit df8e0ce

Browse files
[SVE] Extend getMemVTFromNode to cover the sret variants of sve.ld2/3/4.
This allows the sret variants to be selected with reg+imm addressing modes, matching the code already generated for the non-sret variants of these intrinsics. Differential Revision: https://reviews.llvm.org/D132392
1 parent 1dd0613 commit df8e0ce

File tree

2 files changed

+51
-73
lines changed

2 files changed

+51
-73
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5428,22 +5428,30 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
54285428
break;
54295429
}
54305430

5431-
if (Opcode != ISD::INTRINSIC_VOID)
5431+
if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
54325432
return EVT();
54335433

5434-
const unsigned IntNo =
5435-
cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
5436-
if (IntNo == Intrinsic::aarch64_sme_ldr ||
5437-
IntNo == Intrinsic::aarch64_sme_str)
5438-
return MVT::nxv16i8;
5439-
5440-
if (IntNo != Intrinsic::aarch64_sve_prf)
5434+
switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) {
5435+
default:
54415436
return EVT();
5442-
5443-
// We are using an SVE prefetch intrinsic. Type must be inferred
5444-
// from the width of the predicate.
5445-
return getPackedVectorTypeFromPredicateType(
5446-
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
5437+
case Intrinsic::aarch64_sme_ldr:
5438+
case Intrinsic::aarch64_sme_str:
5439+
return MVT::nxv16i8;
5440+
case Intrinsic::aarch64_sve_prf:
5441+
// We are using an SVE prefetch intrinsic. Type must be inferred from the
5442+
// width of the predicate.
5443+
return getPackedVectorTypeFromPredicateType(
5444+
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
5445+
case Intrinsic::aarch64_sve_ld2_sret:
5446+
return getPackedVectorTypeFromPredicateType(
5447+
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
5448+
case Intrinsic::aarch64_sve_ld3_sret:
5449+
return getPackedVectorTypeFromPredicateType(
5450+
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
5451+
case Intrinsic::aarch64_sve_ld4_sret:
5452+
return getPackedVectorTypeFromPredicateType(
5453+
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
5454+
}
54475455
}
54485456

54495457
/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:

llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll

Lines changed: 30 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2.nxv32i8(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
1313
; CHECK-LABEL: ld2.nxv32i8:
1414
; CHECK: // %bb.0:
15-
; CHECK-NEXT: rdvl x8, #2
16-
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x8]
15+
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, #2, mul vl]
1716
; CHECK-NEXT: ret
1817
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 2
1918
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8*
@@ -24,8 +23,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2.nxv32i8(<vscale x 16 x i1
2423
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2.nxv32i8_lower_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
2524
; CHECK-LABEL: ld2.nxv32i8_lower_bound:
2625
; CHECK: // %bb.0:
27-
; CHECK-NEXT: rdvl x8, #-16
28-
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x8]
26+
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, #-16, mul vl]
2927
; CHECK-NEXT: ret
3028
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -16
3129
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -36,8 +34,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2.nxv32i8_lower_bound(<vsca
3634
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2.nxv32i8_upper_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
3735
; CHECK-LABEL: ld2.nxv32i8_upper_bound:
3836
; CHECK: // %bb.0:
39-
; CHECK-NEXT: rdvl x8, #14
40-
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x8]
37+
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, #14, mul vl]
4138
; CHECK-NEXT: ret
4239
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 14
4340
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -85,8 +82,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2.nxv32i8_outside_upper_bou
8582
define { <vscale x 8 x i16>, <vscale x 8 x i16> } @ld2.nxv16i16(<vscale x 8 x i1> %Pg, <vscale x 8 x i16>* %addr) {
8683
; CHECK-LABEL: ld2.nxv16i16:
8784
; CHECK: // %bb.0:
88-
; CHECK-NEXT: addvl x8, x0, #14
89-
; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x8]
85+
; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, #14, mul vl]
9086
; CHECK-NEXT: ret
9187
%base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 14
9288
%base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
@@ -97,8 +93,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @ld2.nxv16i16(<vscale x 8 x i1
9793
define { <vscale x 8 x half>, <vscale x 8 x half> } @ld2.nxv16f16(<vscale x 8 x i1> %Pg, <vscale x 8 x half>* %addr) {
9894
; CHECK-LABEL: ld2.nxv16f16:
9995
; CHECK: // %bb.0:
100-
; CHECK-NEXT: addvl x8, x0, #-16
101-
; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x8]
96+
; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, #-16, mul vl]
10297
; CHECK-NEXT: ret
10398
%base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 -16
10499
%base_ptr = bitcast <vscale x 8 x half>* %base to half *
@@ -109,8 +104,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @ld2.nxv16f16(<vscale x 8 x
109104
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld2.nxv16bf16(<vscale x 8 x i1> %Pg, <vscale x 8 x bfloat>* %addr) #0 {
110105
; CHECK-LABEL: ld2.nxv16bf16:
111106
; CHECK: // %bb.0:
112-
; CHECK-NEXT: addvl x8, x0, #12
113-
; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x8]
107+
; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, #12, mul vl]
114108
; CHECK-NEXT: ret
115109
%base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 12
116110
%base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
@@ -122,8 +116,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld2.nxv16bf16(<vscale x
122116
define { <vscale x 4 x i32>, <vscale x 4 x i32> } @ld2.nxv8i32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32>* %addr) {
123117
; CHECK-LABEL: ld2.nxv8i32:
124118
; CHECK: // %bb.0:
125-
; CHECK-NEXT: addvl x8, x0, #14
126-
; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x8]
119+
; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, #14, mul vl]
127120
; CHECK-NEXT: ret
128121
%base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 14
129122
%base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
@@ -134,8 +127,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @ld2.nxv8i32(<vscale x 4 x i1>
134127
define { <vscale x 4 x float>, <vscale x 4 x float> } @ld2.nxv8f32(<vscale x 4 x i1> %Pg, <vscale x 4 x float>* %addr) {
135128
; CHECK-LABEL: ld2.nxv8f32:
136129
; CHECK: // %bb.0:
137-
; CHECK-NEXT: addvl x8, x0, #-16
138-
; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x8]
130+
; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, #-16, mul vl]
139131
; CHECK-NEXT: ret
140132
%base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -16
141133
%base_ptr = bitcast <vscale x 4 x float>* %base to float *
@@ -147,8 +139,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @ld2.nxv8f32(<vscale x 4 x
147139
define { <vscale x 2 x i64>, <vscale x 2 x i64> } @ld2.nxv4i64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64>* %addr) {
148140
; CHECK-LABEL: ld2.nxv4i64:
149141
; CHECK: // %bb.0:
150-
; CHECK-NEXT: addvl x8, x0, #14
151-
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x8]
142+
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, #14, mul vl]
152143
; CHECK-NEXT: ret
153144
%base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 14
154145
%base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
@@ -159,8 +150,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @ld2.nxv4i64(<vscale x 2 x i1>
159150
define { <vscale x 2 x double>, <vscale x 2 x double> } @ld2.nxv4f64(<vscale x 2 x i1> %Pg, <vscale x 2 x double>* %addr) {
160151
; CHECK-LABEL: ld2.nxv4f64:
161152
; CHECK: // %bb.0:
162-
; CHECK-NEXT: addvl x8, x0, #-16
163-
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x8]
153+
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, #-16, mul vl]
164154
; CHECK-NEXT: ret
165155
%base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -16
166156
%base_ptr = bitcast <vscale x 2 x double>* %base to double *
@@ -172,8 +162,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @ld2.nxv4f64(<vscale x 2
172162
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48i8(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
173163
; CHECK-LABEL: ld3.nxv48i8:
174164
; CHECK: // %bb.0:
175-
; CHECK-NEXT: rdvl x8, #3
176-
; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
165+
; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, #3, mul vl]
177166
; CHECK-NEXT: ret
178167
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3
179168
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -184,8 +173,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
184173
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48i8_lower_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
185174
; CHECK-LABEL: ld3.nxv48i8_lower_bound:
186175
; CHECK: // %bb.0:
187-
; CHECK-NEXT: rdvl x8, #-24
188-
; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
176+
; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, #-24, mul vl]
189177
; CHECK-NEXT: ret
190178
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24
191179
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -196,8 +184,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
196184
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48i8_upper_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
197185
; CHECK-LABEL: ld3.nxv48i8_upper_bound:
198186
; CHECK: // %bb.0:
199-
; CHECK-NEXT: rdvl x8, #21
200-
; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
187+
; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, #21, mul vl]
201188
; CHECK-NEXT: ret
202189
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21
203190
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -257,8 +244,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
257244
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3.nxv24i16(<vscale x 8 x i1> %Pg, <vscale x 8 x i16> *%addr) {
258245
; CHECK-LABEL: ld3.nxv24i16:
259246
; CHECK: // %bb.0:
260-
; CHECK-NEXT: addvl x8, x0, #21
261-
; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8]
247+
; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, #21, mul vl]
262248
; CHECK-NEXT: ret
263249
%base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 21
264250
%base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
@@ -269,8 +255,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3.nxv24
269255
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3.nxv24f16(<vscale x 8 x i1> %Pg, <vscale x 8 x half> *%addr) {
270256
; CHECK-LABEL: ld3.nxv24f16:
271257
; CHECK: // %bb.0:
272-
; CHECK-NEXT: addvl x8, x0, #21
273-
; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8]
258+
; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, #21, mul vl]
274259
; CHECK-NEXT: ret
275260
%base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 21
276261
%base_ptr = bitcast <vscale x 8 x half>* %base to half *
@@ -281,8 +266,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3.nx
281266
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld3.nxv24bf16(<vscale x 8 x i1> %Pg, <vscale x 8 x bfloat> *%addr) #0 {
282267
; CHECK-LABEL: ld3.nxv24bf16:
283268
; CHECK: // %bb.0:
284-
; CHECK-NEXT: addvl x8, x0, #-24
285-
; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8]
269+
; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, #-24, mul vl]
286270
; CHECK-NEXT: ret
287271
%base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 -24
288272
%base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
@@ -294,8 +278,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @
294278
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3.nxv12i32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> *%addr) {
295279
; CHECK-LABEL: ld3.nxv12i32:
296280
; CHECK: // %bb.0:
297-
; CHECK-NEXT: addvl x8, x0, #21
298-
; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8]
281+
; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x0, #21, mul vl]
299282
; CHECK-NEXT: ret
300283
%base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 21
301284
%base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
@@ -306,8 +289,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3.nxv12
306289
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3.nxv12f32(<vscale x 4 x i1> %Pg, <vscale x 4 x float> *%addr) {
307290
; CHECK-LABEL: ld3.nxv12f32:
308291
; CHECK: // %bb.0:
309-
; CHECK-NEXT: addvl x8, x0, #-24
310-
; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8]
292+
; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x0, #-24, mul vl]
311293
; CHECK-NEXT: ret
312294
%base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -24
313295
%base_ptr = bitcast <vscale x 4 x float>* %base to float *
@@ -319,8 +301,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3
319301
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3.nxv6i64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> *%addr) {
320302
; CHECK-LABEL: ld3.nxv6i64:
321303
; CHECK: // %bb.0:
322-
; CHECK-NEXT: addvl x8, x0, #21
323-
; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8]
304+
; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x0, #21, mul vl]
324305
; CHECK-NEXT: ret
325306
%base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 21
326307
%base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
@@ -331,8 +312,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3.nxv6i
331312
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld3.nxv6f64(<vscale x 2 x i1> %Pg, <vscale x 2 x double> *%addr) {
332313
; CHECK-LABEL: ld3.nxv6f64:
333314
; CHECK: // %bb.0:
334-
; CHECK-NEXT: addvl x8, x0, #-24
335-
; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8]
315+
; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x0, #-24, mul vl]
336316
; CHECK-NEXT: ret
337317
%base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -24
338318
%base_ptr = bitcast <vscale x 2 x double>* %base to double *
@@ -344,8 +324,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @
344324
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4.nxv64i8(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
345325
; CHECK-LABEL: ld4.nxv64i8:
346326
; CHECK: // %bb.0:
347-
; CHECK-NEXT: rdvl x8, #4
348-
; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
327+
; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, #4, mul vl]
349328
; CHECK-NEXT: ret
350329
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4
351330
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -356,8 +335,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
356335
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4.nxv64i8_lower_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
357336
; CHECK-LABEL: ld4.nxv64i8_lower_bound:
358337
; CHECK: // %bb.0:
359-
; CHECK-NEXT: rdvl x8, #-32
360-
; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
338+
; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, #-32, mul vl]
361339
; CHECK-NEXT: ret
362340
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32
363341
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -368,8 +346,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
368346
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4.nxv64i8_upper_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
369347
; CHECK-LABEL: ld4.nxv64i8_upper_bound:
370348
; CHECK: // %bb.0:
371-
; CHECK-NEXT: rdvl x8, #28
372-
; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
349+
; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, #28, mul vl]
373350
; CHECK-NEXT: ret
374351
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28
375352
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -455,8 +432,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
455432
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld4.nxv32i16(<vscale x 8 x i1> %Pg, <vscale x 8 x i16> *%addr) {
456433
; CHECK-LABEL: ld4.nxv32i16:
457434
; CHECK: // %bb.0:
458-
; CHECK-NEXT: addvl x8, x0, #8
459-
; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8]
435+
; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, #8, mul vl]
460436
; CHECK-NEXT: ret
461437
%base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8
462438
%base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
@@ -467,8 +443,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
467443
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld4.nxv32f16(<vscale x 8 x i1> %Pg, <vscale x 8 x half> *%addr) {
468444
; CHECK-LABEL: ld4.nxv32f16:
469445
; CHECK: // %bb.0:
470-
; CHECK-NEXT: addvl x8, x0, #28
471-
; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8]
446+
; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, #28, mul vl]
472447
; CHECK-NEXT: ret
473448
%base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 28
474449
%base_ptr = bitcast <vscale x 8 x half>* %base to half *
@@ -479,8 +454,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
479454
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4.nxv32bf16(<vscale x 8 x i1> %Pg, <vscale x 8 x bfloat> *%addr) #0 {
480455
; CHECK-LABEL: ld4.nxv32bf16:
481456
; CHECK: // %bb.0:
482-
; CHECK-NEXT: addvl x8, x0, #-32
483-
; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8]
457+
; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, #-32, mul vl]
484458
; CHECK-NEXT: ret
485459
%base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 -32
486460
%base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
@@ -492,8 +466,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
492466
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld4.nxv16i32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> *%addr) {
493467
; CHECK-LABEL: ld4.nxv16i32:
494468
; CHECK: // %bb.0:
495-
; CHECK-NEXT: addvl x8, x0, #28
496-
; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8]
469+
; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x0, #28, mul vl]
497470
; CHECK-NEXT: ret
498471
%base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 28
499472
%base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
@@ -504,8 +477,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
504477
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld4.nxv16f32(<vscale x 4 x i1> %Pg, <vscale x 4 x float>* %addr) {
505478
; CHECK-LABEL: ld4.nxv16f32:
506479
; CHECK: // %bb.0:
507-
; CHECK-NEXT: addvl x8, x0, #-32
508-
; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8]
480+
; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x0, #-32, mul vl]
509481
; CHECK-NEXT: ret
510482
%base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -32
511483
%base_ptr = bitcast <vscale x 4 x float>* %base to float *
@@ -517,8 +489,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
517489
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld4.nxv8i64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> *%addr) {
518490
; CHECK-LABEL: ld4.nxv8i64:
519491
; CHECK: // %bb.0:
520-
; CHECK-NEXT: addvl x8, x0, #28
521-
; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8]
492+
; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x0, #28, mul vl]
522493
; CHECK-NEXT: ret
523494
%base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 28
524495
%base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
@@ -529,8 +500,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
529500
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld4.nxv8f64(<vscale x 2 x i1> %Pg, <vscale x 2 x double> *%addr) {
530501
; CHECK-LABEL: ld4.nxv8f64:
531502
; CHECK: // %bb.0:
532-
; CHECK-NEXT: addvl x8, x0, #-32
533-
; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8]
503+
; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x0, #-32, mul vl]
534504
; CHECK-NEXT: ret
535505
%base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -32
536506
%base_ptr = bitcast <vscale x 2 x double>* %base to double *

0 commit comments

Comments
 (0)