Skip to content

Commit 673da8c

Browse files
[LLVM][CodeGen][AArch64] Remove bogus lowering of sve_while{gt/ge/hi/hs} intrinsics. (#88126)
When fed constant operands, we try to lower WHILE intrinsics to PTRUE using a fixed-length pattern. This is not valid for the decrementing variants of WHILE (whilegt/whilege/whilehi/whilehs), because they construct their result predicate vector by traversing from high->low lanes, whereas the incrementing variants and PTRUE traverse from low->high lanes. Whilst we could still utilise PTRUE by reversing its result, I figure replacing a single WHILE with multiple SVE instructions is likely counterproductive, so the decrementing variants are no longer lowered to PTRUE.
1 parent 8095b9c commit 673da8c

File tree

2 files changed

+28
-223
lines changed

2 files changed

+28
-223
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5035,21 +5035,18 @@ static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
50355035
DAG.getTargetConstant(Pattern, DL, MVT::i32));
50365036
}
50375037

5038-
static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned,
5039-
bool IsLess, bool IsEqual) {
5038+
static SDValue optimizeIncrementingWhile(SDValue Op, SelectionDAG &DAG,
5039+
bool IsSigned, bool IsEqual) {
50405040
if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
50415041
!isa<ConstantSDNode>(Op.getOperand(2)))
50425042
return SDValue();
50435043

50445044
SDLoc dl(Op);
50455045
APInt X = Op.getConstantOperandAPInt(1);
50465046
APInt Y = Op.getConstantOperandAPInt(2);
5047-
APInt NumActiveElems;
50485047
bool Overflow;
5049-
if (IsLess)
5050-
NumActiveElems = IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
5051-
else
5052-
NumActiveElems = IsSigned ? X.ssub_ov(Y, Overflow) : X.usub_ov(Y, Overflow);
5048+
APInt NumActiveElems =
5049+
IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
50535050

50545051
if (Overflow)
50555052
return SDValue();
@@ -5396,29 +5393,17 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
53965393
return SDValue();
53975394
}
53985395
case Intrinsic::aarch64_sve_whilelo:
5399-
return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
5400-
/*IsEqual=*/false);
5396+
return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false,
5397+
/*IsEqual=*/false);
54015398
case Intrinsic::aarch64_sve_whilelt:
5402-
return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
5403-
/*IsEqual=*/false);
5399+
return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/true,
5400+
/*IsEqual=*/false);
54045401
case Intrinsic::aarch64_sve_whilels:
5405-
return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
5406-
/*IsEqual=*/true);
5402+
return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false,
5403+
/*IsEqual=*/true);
54075404
case Intrinsic::aarch64_sve_whilele:
5408-
return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
5409-
/*IsEqual=*/true);
5410-
case Intrinsic::aarch64_sve_whilege:
5411-
return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
5412-
/*IsEqual=*/true);
5413-
case Intrinsic::aarch64_sve_whilegt:
5414-
return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
5415-
/*IsEqual=*/false);
5416-
case Intrinsic::aarch64_sve_whilehs:
5417-
return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
5418-
/*IsEqual=*/true);
5419-
case Intrinsic::aarch64_sve_whilehi:
5420-
return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
5421-
/*IsEqual=*/false);
5405+
return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/true,
5406+
/*IsEqual=*/true);
54225407
case Intrinsic::aarch64_sve_sunpkhi:
54235408
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
54245409
Op.getOperand(1));

llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll

Lines changed: 16 additions & 196 deletions
Original file line numberDiff line numberDiff line change
@@ -78,68 +78,16 @@ define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
7878
ret <vscale x 2 x i1> %out
7979
}
8080

81-
define <vscale x 2 x i1> @whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
82-
; CHECK-LABEL: whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec:
83-
; CHECK: // %bb.0:
84-
; CHECK-NEXT: mov w8, #3
85-
; CHECK-NEXT: whilege p0.d, x8, xzr
86-
; CHECK-NEXT: ret
87-
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 3, i64 0)
88-
ret <vscale x 2 x i1> %out
89-
}
90-
81+
; Ensure we don't convert constant decrementing while instructions to ptrue.
9182
define <vscale x 16 x i1> @whilege_b_ii() {
9283
; CHECK-LABEL: whilege_b_ii:
9384
; CHECK: // %bb.0: // %entry
94-
; CHECK-NEXT: ptrue p0.b, vl6
95-
; CHECK-NEXT: ret
96-
entry:
97-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2)
98-
ret <vscale x 16 x i1> %out
99-
}
100-
101-
define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
102-
; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
103-
; CHECK: // %bb.0: // %entry
104-
; CHECK-NEXT: mov w8, #9
105-
; CHECK-NEXT: whilege p0.b, x8, xzr
106-
; CHECK-NEXT: ret
107-
entry:
108-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 9, i64 0)
109-
ret <vscale x 16 x i1> %out
110-
}
111-
112-
define <vscale x 16 x i1> @whilege_b_ii_vl_maximum() vscale_range(16, 16) {
113-
; CHECK-LABEL: whilege_b_ii_vl_maximum:
114-
; CHECK: // %bb.0:
115-
; CHECK-NEXT: ptrue p0.b, vl256
116-
; CHECK-NEXT: ret
117-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 255, i64 0)
118-
ret <vscale x 16 x i1> %out
119-
}
120-
121-
define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_overflow() {
122-
; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_overflow:
123-
; CHECK: // %bb.0: // %entry
124-
; CHECK-NEXT: mov w8, #2
125-
; CHECK-NEXT: mov w9, #2147483647
126-
; CHECK-NEXT: movk w8, #32768, lsl #16
85+
; CHECK-NEXT: mov w8, #-2 // =0xfffffffe
86+
; CHECK-NEXT: mov w9, #3 // =0x3
12787
; CHECK-NEXT: whilege p0.b, w9, w8
12888
; CHECK-NEXT: ret
12989
entry:
130-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 2147483647, i32 -2147483646)
131-
ret <vscale x 16 x i1> %out
132-
}
133-
134-
define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_increment_overflow() {
135-
; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_increment_overflow:
136-
; CHECK: // %bb.0: // %entry
137-
; CHECK-NEXT: mov w8, #2147483647
138-
; CHECK-NEXT: mov w9, #-2147483641
139-
; CHECK-NEXT: whilege p0.b, w9, w8
140-
; CHECK-NEXT: ret
141-
entry:
142-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 -2147483641, i32 2147483647)
90+
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2)
14391
ret <vscale x 16 x i1> %out
14492
}
14593

@@ -219,69 +167,19 @@ define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
219167
ret <vscale x 2 x i1> %out
220168
}
221169

222-
define <vscale x 2 x i1> @whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
223-
; CHECK-LABEL: whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec:
224-
; CHECK: // %bb.0:
225-
; CHECK-NEXT: mov w8, #3
226-
; CHECK-NEXT: whilehs p0.d, x8, xzr
227-
; CHECK-NEXT: ret
228-
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 3, i64 0)
229-
ret <vscale x 2 x i1> %out
230-
}
231-
170+
; Ensure we don't convert constant decrementing while instructions to ptrue.
232171
define <vscale x 16 x i1> @whilehs_b_ii() {
233172
; CHECK-LABEL: whilehs_b_ii:
234173
; CHECK: // %bb.0: // %entry
235-
; CHECK-NEXT: ptrue p0.b, vl7
174+
; CHECK-NEXT: mov w8, #2 // =0x2
175+
; CHECK-NEXT: mov w9, #8 // =0x8
176+
; CHECK-NEXT: whilehs p0.b, x9, x8
236177
; CHECK-NEXT: ret
237178
entry:
238179
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 8, i64 2)
239180
ret <vscale x 16 x i1> %out
240181
}
241182

242-
define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
243-
; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
244-
; CHECK: // %bb.0: // %entry
245-
; CHECK-NEXT: mov w8, #9
246-
; CHECK-NEXT: whilehs p0.b, x8, xzr
247-
; CHECK-NEXT: ret
248-
entry:
249-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 9, i64 0)
250-
ret <vscale x 16 x i1> %out
251-
}
252-
253-
define <vscale x 16 x i1> @whilehs_b_ii_vl_maximum() vscale_range(16, 16) {
254-
; CHECK-LABEL: whilehs_b_ii_vl_maximum:
255-
; CHECK: // %bb.0:
256-
; CHECK-NEXT: ptrue p0.b, vl256
257-
; CHECK-NEXT: ret
258-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 255, i64 0)
259-
ret <vscale x 16 x i1> %out
260-
}
261-
262-
define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_overflow() {
263-
; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_overflow:
264-
; CHECK: // %bb.0: // %entry
265-
; CHECK-NEXT: mov w8, #-1
266-
; CHECK-NEXT: mov w9, #6
267-
; CHECK-NEXT: whilehs p0.b, w9, w8
268-
; CHECK-NEXT: ret
269-
entry:
270-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 6, i32 4294967295)
271-
ret <vscale x 16 x i1> %out
272-
}
273-
274-
define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_increment_overflow() {
275-
; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_increment_overflow:
276-
; CHECK: // %bb.0: // %entry
277-
; CHECK-NEXT: mov w8, #-1
278-
; CHECK-NEXT: whilehs p0.b, w8, wzr
279-
; CHECK-NEXT: ret
280-
entry:
281-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 4294967295, i32 0)
282-
ret <vscale x 16 x i1> %out
283-
}
284-
285183
;
286184
; WHILEGT
287185
;
@@ -358,55 +256,16 @@ define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
358256
ret <vscale x 2 x i1> %out
359257
}
360258

361-
define <vscale x 2 x i1> @whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
362-
; CHECK-LABEL: whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec:
363-
; CHECK: // %bb.0:
364-
; CHECK-NEXT: mov w8, #3
365-
; CHECK-NEXT: whilegt p0.d, x8, xzr
366-
; CHECK-NEXT: ret
367-
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 3, i64 0)
368-
ret <vscale x 2 x i1> %out
369-
}
370-
259+
; Ensure we don't convert constant decrementing while instructions to ptrue.
371260
define <vscale x 16 x i1> @whilegt_b_ii() {
372261
; CHECK-LABEL: whilegt_b_ii:
373262
; CHECK: // %bb.0: // %entry
374-
; CHECK-NEXT: ptrue p0.b, vl5
375-
; CHECK-NEXT: ret
376-
entry:
377-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 3, i32 -2)
378-
ret <vscale x 16 x i1> %out
379-
}
380-
381-
define <vscale x 16 x i1> @whilegt_b_ii_fold_to_ptrue_nonexistent_vl9() {
382-
; CHECK-LABEL: whilegt_b_ii_fold_to_ptrue_nonexistent_vl9:
383-
; CHECK: // %bb.0: // %entry
384-
; CHECK-NEXT: mov w8, #9
385-
; CHECK-NEXT: whilegt p0.b, x8, xzr
386-
; CHECK-NEXT: ret
387-
entry:
388-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 9, i64 0)
389-
ret <vscale x 16 x i1> %out
390-
}
391-
392-
define <vscale x 16 x i1> @whilegt_b_ii_vl_maximum() vscale_range(16, 16) {
393-
; CHECK-LABEL: whilegt_b_ii_vl_maximum:
394-
; CHECK: // %bb.0:
395-
; CHECK-NEXT: ptrue p0.b, vl256
396-
; CHECK-NEXT: ret
397-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 256, i64 0)
398-
ret <vscale x 16 x i1> %out
399-
}
400-
401-
define <vscale x 16 x i1> @whilegt_b_ii_dont_fold_to_ptrue_overflow() {
402-
; CHECK-LABEL: whilegt_b_ii_dont_fold_to_ptrue_overflow:
403-
; CHECK: // %bb.0: // %entry
404-
; CHECK-NEXT: mov w8, #2147483647
405-
; CHECK-NEXT: mov w9, #-2147483641
263+
; CHECK-NEXT: mov w8, #-2 // =0xfffffffe
264+
; CHECK-NEXT: mov w9, #3 // =0x3
406265
; CHECK-NEXT: whilegt p0.b, w9, w8
407266
; CHECK-NEXT: ret
408267
entry:
409-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 -2147483641, i32 2147483647)
268+
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 3, i32 -2)
410269
ret <vscale x 16 x i1> %out
411270
}
412271

@@ -486,58 +345,19 @@ define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
486345
ret <vscale x 2 x i1> %out
487346
}
488347

489-
define <vscale x 2 x i1> @whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
490-
; CHECK-LABEL: whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec:
491-
; CHECK: // %bb.0:
492-
; CHECK-NEXT: mov w8, #3
493-
; CHECK-NEXT: whilehi p0.d, x8, xzr
494-
; CHECK-NEXT: ret
495-
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 3, i64 0)
496-
ret <vscale x 2 x i1> %out
497-
}
498-
348+
; Ensure we don't convert constant decrementing while instructions to ptrue.
499349
define <vscale x 16 x i1> @whilehi_b_ii() {
500350
; CHECK-LABEL: whilehi_b_ii:
501351
; CHECK: // %bb.0: // %entry
502-
; CHECK-NEXT: ptrue p0.b, vl6
352+
; CHECK-NEXT: mov w8, #2 // =0x2
353+
; CHECK-NEXT: mov w9, #8 // =0x8
354+
; CHECK-NEXT: whilehi p0.b, x9, x8
503355
; CHECK-NEXT: ret
504356
entry:
505357
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 8, i64 2)
506358
ret <vscale x 16 x i1> %out
507359
}
508360

509-
define <vscale x 16 x i1> @whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
510-
; CHECK-LABEL: whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
511-
; CHECK: // %bb.0: // %entry
512-
; CHECK-NEXT: mov w8, #9
513-
; CHECK-NEXT: whilehi p0.b, x8, xzr
514-
; CHECK-NEXT: ret
515-
entry:
516-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 9, i64 0)
517-
ret <vscale x 16 x i1> %out
518-
}
519-
520-
define <vscale x 16 x i1> @whilehi_b_ii_vl_maximum() vscale_range(16, 16) {
521-
; CHECK-LABEL: whilehi_b_ii_vl_maximum:
522-
; CHECK: // %bb.0:
523-
; CHECK-NEXT: ptrue p0.b, vl256
524-
; CHECK-NEXT: ret
525-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 256, i64 0)
526-
ret <vscale x 16 x i1> %out
527-
}
528-
529-
define <vscale x 16 x i1> @whilelhi_b_ii_dont_fold_to_ptrue_overflow() {
530-
; CHECK-LABEL: whilelhi_b_ii_dont_fold_to_ptrue_overflow:
531-
; CHECK: // %bb.0: // %entry
532-
; CHECK-NEXT: mov w8, #-1
533-
; CHECK-NEXT: mov w9, #7
534-
; CHECK-NEXT: whilehi p0.b, w9, w8
535-
; CHECK-NEXT: ret
536-
entry:
537-
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 7, i32 4294967295)
538-
ret <vscale x 16 x i1> %out
539-
}
540-
541361
declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32)
542362
declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64)
543363
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32)

0 commit comments

Comments
 (0)