Skip to content

Commit 2640277

Browse files
authored
[AArch64] Optimized generated assembly for bool to svbool_t conversions (#83001)
In certain cases the Legalizer was generating an `AND(WHILELO, SPLAT 1)` instruction pattern when `WHILELO` alone would be sufficient.
1 parent cd344a4 commit 2640277

File tree

2 files changed

+43
-5
lines changed

2 files changed

+43
-5
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -271,11 +271,9 @@ static bool isMergePassthruOpcode(unsigned Opc) {
271271
static bool isZeroingInactiveLanes(SDValue Op) {
272272
switch (Op.getOpcode()) {
273273
default:
274-
// We guarantee i1 splat_vectors to zero the other lanes by
275-
// implementing it with ptrue and possibly a punpklo for nxv1i1.
276-
if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
277-
return true;
278274
return false;
275+
// We guarantee i1 splat_vectors to zero the other lanes
276+
case ISD::SPLAT_VECTOR:
279277
case AArch64ISD::PTRUE:
280278
case AArch64ISD::SETCC_MERGE_ZERO:
281279
return true;

llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
33
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
44

@@ -150,6 +150,46 @@ define <vscale x 16 x i1> @chained_reinterpret() {
150150
ret <vscale x 16 x i1> %out
151151
}
152152

153+
define <vscale x 16 x i1> @reinterpret_scalar_bool_h(i1 %x){
154+
; CHECK-LABEL: reinterpret_scalar_bool_h:
155+
; CHECK: // %bb.0:
156+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
157+
; CHECK-NEXT: sbfx x8, x0, #0, #1
158+
; CHECK-NEXT: whilelo p0.h, xzr, x8
159+
; CHECK-NEXT: ret
160+
%.splatinsert = insertelement <vscale x 8 x i1> poison, i1 %x, i64 0
161+
%.splat = shufflevector <vscale x 8 x i1> %.splatinsert, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
162+
%out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %.splat)
163+
ret <vscale x 16 x i1> %out
164+
}
165+
166+
define <vscale x 16 x i1> @reinterpret_scalar_bool_s(i1 %x){
167+
; CHECK-LABEL: reinterpret_scalar_bool_s:
168+
; CHECK: // %bb.0:
169+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
170+
; CHECK-NEXT: sbfx x8, x0, #0, #1
171+
; CHECK-NEXT: whilelo p0.s, xzr, x8
172+
; CHECK-NEXT: ret
173+
%.splatinsert = insertelement <vscale x 4 x i1> poison, i1 %x, i64 0
174+
%.splat = shufflevector <vscale x 4 x i1> %.splatinsert, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
175+
%out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %.splat)
176+
ret <vscale x 16 x i1> %out
177+
}
178+
179+
define <vscale x 16 x i1> @reinterpret_scalar_bool_q(i1 %x){
180+
; CHECK-LABEL: reinterpret_scalar_bool_q:
181+
; CHECK: // %bb.0:
182+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
183+
; CHECK-NEXT: sbfx x8, x0, #0, #1
184+
; CHECK-NEXT: whilelo p0.d, xzr, x8
185+
; CHECK-NEXT: ret
186+
%.splatinsert = insertelement <vscale x 2 x i1> poison, i1 %x, i64 0
187+
%.splat = shufflevector <vscale x 2 x i1> %.splatinsert, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
188+
%out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %.splat)
189+
ret <vscale x 16 x i1> %out
190+
}
191+
192+
153193
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
154194
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
155195
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)

0 commit comments

Comments
 (0)