Skip to content

Commit 2deb10c

Browse files
committed
[AArch64][SVE] Fix crash for DestructiveBinaryComm zero merging
This fix is similar to D124325. I found that the operands of the DestructiveBinaryComm operation type may also be allocated the same register, so insert the LSL as well. The resulting sequence is: `movprfx z0.s, p0/z, z0.s`; `lsl z0.b, p0/m, z0.b, #0`; `fmul z0.s, p0/m, z0.s, z0.s`. Reviewed By: paulwalker-arm. Differential Revision: https://reviews.llvm.org/D141471
1 parent da78ae4 commit 2deb10c

File tree

2 files changed

+44
-2
lines changed

2 files changed

+44
-2
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,8 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
558558
if (FalseZero) {
559559
// If we cannot prefix the requested instruction we'll instead emit a
560560
// prefixed_zeroing_mov for DestructiveBinary.
561-
assert((DOPRegIsUnique || AArch64::DestructiveBinary == DType) &&
561+
assert((DOPRegIsUnique || ((DType == AArch64::DestructiveBinary) ||
562+
(DType == AArch64::DestructiveBinaryComm))) &&
562563
"The destructive operand should be unique");
563564
assert(ElementSize != AArch64::ElementSizeNone &&
564565
"This instruction is unpredicated");
@@ -575,7 +576,9 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
575576
// Create the additional LSL to zero the lanes when the DstReg is not
576577
// unique. Zeros the lanes in z0 that aren't active in p0 with sequence
577578
// movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
578-
if (DType == AArch64::DestructiveBinary && !DOPRegIsUnique) {
579+
if (((DType == AArch64::DestructiveBinary) ||
580+
(DType == AArch64::DestructiveBinaryComm)) &&
581+
!DOPRegIsUnique) {
579582
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
580583
.addReg(DstReg, RegState::Define)
581584
.add(MI.getOperand(PredIdx))
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+use-experimental-zeroing-pseudos -run-pass=aarch64-expand-pseudo %s -o - | FileCheck %s
3+
4+
# Should create an additional LSL to zero the lanes as the DstReg is not unique
5+
6+
--- |
7+
define <vscale x 4 x float> @fmul_float_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a){
8+
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
9+
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %a_z)
10+
ret <vscale x 4 x float> %out
11+
}
12+
13+
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
14+
...
15+
---
16+
name: fmul_float_zero
17+
alignment: 4
18+
tracksRegLiveness: true
19+
tracksDebugUserValues: true
20+
registers: []
21+
liveins:
22+
- { reg: '$p0', virtual-reg: '' }
23+
- { reg: '$z0', virtual-reg: '' }
24+
body: |
25+
bb.0 (%ir-block.0):
26+
liveins: $p0, $z0
27+
28+
; CHECK-LABEL: name: fmul_float_zero
29+
; CHECK: liveins: $p0, $z0
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit-def $z0_hi, implicit $p0, implicit $z0 {
32+
; CHECK-NEXT: $z0 = MOVPRFX_ZPzZ_S $p0, $z0
33+
; CHECK-NEXT: $z0 = LSL_ZPmI_S renamable $p0, internal $z0, 0
34+
; CHECK-NEXT: $z0 = FMUL_ZPmZ_S renamable $p0, internal killed $z0, internal killed renamable $z0
35+
; CHECK-NEXT: }
36+
; CHECK-NEXT: RET undef $lr, implicit $z0
37+
renamable $z0 = nnan ninf nsz arcp contract afn reassoc FMUL_ZPZZ_ZERO_S renamable $p0, killed renamable $z0, renamable $z0
38+
RET_ReallyLR implicit $z0
39+
...

0 commit comments

Comments
 (0)