Skip to content

Commit 780a511

Browse files
authored
[AArch64] Fix condition for combining UADDV and Add. (#76809)
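This combine should have been checking that the transform was valid, but it used incorrect conditions, letting through invalid combinations of lo/hi extracts. The transform is only valid when one extract starts at index 0 (the low half) and the other at index VT.getVectorNumElements() (the high half); the old `&&` conditions only bailed out when both indices were neither 0 nor VT.getVectorNumElements(), so lo/lo and hi/hi pairs were wrongly combined. Hopefully fixes #76769.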
1 parent 274f833 commit 780a511

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16516,9 +16516,9 @@ static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG) {
1651616516
if (Ext0.getOperand(0).getValueType().getVectorNumElements() !=
1651716517
VT.getVectorNumElements() * 2)
1651816518
return SDValue();
16519-
if ((Ext0.getConstantOperandVal(1) != 0 &&
16519+
if ((Ext0.getConstantOperandVal(1) != 0 ||
1652016520
Ext1.getConstantOperandVal(1) != VT.getVectorNumElements()) &&
16521-
(Ext1.getConstantOperandVal(1) != 0 &&
16521+
(Ext1.getConstantOperandVal(1) != 0 ||
1652216522
Ext0.getConstantOperandVal(1) != VT.getVectorNumElements()))
1652316523
return SDValue();
1652416524
unsigned Opcode = Op0.getOpcode() == ISD::ZERO_EXTEND ? AArch64ISD::UADDLP

llvm/test/CodeGen/AArch64/vecreduce-add.ll

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6624,6 +6624,74 @@ entry:
66246624
ret i32 %op.rdx.7
66256625
}
66266626

6627+
define i32 @extract_hi_lo(<8 x i16> %a) {
6628+
; CHECK-SD-BASE-LABEL: extract_hi_lo:
6629+
; CHECK-SD-BASE: // %bb.0: // %entry
6630+
; CHECK-SD-BASE-NEXT: uaddlv s0, v0.8h
6631+
; CHECK-SD-BASE-NEXT: fmov w0, s0
6632+
; CHECK-SD-BASE-NEXT: ret
6633+
;
6634+
; CHECK-SD-DOT-LABEL: extract_hi_lo:
6635+
; CHECK-SD-DOT: // %bb.0: // %entry
6636+
; CHECK-SD-DOT-NEXT: uaddlv s0, v0.8h
6637+
; CHECK-SD-DOT-NEXT: fmov w0, s0
6638+
; CHECK-SD-DOT-NEXT: ret
6639+
;
6640+
; CHECK-GI-BASE-LABEL: extract_hi_lo:
6641+
; CHECK-GI-BASE: // %bb.0: // %entry
6642+
; CHECK-GI-BASE-NEXT: ushll v1.4s, v0.4h, #0
6643+
; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
6644+
; CHECK-GI-BASE-NEXT: addv s0, v0.4s
6645+
; CHECK-GI-BASE-NEXT: fmov w0, s0
6646+
; CHECK-GI-BASE-NEXT: ret
6647+
;
6648+
; CHECK-GI-DOT-LABEL: extract_hi_lo:
6649+
; CHECK-GI-DOT: // %bb.0: // %entry
6650+
; CHECK-GI-DOT-NEXT: ushll v1.4s, v0.4h, #0
6651+
; CHECK-GI-DOT-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
6652+
; CHECK-GI-DOT-NEXT: addv s0, v0.4s
6653+
; CHECK-GI-DOT-NEXT: fmov w0, s0
6654+
; CHECK-GI-DOT-NEXT: ret
6655+
entry:
6656+
%e1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6657+
%e2 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6658+
%z1 = zext <4 x i16> %e1 to <4 x i32>
6659+
%z2 = zext <4 x i16> %e2 to <4 x i32>
6660+
%z4 = add <4 x i32> %z1, %z2
6661+
%z5 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z4)
6662+
ret i32 %z5
6663+
}
6664+
6665+
define i32 @extract_hi_hi(<8 x i16> %a) {
6666+
; CHECK-LABEL: extract_hi_hi:
6667+
; CHECK: // %bb.0: // %entry
6668+
; CHECK-NEXT: uaddl2 v0.4s, v0.8h, v0.8h
6669+
; CHECK-NEXT: addv s0, v0.4s
6670+
; CHECK-NEXT: fmov w0, s0
6671+
; CHECK-NEXT: ret
6672+
entry:
6673+
%e2 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6674+
%z2 = zext <4 x i16> %e2 to <4 x i32>
6675+
%z4 = add <4 x i32> %z2, %z2
6676+
%z5 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z4)
6677+
ret i32 %z5
6678+
}
6679+
6680+
define i32 @extract_lo_lo(<8 x i16> %a) {
6681+
; CHECK-LABEL: extract_lo_lo:
6682+
; CHECK: // %bb.0: // %entry
6683+
; CHECK-NEXT: uaddl v0.4s, v0.4h, v0.4h
6684+
; CHECK-NEXT: addv s0, v0.4s
6685+
; CHECK-NEXT: fmov w0, s0
6686+
; CHECK-NEXT: ret
6687+
entry:
6688+
%e1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6689+
%z1 = zext <4 x i16> %e1 to <4 x i32>
6690+
%z4 = add <4 x i32> %z1, %z1
6691+
%z5 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z4)
6692+
ret i32 %z5
6693+
}
6694+
66276695
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1
66286696
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
66296697
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)

0 commit comments

Comments
 (0)