[AArch64] Fix condition for combining UADDV and Add. #76809

davemgreen · 2024-01-03T11:32:25Z

This should have been checking that the transform was valid, but used incorrect conditions letting through invalid combinations of lo/hi extracts.

Hopefully fixes #76769

This should have been checking that the transform was valid, but used incorrect conditions letting through invalid combinations of lo/hi extracts.

llvmbot · 2024-01-03T11:32:55Z

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

This should have been checking that the transform was valid, but used incorrect conditions letting through invalid combinations of lo/hi extracts.

Full diff: https://github.com/llvm/llvm-project/pull/76809.diff

2 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2-2)
(modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+68)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 102fd0c3dae2ab..3583b7d2ce4e71 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16516,9 +16516,9 @@ static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG) {
     if (Ext0.getOperand(0).getValueType().getVectorNumElements() !=
         VT.getVectorNumElements() * 2)
       return SDValue();
-    if ((Ext0.getConstantOperandVal(1) != 0 &&
+    if ((Ext0.getConstantOperandVal(1) != 0 ||
          Ext1.getConstantOperandVal(1) != VT.getVectorNumElements()) &&
-        (Ext1.getConstantOperandVal(1) != 0 &&
+        (Ext1.getConstantOperandVal(1) != 0 ||
          Ext0.getConstantOperandVal(1) != VT.getVectorNumElements()))
       return SDValue();
     unsigned Opcode = Op0.getOpcode() == ISD::ZERO_EXTEND ? AArch64ISD::UADDLP
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index b24967d5a130ed..5fa28f77dc285f 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -6624,6 +6624,74 @@ entry:
   ret i32 %op.rdx.7
 }
 
+define i32 @extract_hi_lo(<8 x i16> %a) {
+; CHECK-SD-BASE-LABEL: extract_hi_lo:
+; CHECK-SD-BASE:       // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT:    uaddlv s0, v0.8h
+; CHECK-SD-BASE-NEXT:    fmov w0, s0
+; CHECK-SD-BASE-NEXT:    ret
+;
+; CHECK-SD-DOT-LABEL: extract_hi_lo:
+; CHECK-SD-DOT:       // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT:    uaddlv s0, v0.8h
+; CHECK-SD-DOT-NEXT:    fmov w0, s0
+; CHECK-SD-DOT-NEXT:    ret
+;
+; CHECK-GI-BASE-LABEL: extract_hi_lo:
+; CHECK-GI-BASE:       // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-BASE-NEXT:    addv s0, v0.4s
+; CHECK-GI-BASE-NEXT:    fmov w0, s0
+; CHECK-GI-BASE-NEXT:    ret
+;
+; CHECK-GI-DOT-LABEL: extract_hi_lo:
+; CHECK-GI-DOT:       // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-DOT-NEXT:    addv s0, v0.4s
+; CHECK-GI-DOT-NEXT:    fmov w0, s0
+; CHECK-GI-DOT-NEXT:    ret
+entry:
+  %e1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %e2 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %z1 = zext <4 x i16> %e1 to <4 x i32>
+  %z2 = zext <4 x i16> %e2 to <4 x i32>
+  %z4 = add <4 x i32> %z1, %z2
+  %z5 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z4)
+  ret i32 %z5
+}
+
+define i32 @extract_hi_hi(<8 x i16> %a) {
+; CHECK-LABEL: extract_hi_hi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddl2 v0.4s, v0.8h, v0.8h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %e2 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %z2 = zext <4 x i16> %e2 to <4 x i32>
+  %z4 = add <4 x i32> %z2, %z2
+  %z5 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z4)
+  ret i32 %z5
+}
+
+define i32 @extract_lo_lo(<8 x i16> %a) {
+; CHECK-LABEL: extract_lo_lo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddl v0.4s, v0.4h, v0.4h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %e1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %z1 = zext <4 x i16> %e1 to <4 x i32>
+  %z4 = add <4 x i32> %z1, %z1
+  %z5 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z4)
+  ret i32 %z5
+}
+
 declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1
 declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)

sjoerdmeijer · 2024-01-05T16:22:15Z

Looks like a good fix.

This should have been checking that the transform was valid, but used incorrect conditions letting through invalid combinations of lo/hi extracts. Hopefully fixes llvm#76769

[AArch64] Fix condition for combining UADDV and Add.

64c0fb0

This should have been checking that the transform was valid, but used incorrect conditions letting through invalid combinations of lo/hi extracts.

davemgreen requested review from aemerson, SamTebbs33 and sjoerdmeijer January 3, 2024 11:32

llvmbot added the backend:AArch64 label Jan 3, 2024

sjoerdmeijer approved these changes Jan 5, 2024

View reviewed changes

davemgreen merged commit 780a511 into llvm:main Jan 7, 2024

davemgreen deleted the gh-a64-uaddlvhilo branch January 7, 2024 08:23

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AArch64] Fix condition for combining UADDV and Add. #76809

[AArch64] Fix condition for combining UADDV and Add. #76809

Uh oh!

davemgreen commented Jan 3, 2024 •

edited

Loading

Uh oh!

llvmbot commented Jan 3, 2024

Uh oh!

sjoerdmeijer commented Jan 5, 2024

Uh oh!

Uh oh!

[AArch64] Fix condition for combining UADDV and Add. #76809

[AArch64] Fix condition for combining UADDV and Add. #76809

Uh oh!

Conversation

davemgreen commented Jan 3, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jan 3, 2024

Uh oh!

sjoerdmeijer commented Jan 5, 2024

Uh oh!

Uh oh!

davemgreen commented Jan 3, 2024 •

edited

Loading