[AArch64][GlobalISel] Select G_ICMP Zero Instruction #90054

chuongg3 · 2024-04-25T13:59:29Z

No description provided.

llvmbot · 2024-04-25T14:00:04Z

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-aarch64

Author: None (chuongg3)

Changes

Patch is 62.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90054.diff

6 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+46)
(modified) llvm/test/CodeGen/AArch64/aarch64-addv.ll (+12-13)
(modified) llvm/test/CodeGen/AArch64/arm64-vabs.ll (+105-122)
(modified) llvm/test/CodeGen/AArch64/icmp.ll (+664)
(modified) llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll (+11-26)
(modified) llvm/test/CodeGen/AArch64/neon-compare-instructions.ll (+140-350)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a7abb58064a535..bf331b97069ba2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5397,6 +5397,52 @@ def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
 def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
           (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
 
+multiclass SelectSetCCZeroRHS<PatFrags InFrag, string INST> {
+  def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), immAllZerosV)),
+            (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
+  def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), immAllZerosV)),
+            (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
+  def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), immAllZerosV)),
+            (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
+  def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), immAllZerosV)),
+            (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
+  def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), immAllZerosV)),
+            (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
+  def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), immAllZerosV)),
+            (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
+  def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), immAllZerosV)),
+            (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
+}
+
+defm : SelectSetCCZeroRHS<seteq, "CMEQ">;
+defm : SelectSetCCZeroRHS<setgt, "CMGT">;
+defm : SelectSetCCZeroRHS<setge, "CMGE">;
+defm : SelectSetCCZeroRHS<setlt, "CMLT">;
+defm : SelectSetCCZeroRHS<setle, "CMLE">;
+
+multiclass SelectSetCCZeroLHS<PatFrags InFrag, string INST> {
+  def : Pat<(v8i8 (InFrag immAllZerosV, (v8i8 V64:$Rn))),
+            (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
+  def : Pat<(v16i8 (InFrag immAllZerosV, (v16i8 V128:$Rn))),
+            (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
+  def : Pat<(v4i16 (InFrag immAllZerosV, (v4i16 V64:$Rn))),
+            (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
+  def : Pat<(v8i16 (InFrag immAllZerosV, (v8i16 V128:$Rn))),
+            (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
+  def : Pat<(v2i32 (InFrag immAllZerosV, (v2i32 V64:$Rn))),
+            (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
+  def : Pat<(v4i32 (InFrag immAllZerosV, (v4i32 V128:$Rn))),
+            (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
+  def : Pat<(v2i64 (InFrag immAllZerosV, (v2i64 V128:$Rn))),
+            (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
+}
+
+defm : SelectSetCCZeroLHS<seteq, "CMEQ">;
+defm : SelectSetCCZeroLHS<setgt, "CMLT">;
+defm : SelectSetCCZeroLHS<setge, "CMLE">;
+defm : SelectSetCCZeroLHS<setlt, "CMGT">;
+defm : SelectSetCCZeroLHS<setle, "CMGE">;
+
 let Predicates = [HasNEON] in {
 def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index ee035ec1941d57..94b792b887eb47 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -94,20 +94,19 @@ define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias
 ;
 ; GISEL-LABEL: oversized_ADDV_256:
 ; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    ldr d1, [x0]
-; GISEL-NEXT:    ldr d2, [x1]
-; GISEL-NEXT:    movi v0.2d, #0000000000000000
+; GISEL-NEXT:    ldr d0, [x0]
+; GISEL-NEXT:    ldr d1, [x1]
+; GISEL-NEXT:    ushll v0.8h, v0.8b, #0
 ; GISEL-NEXT:    ushll v1.8h, v1.8b, #0
-; GISEL-NEXT:    ushll v2.8h, v2.8b, #0
-; GISEL-NEXT:    usubl v3.4s, v1.4h, v2.4h
-; GISEL-NEXT:    usubl2 v1.4s, v1.8h, v2.8h
-; GISEL-NEXT:    cmgt v2.4s, v0.4s, v3.4s
-; GISEL-NEXT:    cmgt v0.4s, v0.4s, v1.4s
-; GISEL-NEXT:    neg v4.4s, v3.4s
-; GISEL-NEXT:    neg v5.4s, v1.4s
-; GISEL-NEXT:    bsl v2.16b, v4.16b, v3.16b
-; GISEL-NEXT:    bsl v0.16b, v5.16b, v1.16b
-; GISEL-NEXT:    add v0.4s, v2.4s, v0.4s
+; GISEL-NEXT:    usubl v2.4s, v0.4h, v1.4h
+; GISEL-NEXT:    usubl2 v0.4s, v0.8h, v1.8h
+; GISEL-NEXT:    cmlt v1.4s, v2.4s, #0
+; GISEL-NEXT:    cmlt v3.4s, v0.4s, #0
+; GISEL-NEXT:    neg v4.4s, v2.4s
+; GISEL-NEXT:    neg v5.4s, v0.4s
+; GISEL-NEXT:    bsl v1.16b, v4.16b, v2.16b
+; GISEL-NEXT:    bit v0.16b, v5.16b, v3.16b
+; GISEL-NEXT:    add v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    addv s0, v0.4s
 ; GISEL-NEXT:    fmov w0, s0
 ; GISEL-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index d64327656a9e01..f7d31a214563bc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -252,18 +252,17 @@ define i16 @uabd16b_rdx(ptr %a, ptr %b) {
 ;
 ; CHECK-GI-LABEL: uabd16b_rdx:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr q1, [x0]
-; CHECK-GI-NEXT:    ldr q2, [x1]
-; CHECK-GI-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT:    usubl.8h v3, v1, v2
-; CHECK-GI-NEXT:    usubl2.8h v1, v1, v2
-; CHECK-GI-NEXT:    cmgt.8h v2, v0, v3
-; CHECK-GI-NEXT:    cmgt.8h v0, v0, v1
-; CHECK-GI-NEXT:    neg.8h v4, v3
-; CHECK-GI-NEXT:    neg.8h v5, v1
-; CHECK-GI-NEXT:    bsl.16b v2, v4, v3
-; CHECK-GI-NEXT:    bsl.16b v0, v5, v1
-; CHECK-GI-NEXT:    add.8h v0, v2, v0
+; CHECK-GI-NEXT:    ldr q0, [x0]
+; CHECK-GI-NEXT:    ldr q1, [x1]
+; CHECK-GI-NEXT:    usubl.8h v2, v0, v1
+; CHECK-GI-NEXT:    usubl2.8h v0, v0, v1
+; CHECK-GI-NEXT:    cmlt.8h v1, v2, #0
+; CHECK-GI-NEXT:    cmlt.8h v3, v0, #0
+; CHECK-GI-NEXT:    neg.8h v4, v2
+; CHECK-GI-NEXT:    neg.8h v5, v0
+; CHECK-GI-NEXT:    bsl.16b v1, v4, v2
+; CHECK-GI-NEXT:    bit.16b v0, v5, v3
+; CHECK-GI-NEXT:    add.8h v0, v1, v0
 ; CHECK-GI-NEXT:    addv.8h h0, v0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
@@ -290,29 +289,28 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: uabd16b_rdx_i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ushll.8h v3, v0, #0
-; CHECK-GI-NEXT:    ushll.8h v4, v1, #0
+; CHECK-GI-NEXT:    ushll.8h v2, v0, #0
+; CHECK-GI-NEXT:    ushll.8h v3, v1, #0
 ; CHECK-GI-NEXT:    ushll2.8h v0, v0, #0
 ; CHECK-GI-NEXT:    ushll2.8h v1, v1, #0
-; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT:    usubl.4s v5, v3, v4
-; CHECK-GI-NEXT:    usubl2.4s v3, v3, v4
-; CHECK-GI-NEXT:    usubl.4s v4, v0, v1
+; CHECK-GI-NEXT:    usubl.4s v4, v2, v3
+; CHECK-GI-NEXT:    usubl2.4s v2, v2, v3
+; CHECK-GI-NEXT:    usubl.4s v3, v0, v1
 ; CHECK-GI-NEXT:    usubl2.4s v0, v0, v1
-; CHECK-GI-NEXT:    cmgt.4s v1, v2, v5
-; CHECK-GI-NEXT:    cmgt.4s v6, v2, v3
-; CHECK-GI-NEXT:    neg.4s v16, v5
-; CHECK-GI-NEXT:    cmgt.4s v7, v2, v4
-; CHECK-GI-NEXT:    cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT:    neg.4s v17, v3
-; CHECK-GI-NEXT:    neg.4s v18, v4
+; CHECK-GI-NEXT:    cmlt.4s v1, v4, #0
+; CHECK-GI-NEXT:    cmlt.4s v5, v2, #0
+; CHECK-GI-NEXT:    neg.4s v16, v4
+; CHECK-GI-NEXT:    cmlt.4s v6, v3, #0
+; CHECK-GI-NEXT:    cmlt.4s v7, v0, #0
+; CHECK-GI-NEXT:    neg.4s v17, v2
+; CHECK-GI-NEXT:    neg.4s v18, v3
 ; CHECK-GI-NEXT:    neg.4s v19, v0
-; CHECK-GI-NEXT:    bsl.16b v1, v16, v5
-; CHECK-GI-NEXT:    bit.16b v3, v17, v6
-; CHECK-GI-NEXT:    bit.16b v4, v18, v7
-; CHECK-GI-NEXT:    bit.16b v0, v19, v2
-; CHECK-GI-NEXT:    add.4s v1, v1, v3
-; CHECK-GI-NEXT:    add.4s v0, v4, v0
+; CHECK-GI-NEXT:    bsl.16b v1, v16, v4
+; CHECK-GI-NEXT:    bit.16b v2, v17, v5
+; CHECK-GI-NEXT:    bit.16b v3, v18, v6
+; CHECK-GI-NEXT:    bit.16b v0, v19, v7
+; CHECK-GI-NEXT:    add.4s v1, v1, v2
+; CHECK-GI-NEXT:    add.4s v0, v3, v0
 ; CHECK-GI-NEXT:    add.4s v0, v1, v0
 ; CHECK-GI-NEXT:    addv.4s s0, v0
 ; CHECK-GI-NEXT:    fmov w0, s0
@@ -338,29 +336,28 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: sabd16b_rdx_i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sshll.8h v3, v0, #0
-; CHECK-GI-NEXT:    sshll.8h v4, v1, #0
+; CHECK-GI-NEXT:    sshll.8h v2, v0, #0
+; CHECK-GI-NEXT:    sshll.8h v3, v1, #0
 ; CHECK-GI-NEXT:    sshll2.8h v0, v0, #0
 ; CHECK-GI-NEXT:    sshll2.8h v1, v1, #0
-; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT:    ssubl.4s v5, v3, v4
-; CHECK-GI-NEXT:    ssubl2.4s v3, v3, v4
-; CHECK-GI-NEXT:    ssubl.4s v4, v0, v1
+; CHECK-GI-NEXT:    ssubl.4s v4, v2, v3
+; CHECK-GI-NEXT:    ssubl2.4s v2, v2, v3
+; CHECK-GI-NEXT:    ssubl.4s v3, v0, v1
 ; CHECK-GI-NEXT:    ssubl2.4s v0, v0, v1
-; CHECK-GI-NEXT:    cmgt.4s v1, v2, v5
-; CHECK-GI-NEXT:    cmgt.4s v6, v2, v3
-; CHECK-GI-NEXT:    neg.4s v16, v5
-; CHECK-GI-NEXT:    cmgt.4s v7, v2, v4
-; CHECK-GI-NEXT:    cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT:    neg.4s v17, v3
-; CHECK-GI-NEXT:    neg.4s v18, v4
+; CHECK-GI-NEXT:    cmlt.4s v1, v4, #0
+; CHECK-GI-NEXT:    cmlt.4s v5, v2, #0
+; CHECK-GI-NEXT:    neg.4s v16, v4
+; CHECK-GI-NEXT:    cmlt.4s v6, v3, #0
+; CHECK-GI-NEXT:    cmlt.4s v7, v0, #0
+; CHECK-GI-NEXT:    neg.4s v17, v2
+; CHECK-GI-NEXT:    neg.4s v18, v3
 ; CHECK-GI-NEXT:    neg.4s v19, v0
-; CHECK-GI-NEXT:    bsl.16b v1, v16, v5
-; CHECK-GI-NEXT:    bit.16b v3, v17, v6
-; CHECK-GI-NEXT:    bit.16b v4, v18, v7
-; CHECK-GI-NEXT:    bit.16b v0, v19, v2
-; CHECK-GI-NEXT:    add.4s v1, v1, v3
-; CHECK-GI-NEXT:    add.4s v0, v4, v0
+; CHECK-GI-NEXT:    bsl.16b v1, v16, v4
+; CHECK-GI-NEXT:    bit.16b v2, v17, v5
+; CHECK-GI-NEXT:    bit.16b v3, v18, v6
+; CHECK-GI-NEXT:    bit.16b v0, v19, v7
+; CHECK-GI-NEXT:    add.4s v1, v1, v2
+; CHECK-GI-NEXT:    add.4s v0, v3, v0
 ; CHECK-GI-NEXT:    add.4s v0, v1, v0
 ; CHECK-GI-NEXT:    addv.4s s0, v0
 ; CHECK-GI-NEXT:    fmov w0, s0
@@ -391,18 +388,17 @@ define i32 @uabd8h_rdx(ptr %a, ptr %b) {
 ;
 ; CHECK-GI-LABEL: uabd8h_rdx:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr q1, [x0]
-; CHECK-GI-NEXT:    ldr q2, [x1]
-; CHECK-GI-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT:    usubl.4s v3, v1, v2
-; CHECK-GI-NEXT:    usubl2.4s v1, v1, v2
-; CHECK-GI-NEXT:    cmgt.4s v2, v0, v3
-; CHECK-GI-NEXT:    cmgt.4s v0, v0, v1
-; CHECK-GI-NEXT:    neg.4s v4, v3
-; CHECK-GI-NEXT:    neg.4s v5, v1
-; CHECK-GI-NEXT:    bsl.16b v2, v4, v3
-; CHECK-GI-NEXT:    bsl.16b v0, v5, v1
-; CHECK-GI-NEXT:    add.4s v0, v2, v0
+; CHECK-GI-NEXT:    ldr q0, [x0]
+; CHECK-GI-NEXT:    ldr q1, [x1]
+; CHECK-GI-NEXT:    usubl.4s v2, v0, v1
+; CHECK-GI-NEXT:    usubl2.4s v0, v0, v1
+; CHECK-GI-NEXT:    cmlt.4s v1, v2, #0
+; CHECK-GI-NEXT:    cmlt.4s v3, v0, #0
+; CHECK-GI-NEXT:    neg.4s v4, v2
+; CHECK-GI-NEXT:    neg.4s v5, v0
+; CHECK-GI-NEXT:    bsl.16b v1, v4, v2
+; CHECK-GI-NEXT:    bit.16b v0, v5, v3
+; CHECK-GI-NEXT:    add.4s v0, v1, v0
 ; CHECK-GI-NEXT:    addv.4s s0, v0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
@@ -428,15 +424,14 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
 ;
 ; CHECK-GI-LABEL: sabd8h_rdx:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT:    ssubl.4s v3, v0, v1
+; CHECK-GI-NEXT:    ssubl.4s v2, v0, v1
 ; CHECK-GI-NEXT:    ssubl2.4s v0, v0, v1
-; CHECK-GI-NEXT:    neg.4s v4, v3
+; CHECK-GI-NEXT:    cmlt.4s v1, v2, #0
+; CHECK-GI-NEXT:    cmlt.4s v3, v0, #0
+; CHECK-GI-NEXT:    neg.4s v4, v2
 ; CHECK-GI-NEXT:    neg.4s v5, v0
-; CHECK-GI-NEXT:    cmgt.4s v1, v2, v3
-; CHECK-GI-NEXT:    cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT:    bsl.16b v1, v4, v3
-; CHECK-GI-NEXT:    bit.16b v0, v5, v2
+; CHECK-GI-NEXT:    bsl.16b v1, v4, v2
+; CHECK-GI-NEXT:    bit.16b v0, v5, v3
 ; CHECK-GI-NEXT:    add.4s v0, v1, v0
 ; CHECK-GI-NEXT:    addv.4s s0, v0
 ; CHECK-GI-NEXT:    fmov w0, s0
@@ -461,9 +456,8 @@ define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
 ;
 ; CHECK-GI-LABEL: uabdl4s_rdx_i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
 ; CHECK-GI-NEXT:    usubl.4s v0, v0, v1
-; CHECK-GI-NEXT:    cmgt.4s v1, v2, v0
+; CHECK-GI-NEXT:    cmlt.4s v1, v0, #0
 ; CHECK-GI-NEXT:    neg.4s v2, v0
 ; CHECK-GI-NEXT:    bit.16b v0, v2, v1
 ; CHECK-GI-NEXT:    addv.4s s0, v0
@@ -494,18 +488,17 @@ define i64 @uabd4s_rdx(ptr %a, ptr %b, i32 %h) {
 ;
 ; CHECK-GI-LABEL: uabd4s_rdx:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr q1, [x0]
-; CHECK-GI-NEXT:    ldr q2, [x1]
-; CHECK-GI-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT:    usubl.2d v3, v1, v2
-; CHECK-GI-NEXT:    usubl2.2d v1, v1, v2
-; CHECK-GI-NEXT:    cmgt.2d v2, v0, v3
-; CHECK-GI-NEXT:    cmgt.2d v0, v0, v1
-; CHECK-GI-NEXT:    neg.2d v4, v3
-; CHECK-GI-NEXT:    neg.2d v5, v1
-; CHECK-GI-NEXT:    bsl.16b v2, v4, v3
-; CHECK-GI-NEXT:    bsl.16b v0, v5, v1
-; CHECK-GI-NEXT:    add.2d v0, v2, v0
+; CHECK-GI-NEXT:    ldr q0, [x0]
+; CHECK-GI-NEXT:    ldr q1, [x1]
+; CHECK-GI-NEXT:    usubl.2d v2, v0, v1
+; CHECK-GI-NEXT:    usubl2.2d v0, v0, v1
+; CHECK-GI-NEXT:    cmlt.2d v1, v2, #0
+; CHECK-GI-NEXT:    cmlt.2d v3, v0, #0
+; CHECK-GI-NEXT:    neg.2d v4, v2
+; CHECK-GI-NEXT:    neg.2d v5, v0
+; CHECK-GI-NEXT:    bsl.16b v1, v4, v2
+; CHECK-GI-NEXT:    bit.16b v0, v5, v3
+; CHECK-GI-NEXT:    add.2d v0, v1, v0
 ; CHECK-GI-NEXT:    addp.2d d0, v0
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    ret
@@ -531,15 +524,14 @@ define i64 @sabd4s_rdx(<4 x i32> %a, <4 x i32> %b) {
 ;
 ; CHECK-GI-LABEL: sabd4s_rdx:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT:    ssubl.2d v3, v0, v1
+; CHECK-GI-NEXT:    ssubl.2d v2, v0, v1
 ; CHECK-GI-NEXT:    ssubl2.2d v0, v0, v1
-; CHECK-GI-NEXT:    neg.2d v4, v3
+; CHECK-GI-NEXT:    cmlt.2d v1, v2, #0
+; CHECK-GI-NEXT:    cmlt.2d v3, v0, #0
+; CHECK-GI-NEXT:    neg.2d v4, v2
 ; CHECK-GI-NEXT:    neg.2d v5, v0
-; CHECK-GI-NEXT:    cmgt.2d v1, v2, v3
-; CHECK-GI-NEXT:    cmgt.2d v2, v2, v0
-; CHECK-GI-NEXT:    bsl.16b v1, v4, v3
-; CHECK-GI-NEXT:    bit.16b v0, v5, v2
+; CHECK-GI-NEXT:    bsl.16b v1, v4, v2
+; CHECK-GI-NEXT:    bit.16b v0, v5, v3
 ; CHECK-GI-NEXT:    add.2d v0, v1, v0
 ; CHECK-GI-NEXT:    addp.2d d0, v0
 ; CHECK-GI-NEXT:    fmov x0, d0
@@ -564,9 +556,8 @@ define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) {
 ;
 ; CHECK-GI-LABEL: uabdl2d_rdx_i64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
 ; CHECK-GI-NEXT:    usubl.2d v0, v0, v1
-; CHECK-GI-NEXT:    cmgt.2d v1, v2, v0
+; CHECK-GI-NEXT:    cmlt.2d v1, v0, #0
 ; CHECK-GI-NEXT:    neg.2d v2, v0
 ; CHECK-GI-NEXT:    bit.16b v0, v2, v1
 ; CHECK-GI-NEXT:    addp.2d d0, v0
@@ -1662,10 +1653,9 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: abspattern1:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT:    neg.2s v2, v0
-; CHECK-GI-NEXT:    cmge.2s v1, v0, v1
-; CHECK-GI-NEXT:    bif.8b v0, v2, v1
+; CHECK-GI-NEXT:    neg.2s v1, v0
+; CHECK-GI-NEXT:    cmge.2s v2, v0, #0
+; CHECK-GI-NEXT:    bif.8b v0, v1, v2
 ; CHECK-GI-NEXT:    ret
   %tmp1neg = sub <2 x i32> zeroinitializer, %a
   %b = icmp sge <2 x i32> %a, zeroinitializer
@@ -1682,10 +1672,9 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: abspattern2:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT:    neg.4h v2, v0
-; CHECK-GI-NEXT:    cmgt.4h v1, v0, v1
-; CHECK-GI-NEXT:    bif.8b v0, v2, v1
+; CHECK-GI-NEXT:    neg.4h v1, v0
+; CHECK-GI-NEXT:    cmgt.4h v2, v0, #0
+; CHECK-GI-NEXT:    bif.8b v0, v1, v2
 ; CHECK-GI-NEXT:    ret
   %tmp1neg = sub <4 x i16> zeroinitializer, %a
   %b = icmp sgt <4 x i16> %a, zeroinitializer
@@ -1701,10 +1690,9 @@ define <8 x i8> @abspattern3(<8 x i8> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: abspattern3:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT:    neg.8b v2, v0
-; CHECK-GI-NEXT:    cmgt.8b v1, v1, v0
-; CHECK-GI-NEXT:    bit.8b v0, v2, v1
+; CHECK-GI-NEXT:    neg.8b v1, v0
+; CHECK-GI-NEXT:    cmlt.8b v2, v0, #0
+; CHECK-GI-NEXT:    bit.8b v0, v1, v2
 ; CHECK-GI-NEXT:    ret
   %tmp1neg = sub <8 x i8> zeroinitializer, %a
   %b = icmp slt <8 x i8> %a, zeroinitializer
@@ -1720,10 +1708,9 @@ define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: abspattern4:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT:    neg.4s v2, v0
-; CHECK-GI-NEXT:    cmge.4s v1, v0, v1
-; CHECK-GI-NEXT:    bif.16b v0, v2, v1
+; CHECK-GI-NEXT:    neg.4s v1, v0
+; CHECK-GI-NEXT:    cmge.4s v2, v0, #0
+; CHECK-GI-NEXT:    bif.16b v0, v1, v2
 ; CHECK-GI-NEXT:    ret
   %tmp1neg = sub <4 x i32> zeroinitializer, %a
   %b = icmp sge <4 x i32> %a, zeroinitializer
@@ -1739,10 +1726,9 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: abspattern5:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT:    neg.8h v2, v0
-; CHECK-GI-NEXT:    cmgt.8h v1, v0, v1
-; CHECK-GI-NEXT:    bif.16b v0, v2, v1
+; CHECK-GI-NEXT:    neg.8h v1, v0
+; CHECK-GI-NEXT:    cmgt.8h v2, v0, #0
+; CHECK-GI-NEXT:    bif.16b v0, v1, v2
 ; CHECK-GI-NEXT:    ret
   %tmp1neg = sub <8 x i16> zeroinitializer, %a
   %b = icmp sgt <8 x i16> %a, zeroinitializer
@@ -1758,10 +1744,9 @@ define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: abspattern6:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT:    neg.16b v2, v0
-; CHECK-GI-NEXT:    cmgt.16b v1, v1, v0
-; CHECK-GI-NEXT:    bit.16b v0, v2, v1
+; CHECK-GI-NEXT:    neg.16b v1, v0
+; CHECK-GI-NEXT:    cmlt.16b v2, v0, #0
+; CHECK-GI-NEXT:    bit.16b v0, v1, v2
 ; CHECK-GI-NEXT:    ret
   %tmp1neg = sub <16 x i8> zeroinitializer, %a
   %b = icmp slt <16 x i8> %a, zeroinitializer
@@ -1777,10 +1762,9 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: abspattern7:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT:    neg.2d v2, v0
-; CHECK-GI-NEXT:    cmge.2d v1, v1, v0
-; CHECK-GI-NEXT:    bit.16b v0, v2, v1
+; CHECK-GI-NEXT:    neg.2d v1, v0
+; CHECK-GI-NEXT:    cmle.2d v2, v0, #0
+; CHECK-GI-NEXT:    bit.16b v0, v1, v2
 ; CHECK-GI-NEXT:    ret
   %tmp1neg = sub <2 x i64> zeroinitializer, %a
   %b = icmp sle <2 x i64> %a, zeroinitializer
@@ -1796,9 +1780,8 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
 ;
 ; CHECK-GI-LABEL: uabd_i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
 ; CHECK-GI-NEXT:    ssubl.2d v0, v0, v1
-; CHECK-GI-NEXT:    cmgt.2d v1, v2, v0
+; CHECK-GI-NEXT:    cmlt.2d v1, v0, #0
 ; CHECK-GI-NEXT:    neg.2d v2, v0
 ; CHECK-GI-NEXT:    bit.16b v0, v2, v1
 ; CHECK-GI-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index 8e10847e7aae34..06e69572bc5779 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -319,3 +319,667 @@ entry:
   %s = select <32 x i1> %c, <32 x i8> %d, <32 x i8> %e
   ret <32 x i8> %s
 }
+
+; ===== ICMP Zero RHS =====
+
+define <8 x i1> @icmp_eq_v8i8_Zero_RHS(<8 x i8> %a) {
+; CHECK-LABEL: icmp_eq_v8i8_Zero_RHS:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmeq v0.8b, v0.8b, #0
+; CHECK-NEXT:    ret
+    %c = icmp eq <8 x i8> %a, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+    ret <8 x i1> %c
+}
+
+define <16 x i1> @icmp_eq_v16i8_Zero_RHS(<16 x i8> %a) {
+; CHECK-LABEL: icmp_eq_v16i8_Zero_RHS:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmeq v0.16b, v0.16b, #0
+; CHECK-NEXT:    ret
+    %c = icmp eq <16 x i8> %a, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+    ret <16 x i1> %c
+}
+
+define <4 x i1> @icmp_eq_v4i16_Zero_RHS(<4 x i16> %a) {
+; CHECK-LABEL: icmp_eq_v4i16_Zero_RHS:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmeq v0.4h, v0.4h, #0
+; CHECK-NEXT:    ret
+    %c = icmp eq <4 x i16> %a, <i16 0, i16 0, i16 0, i16 0>
+    ret <4 x i1> %c
+}
+
+define <8 x i1> @icmp_eq_v8i16_Zero_RHS(<8 x i16> %a) {
+; CHECK-LABEL: icmp_eq_v8i16_Zero_RHS:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmeq v0.8h, v0.8h, #0
+; CHECK-NEXT:    xtn v0.8b, v0.8h
+; CHECK-NEXT:    ret
+    %c = icmp eq <8 x i16> %a, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+    ret <8 x i1> %c
+}
+
+define <2 x i1> @icmp_eq_v2i32_Zero_RHS(<2 x i32> %a) {
+; CHECK-LABEL: icmp_eq_v2i32_Zero_RHS:
+; CHECK:       /...
[truncated]

aemerson · 2024-04-27T04:21:49Z

If these patterns are only applicable to GlobalISel then you can move them to AArch64InstrGISel.td

chuongg3 · 2024-04-29T13:56:35Z

> If these patterns are only applicable to GlobalISel then you can move them to AArch64InstrGISel.td

It might be better if we do not special case GlobalISel unless necessary and integrate it more into existing patterns. What do you think?

aemerson · 2024-04-29T15:19:28Z

Is there any effect on SDAG codegen? If so then we can keep it in InstrInfo but if these new patterns only influence GlobalISel then I think it makes sense to separate them to avoid confusion about their purpose.

davemgreen · 2024-04-29T16:44:32Z

> Is there any effect on SDAG codegen? If so then we can keep it in InstrInfo but if these new patterns only influence GlobalISel then I think it makes sense to separate them to avoid confusion about their purpose.

Hi - I'm not sure if it is best to keep them separate, and continue treating GlobalISel as a special thing that is off in its own corner. It feels better in the long run to treat these as standard patterns, put where they belong compared to all the other instructions. They should probably be documented that they apply to GISel to keep it clear, and my opinions on this are not very strongly held. #89932 is probably the best place to mention if you think having the patterns separate is better.

arsenm · 2024-04-30T18:20:16Z

We do have a new control bit you can use to disable patterns from importing in GlobalISel if the DAG strategy is not useful here.

aemerson · 2024-05-06T22:53:43Z

> > Is there any effect on SDAG codegen? If so then we can keep it in InstrInfo but if these new patterns only influence GlobalISel then I think it makes sense to separate them to avoid confusion about their purpose.

> Hi - I'm not sure if it is best to keep them separate, and continue treating GlobalISel as a special thing that is off in its own corner. It feels better in the long run to treat these as standard patterns, put where they belong compared to all the other instructions. They should probably be documented that they apply to GISel to keep it clear, and my opinions on this are not very strongly held. #89932 is probably the best place to mention if you think having the patterns separate is better.

I can see the benefits of integration but if we do keep GISel patterns alongside the SDAG ones then comments indicating their purpose are a must IMO. @chuongg3 this is your call.

aemerson

LGTM, thanks. This is a nice cleanup.

…90054)

chuongg3 requested review from aemerson and davemgreen April 25, 2024 13:59

llvmbot added the backend:AArch64 label Apr 25, 2024

chuongg3 force-pushed the GlobalISel_ICMP_Zero branch from 257a3a8 to 0fa867d Compare April 29, 2024 14:23

llvmbot added the llvm:globalisel label Apr 29, 2024

chuongg3 force-pushed the GlobalISel_ICMP_Zero branch from 0fa867d to 2d7afdd Compare April 29, 2024 15:34

chuongg3 force-pushed the GlobalISel_ICMP_Zero branch from 2d7afdd to 46e83d9 Compare May 7, 2024 13:13

aemerson approved these changes May 7, 2024

View reviewed changes

chuongg3 added a commit that referenced this pull request May 13, 2024

[AArch64][NFC] Pre-commit tests for Select G_ICMP Zero Instruction (#90054)

d819772

[AArch64][GlobalISel] Select G_ICMP Zero Instruction

9b71edf

chuongg3 force-pushed the GlobalISel_ICMP_Zero branch from 46e83d9 to 9b71edf Compare May 13, 2024 14:49

chuongg3 merged commit 0bc23f1 into llvm:main May 14, 2024
3 of 4 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AArch64][GlobalISel] Select G_ICMP Zero Instruction #90054

[AArch64][GlobalISel] Select G_ICMP Zero Instruction #90054

Uh oh!

chuongg3 commented Apr 25, 2024

Uh oh!

llvmbot commented Apr 25, 2024 •

edited

Loading

Uh oh!

aemerson commented Apr 27, 2024

Uh oh!

chuongg3 commented Apr 29, 2024

Uh oh!

aemerson commented Apr 29, 2024

Uh oh!

davemgreen commented Apr 29, 2024

Uh oh!

arsenm commented Apr 30, 2024

Uh oh!

aemerson commented May 6, 2024

Uh oh!

aemerson left a comment

Uh oh!

Uh oh!

Uh oh!

[AArch64][GlobalISel] Select G_ICMP Zero Instruction #90054

[AArch64][GlobalISel] Select G_ICMP Zero Instruction #90054

Uh oh!

Conversation

chuongg3 commented Apr 25, 2024

Uh oh!

llvmbot commented Apr 25, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

aemerson commented Apr 27, 2024

Uh oh!

chuongg3 commented Apr 29, 2024

Uh oh!

aemerson commented Apr 29, 2024

Uh oh!

davemgreen commented Apr 29, 2024

Uh oh!

arsenm commented Apr 30, 2024

Uh oh!

aemerson commented May 6, 2024

Uh oh!

aemerson left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Apr 25, 2024 •

edited

Loading