-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][GlobalISel] Select G_ICMP Zero Instruction #90054
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: None (chuongg3) Changes: Patch is 62.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90054.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a7abb58064a535..bf331b97069ba2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5397,6 +5397,52 @@ def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
(BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+multiclass SelectSetCCZeroRHS<PatFrags InFrag, string INST> {
+ def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), immAllZerosV)),
+ (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
+ def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), immAllZerosV)),
+ (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
+ def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), immAllZerosV)),
+ (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
+ def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), immAllZerosV)),
+ (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
+ def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), immAllZerosV)),
+ (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
+ def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), immAllZerosV)),
+ (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
+ def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), immAllZerosV)),
+ (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
+}
+
+defm : SelectSetCCZeroRHS<seteq, "CMEQ">;
+defm : SelectSetCCZeroRHS<setgt, "CMGT">;
+defm : SelectSetCCZeroRHS<setge, "CMGE">;
+defm : SelectSetCCZeroRHS<setlt, "CMLT">;
+defm : SelectSetCCZeroRHS<setle, "CMLE">;
+
+multiclass SelectSetCCZeroLHS<PatFrags InFrag, string INST> {
+ def : Pat<(v8i8 (InFrag immAllZerosV, (v8i8 V64:$Rn))),
+ (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
+ def : Pat<(v16i8 (InFrag immAllZerosV, (v16i8 V128:$Rn))),
+ (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
+ def : Pat<(v4i16 (InFrag immAllZerosV, (v4i16 V64:$Rn))),
+ (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
+ def : Pat<(v8i16 (InFrag immAllZerosV, (v8i16 V128:$Rn))),
+ (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
+ def : Pat<(v2i32 (InFrag immAllZerosV, (v2i32 V64:$Rn))),
+ (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
+ def : Pat<(v4i32 (InFrag immAllZerosV, (v4i32 V128:$Rn))),
+ (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
+ def : Pat<(v2i64 (InFrag immAllZerosV, (v2i64 V128:$Rn))),
+ (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
+}
+
+defm : SelectSetCCZeroLHS<seteq, "CMEQ">;
+defm : SelectSetCCZeroLHS<setgt, "CMLT">;
+defm : SelectSetCCZeroLHS<setge, "CMLE">;
+defm : SelectSetCCZeroLHS<setlt, "CMGT">;
+defm : SelectSetCCZeroLHS<setle, "CMGE">;
+
let Predicates = [HasNEON] in {
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
(ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index ee035ec1941d57..94b792b887eb47 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -94,20 +94,19 @@ define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias
;
; GISEL-LABEL: oversized_ADDV_256:
; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: ldr d1, [x0]
-; GISEL-NEXT: ldr d2, [x1]
-; GISEL-NEXT: movi v0.2d, #0000000000000000
+; GISEL-NEXT: ldr d0, [x0]
+; GISEL-NEXT: ldr d1, [x1]
+; GISEL-NEXT: ushll v0.8h, v0.8b, #0
; GISEL-NEXT: ushll v1.8h, v1.8b, #0
-; GISEL-NEXT: ushll v2.8h, v2.8b, #0
-; GISEL-NEXT: usubl v3.4s, v1.4h, v2.4h
-; GISEL-NEXT: usubl2 v1.4s, v1.8h, v2.8h
-; GISEL-NEXT: cmgt v2.4s, v0.4s, v3.4s
-; GISEL-NEXT: cmgt v0.4s, v0.4s, v1.4s
-; GISEL-NEXT: neg v4.4s, v3.4s
-; GISEL-NEXT: neg v5.4s, v1.4s
-; GISEL-NEXT: bsl v2.16b, v4.16b, v3.16b
-; GISEL-NEXT: bsl v0.16b, v5.16b, v1.16b
-; GISEL-NEXT: add v0.4s, v2.4s, v0.4s
+; GISEL-NEXT: usubl v2.4s, v0.4h, v1.4h
+; GISEL-NEXT: usubl2 v0.4s, v0.8h, v1.8h
+; GISEL-NEXT: cmlt v1.4s, v2.4s, #0
+; GISEL-NEXT: cmlt v3.4s, v0.4s, #0
+; GISEL-NEXT: neg v4.4s, v2.4s
+; GISEL-NEXT: neg v5.4s, v0.4s
+; GISEL-NEXT: bsl v1.16b, v4.16b, v2.16b
+; GISEL-NEXT: bit v0.16b, v5.16b, v3.16b
+; GISEL-NEXT: add v0.4s, v1.4s, v0.4s
; GISEL-NEXT: addv s0, v0.4s
; GISEL-NEXT: fmov w0, s0
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index d64327656a9e01..f7d31a214563bc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -252,18 +252,17 @@ define i16 @uabd16b_rdx(ptr %a, ptr %b) {
;
; CHECK-GI-LABEL: uabd16b_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT: usubl.8h v3, v1, v2
-; CHECK-GI-NEXT: usubl2.8h v1, v1, v2
-; CHECK-GI-NEXT: cmgt.8h v2, v0, v3
-; CHECK-GI-NEXT: cmgt.8h v0, v0, v1
-; CHECK-GI-NEXT: neg.8h v4, v3
-; CHECK-GI-NEXT: neg.8h v5, v1
-; CHECK-GI-NEXT: bsl.16b v2, v4, v3
-; CHECK-GI-NEXT: bsl.16b v0, v5, v1
-; CHECK-GI-NEXT: add.8h v0, v2, v0
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: usubl.8h v2, v0, v1
+; CHECK-GI-NEXT: usubl2.8h v0, v0, v1
+; CHECK-GI-NEXT: cmlt.8h v1, v2, #0
+; CHECK-GI-NEXT: cmlt.8h v3, v0, #0
+; CHECK-GI-NEXT: neg.8h v4, v2
+; CHECK-GI-NEXT: neg.8h v5, v0
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
+; CHECK-GI-NEXT: add.8h v0, v1, v0
; CHECK-GI-NEXT: addv.8h h0, v0
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -290,29 +289,28 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-GI-LABEL: uabd16b_rdx_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ushll.8h v3, v0, #0
-; CHECK-GI-NEXT: ushll.8h v4, v1, #0
+; CHECK-GI-NEXT: ushll.8h v2, v0, #0
+; CHECK-GI-NEXT: ushll.8h v3, v1, #0
; CHECK-GI-NEXT: ushll2.8h v0, v0, #0
; CHECK-GI-NEXT: ushll2.8h v1, v1, #0
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: usubl.4s v5, v3, v4
-; CHECK-GI-NEXT: usubl2.4s v3, v3, v4
-; CHECK-GI-NEXT: usubl.4s v4, v0, v1
+; CHECK-GI-NEXT: usubl.4s v4, v2, v3
+; CHECK-GI-NEXT: usubl2.4s v2, v2, v3
+; CHECK-GI-NEXT: usubl.4s v3, v0, v1
; CHECK-GI-NEXT: usubl2.4s v0, v0, v1
-; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
-; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
-; CHECK-GI-NEXT: neg.4s v16, v5
-; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
-; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT: neg.4s v17, v3
-; CHECK-GI-NEXT: neg.4s v18, v4
+; CHECK-GI-NEXT: cmlt.4s v1, v4, #0
+; CHECK-GI-NEXT: cmlt.4s v5, v2, #0
+; CHECK-GI-NEXT: neg.4s v16, v4
+; CHECK-GI-NEXT: cmlt.4s v6, v3, #0
+; CHECK-GI-NEXT: cmlt.4s v7, v0, #0
+; CHECK-GI-NEXT: neg.4s v17, v2
+; CHECK-GI-NEXT: neg.4s v18, v3
; CHECK-GI-NEXT: neg.4s v19, v0
-; CHECK-GI-NEXT: bsl.16b v1, v16, v5
-; CHECK-GI-NEXT: bit.16b v3, v17, v6
-; CHECK-GI-NEXT: bit.16b v4, v18, v7
-; CHECK-GI-NEXT: bit.16b v0, v19, v2
-; CHECK-GI-NEXT: add.4s v1, v1, v3
-; CHECK-GI-NEXT: add.4s v0, v4, v0
+; CHECK-GI-NEXT: bsl.16b v1, v16, v4
+; CHECK-GI-NEXT: bit.16b v2, v17, v5
+; CHECK-GI-NEXT: bit.16b v3, v18, v6
+; CHECK-GI-NEXT: bit.16b v0, v19, v7
+; CHECK-GI-NEXT: add.4s v1, v1, v2
+; CHECK-GI-NEXT: add.4s v0, v3, v0
; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
@@ -338,29 +336,28 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-GI-LABEL: sabd16b_rdx_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshll.8h v3, v0, #0
-; CHECK-GI-NEXT: sshll.8h v4, v1, #0
+; CHECK-GI-NEXT: sshll.8h v2, v0, #0
+; CHECK-GI-NEXT: sshll.8h v3, v1, #0
; CHECK-GI-NEXT: sshll2.8h v0, v0, #0
; CHECK-GI-NEXT: sshll2.8h v1, v1, #0
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: ssubl.4s v5, v3, v4
-; CHECK-GI-NEXT: ssubl2.4s v3, v3, v4
-; CHECK-GI-NEXT: ssubl.4s v4, v0, v1
+; CHECK-GI-NEXT: ssubl.4s v4, v2, v3
+; CHECK-GI-NEXT: ssubl2.4s v2, v2, v3
+; CHECK-GI-NEXT: ssubl.4s v3, v0, v1
; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
-; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
-; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
-; CHECK-GI-NEXT: neg.4s v16, v5
-; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
-; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT: neg.4s v17, v3
-; CHECK-GI-NEXT: neg.4s v18, v4
+; CHECK-GI-NEXT: cmlt.4s v1, v4, #0
+; CHECK-GI-NEXT: cmlt.4s v5, v2, #0
+; CHECK-GI-NEXT: neg.4s v16, v4
+; CHECK-GI-NEXT: cmlt.4s v6, v3, #0
+; CHECK-GI-NEXT: cmlt.4s v7, v0, #0
+; CHECK-GI-NEXT: neg.4s v17, v2
+; CHECK-GI-NEXT: neg.4s v18, v3
; CHECK-GI-NEXT: neg.4s v19, v0
-; CHECK-GI-NEXT: bsl.16b v1, v16, v5
-; CHECK-GI-NEXT: bit.16b v3, v17, v6
-; CHECK-GI-NEXT: bit.16b v4, v18, v7
-; CHECK-GI-NEXT: bit.16b v0, v19, v2
-; CHECK-GI-NEXT: add.4s v1, v1, v3
-; CHECK-GI-NEXT: add.4s v0, v4, v0
+; CHECK-GI-NEXT: bsl.16b v1, v16, v4
+; CHECK-GI-NEXT: bit.16b v2, v17, v5
+; CHECK-GI-NEXT: bit.16b v3, v18, v6
+; CHECK-GI-NEXT: bit.16b v0, v19, v7
+; CHECK-GI-NEXT: add.4s v1, v1, v2
+; CHECK-GI-NEXT: add.4s v0, v3, v0
; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
@@ -391,18 +388,17 @@ define i32 @uabd8h_rdx(ptr %a, ptr %b) {
;
; CHECK-GI-LABEL: uabd8h_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT: usubl.4s v3, v1, v2
-; CHECK-GI-NEXT: usubl2.4s v1, v1, v2
-; CHECK-GI-NEXT: cmgt.4s v2, v0, v3
-; CHECK-GI-NEXT: cmgt.4s v0, v0, v1
-; CHECK-GI-NEXT: neg.4s v4, v3
-; CHECK-GI-NEXT: neg.4s v5, v1
-; CHECK-GI-NEXT: bsl.16b v2, v4, v3
-; CHECK-GI-NEXT: bsl.16b v0, v5, v1
-; CHECK-GI-NEXT: add.4s v0, v2, v0
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: usubl.4s v2, v0, v1
+; CHECK-GI-NEXT: usubl2.4s v0, v0, v1
+; CHECK-GI-NEXT: cmlt.4s v1, v2, #0
+; CHECK-GI-NEXT: cmlt.4s v3, v0, #0
+; CHECK-GI-NEXT: neg.4s v4, v2
+; CHECK-GI-NEXT: neg.4s v5, v0
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
+; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -428,15 +424,14 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-GI-LABEL: sabd8h_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: ssubl.4s v3, v0, v1
+; CHECK-GI-NEXT: ssubl.4s v2, v0, v1
; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
-; CHECK-GI-NEXT: neg.4s v4, v3
+; CHECK-GI-NEXT: cmlt.4s v1, v2, #0
+; CHECK-GI-NEXT: cmlt.4s v3, v0, #0
+; CHECK-GI-NEXT: neg.4s v4, v2
; CHECK-GI-NEXT: neg.4s v5, v0
-; CHECK-GI-NEXT: cmgt.4s v1, v2, v3
-; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT: bsl.16b v1, v4, v3
-; CHECK-GI-NEXT: bit.16b v0, v5, v2
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
@@ -461,9 +456,8 @@ define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
;
; CHECK-GI-LABEL: uabdl4s_rdx_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: usubl.4s v0, v0, v1
-; CHECK-GI-NEXT: cmgt.4s v1, v2, v0
+; CHECK-GI-NEXT: cmlt.4s v1, v0, #0
; CHECK-GI-NEXT: neg.4s v2, v0
; CHECK-GI-NEXT: bit.16b v0, v2, v1
; CHECK-GI-NEXT: addv.4s s0, v0
@@ -494,18 +488,17 @@ define i64 @uabd4s_rdx(ptr %a, ptr %b, i32 %h) {
;
; CHECK-GI-LABEL: uabd4s_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT: usubl.2d v3, v1, v2
-; CHECK-GI-NEXT: usubl2.2d v1, v1, v2
-; CHECK-GI-NEXT: cmgt.2d v2, v0, v3
-; CHECK-GI-NEXT: cmgt.2d v0, v0, v1
-; CHECK-GI-NEXT: neg.2d v4, v3
-; CHECK-GI-NEXT: neg.2d v5, v1
-; CHECK-GI-NEXT: bsl.16b v2, v4, v3
-; CHECK-GI-NEXT: bsl.16b v0, v5, v1
-; CHECK-GI-NEXT: add.2d v0, v2, v0
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: usubl.2d v2, v0, v1
+; CHECK-GI-NEXT: usubl2.2d v0, v0, v1
+; CHECK-GI-NEXT: cmlt.2d v1, v2, #0
+; CHECK-GI-NEXT: cmlt.2d v3, v0, #0
+; CHECK-GI-NEXT: neg.2d v4, v2
+; CHECK-GI-NEXT: neg.2d v5, v0
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
+; CHECK-GI-NEXT: add.2d v0, v1, v0
; CHECK-GI-NEXT: addp.2d d0, v0
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
@@ -531,15 +524,14 @@ define i64 @sabd4s_rdx(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-GI-LABEL: sabd4s_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: ssubl.2d v3, v0, v1
+; CHECK-GI-NEXT: ssubl.2d v2, v0, v1
; CHECK-GI-NEXT: ssubl2.2d v0, v0, v1
-; CHECK-GI-NEXT: neg.2d v4, v3
+; CHECK-GI-NEXT: cmlt.2d v1, v2, #0
+; CHECK-GI-NEXT: cmlt.2d v3, v0, #0
+; CHECK-GI-NEXT: neg.2d v4, v2
; CHECK-GI-NEXT: neg.2d v5, v0
-; CHECK-GI-NEXT: cmgt.2d v1, v2, v3
-; CHECK-GI-NEXT: cmgt.2d v2, v2, v0
-; CHECK-GI-NEXT: bsl.16b v1, v4, v3
-; CHECK-GI-NEXT: bit.16b v0, v5, v2
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
; CHECK-GI-NEXT: add.2d v0, v1, v0
; CHECK-GI-NEXT: addp.2d d0, v0
; CHECK-GI-NEXT: fmov x0, d0
@@ -564,9 +556,8 @@ define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-GI-LABEL: uabdl2d_rdx_i64:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: usubl.2d v0, v0, v1
-; CHECK-GI-NEXT: cmgt.2d v1, v2, v0
+; CHECK-GI-NEXT: cmlt.2d v1, v0, #0
; CHECK-GI-NEXT: neg.2d v2, v0
; CHECK-GI-NEXT: bit.16b v0, v2, v1
; CHECK-GI-NEXT: addp.2d d0, v0
@@ -1662,10 +1653,9 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern1:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.2s v2, v0
-; CHECK-GI-NEXT: cmge.2s v1, v0, v1
-; CHECK-GI-NEXT: bif.8b v0, v2, v1
+; CHECK-GI-NEXT: neg.2s v1, v0
+; CHECK-GI-NEXT: cmge.2s v2, v0, #0
+; CHECK-GI-NEXT: bif.8b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <2 x i32> zeroinitializer, %a
%b = icmp sge <2 x i32> %a, zeroinitializer
@@ -1682,10 +1672,9 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern2:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.4h v2, v0
-; CHECK-GI-NEXT: cmgt.4h v1, v0, v1
-; CHECK-GI-NEXT: bif.8b v0, v2, v1
+; CHECK-GI-NEXT: neg.4h v1, v0
+; CHECK-GI-NEXT: cmgt.4h v2, v0, #0
+; CHECK-GI-NEXT: bif.8b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <4 x i16> zeroinitializer, %a
%b = icmp sgt <4 x i16> %a, zeroinitializer
@@ -1701,10 +1690,9 @@ define <8 x i8> @abspattern3(<8 x i8> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern3:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.8b v2, v0
-; CHECK-GI-NEXT: cmgt.8b v1, v1, v0
-; CHECK-GI-NEXT: bit.8b v0, v2, v1
+; CHECK-GI-NEXT: neg.8b v1, v0
+; CHECK-GI-NEXT: cmlt.8b v2, v0, #0
+; CHECK-GI-NEXT: bit.8b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <8 x i8> zeroinitializer, %a
%b = icmp slt <8 x i8> %a, zeroinitializer
@@ -1720,10 +1708,9 @@ define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern4:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.4s v2, v0
-; CHECK-GI-NEXT: cmge.4s v1, v0, v1
-; CHECK-GI-NEXT: bif.16b v0, v2, v1
+; CHECK-GI-NEXT: neg.4s v1, v0
+; CHECK-GI-NEXT: cmge.4s v2, v0, #0
+; CHECK-GI-NEXT: bif.16b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <4 x i32> zeroinitializer, %a
%b = icmp sge <4 x i32> %a, zeroinitializer
@@ -1739,10 +1726,9 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern5:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.8h v2, v0
-; CHECK-GI-NEXT: cmgt.8h v1, v0, v1
-; CHECK-GI-NEXT: bif.16b v0, v2, v1
+; CHECK-GI-NEXT: neg.8h v1, v0
+; CHECK-GI-NEXT: cmgt.8h v2, v0, #0
+; CHECK-GI-NEXT: bif.16b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <8 x i16> zeroinitializer, %a
%b = icmp sgt <8 x i16> %a, zeroinitializer
@@ -1758,10 +1744,9 @@ define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern6:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.16b v2, v0
-; CHECK-GI-NEXT: cmgt.16b v1, v1, v0
-; CHECK-GI-NEXT: bit.16b v0, v2, v1
+; CHECK-GI-NEXT: neg.16b v1, v0
+; CHECK-GI-NEXT: cmlt.16b v2, v0, #0
+; CHECK-GI-NEXT: bit.16b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <16 x i8> zeroinitializer, %a
%b = icmp slt <16 x i8> %a, zeroinitializer
@@ -1777,10 +1762,9 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern7:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.2d v2, v0
-; CHECK-GI-NEXT: cmge.2d v1, v1, v0
-; CHECK-GI-NEXT: bit.16b v0, v2, v1
+; CHECK-GI-NEXT: neg.2d v1, v0
+; CHECK-GI-NEXT: cmle.2d v2, v0, #0
+; CHECK-GI-NEXT: bit.16b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <2 x i64> zeroinitializer, %a
%b = icmp sle <2 x i64> %a, zeroinitializer
@@ -1796,9 +1780,8 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-GI-LABEL: uabd_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: ssubl.2d v0, v0, v1
-; CHECK-GI-NEXT: cmgt.2d v1, v2, v0
+; CHECK-GI-NEXT: cmlt.2d v1, v0, #0
; CHECK-GI-NEXT: neg.2d v2, v0
; CHECK-GI-NEXT: bit.16b v0, v2, v1
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index 8e10847e7aae34..06e69572bc5779 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -319,3 +319,667 @@ entry:
%s = select <32 x i1> %c, <32 x i8> %d, <32 x i8> %e
ret <32 x i8> %s
}
+
+; ===== ICMP Zero RHS =====
+
+define <8 x i1> @icmp_eq_v8i8_Zero_RHS(<8 x i8> %a) {
+; CHECK-LABEL: icmp_eq_v8i8_Zero_RHS:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq v0.8b, v0.8b, #0
+; CHECK-NEXT: ret
+ %c = icmp eq <8 x i8> %a, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ ret <8 x i1> %c
+}
+
+define <16 x i1> @icmp_eq_v16i8_Zero_RHS(<16 x i8> %a) {
+; CHECK-LABEL: icmp_eq_v16i8_Zero_RHS:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
+; CHECK-NEXT: ret
+ %c = icmp eq <16 x i8> %a, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ ret <16 x i1> %c
+}
+
+define <4 x i1> @icmp_eq_v4i16_Zero_RHS(<4 x i16> %a) {
+; CHECK-LABEL: icmp_eq_v4i16_Zero_RHS:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq v0.4h, v0.4h, #0
+; CHECK-NEXT: ret
+ %c = icmp eq <4 x i16> %a, <i16 0, i16 0, i16 0, i16 0>
+ ret <4 x i1> %c
+}
+
+define <8 x i1> @icmp_eq_v8i16_Zero_RHS(<8 x i16> %a) {
+; CHECK-LABEL: icmp_eq_v8i16_Zero_RHS:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq v0.8h, v0.8h, #0
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: ret
+ %c = icmp eq <8 x i16> %a, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+ ret <8 x i1> %c
+}
+
+define <2 x i1> @icmp_eq_v2i32_Zero_RHS(<2 x i32> %a) {
+; CHECK-LABEL: icmp_eq_v2i32_Zero_RHS:
+; CHECK: /...
[truncated]
|
If these patterns are only applicable to GlobalISel then you can move them to `AArch64InstrGISel.td`.
It might be better if we do not special case GlobalISel unless necessary and integrate it more into existing patterns. What do you think? |
257a3a8
to
0fa867d
Compare
Is there any effect on SDAG codegen? If so then we can keep it in InstrInfo but if these new patterns only influence GlobalISel then I think it makes sense to separate them to avoid confusion about their purpose. |
0fa867d
to
2d7afdd
Compare
Hi - I'm not sure if it is best to keep them separate, and continue treating GlobalISel as a special thing that is off in its own corner. It feels better in the long run to treat these as standard patterns, put where they belong compared to all the other instructions. They should probably be documented that they apply to GISel to keep it clear, and my opinions on this are not very strongly held. #89932 is probably the best place to mention if you think having the patterns separate is better. |
We do have a new control bit you can use to disable patterns from being imported in GlobalISel if the DAG strategy is not useful here.
I can see the benefits of integration but if we do keep GISel patterns alongside the SDAG ones then comments indicating their purpose are a must IMO. @chuongg3 this is your call. |
2d7afdd
to
46e83d9
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks. This is a nice cleanup.
46e83d9
to
9b71edf
Compare
No description provided.