Skip to content

Commit b3e74e3

Browse files
authored
[AArch64] Add SUBHN patterns for xor variant (#126100)
`xor x, -1` is equivalent to `sub -1, x`, so add patterns that select SUBHN (with an all-ones first operand) instead of emitting a separate NOT (`mvn`) followed by `uzp2`. Fixes #123999
1 parent 7aed53e commit b3e74e3

File tree

2 files changed

+46
-63
lines changed

2 files changed

+46
-63
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 37 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -6630,60 +6630,43 @@ defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
66306630
// CodeGen patterns for addhn and subhn instructions, which can actually be
66316631
// written in LLVM IR without too much difficulty.
66326632

6633-
// Prioritize ADDHN and SUBHN over UZP2.
6634-
let AddedComplexity = 10 in {
6635-
6636-
// ADDHN
6637-
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
6638-
(ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6639-
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6640-
(i32 16))))),
6641-
(ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6642-
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6643-
(i32 32))))),
6644-
(ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6645-
def : Pat<(concat_vectors (v8i8 V64:$Rd),
6646-
(trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6647-
(i32 8))))),
6648-
(ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6649-
V128:$Rn, V128:$Rm)>;
6650-
def : Pat<(concat_vectors (v4i16 V64:$Rd),
6651-
(trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6652-
(i32 16))))),
6653-
(ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6654-
V128:$Rn, V128:$Rm)>;
6655-
def : Pat<(concat_vectors (v2i32 V64:$Rd),
6656-
(trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6657-
(i32 32))))),
6658-
(ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6659-
V128:$Rn, V128:$Rm)>;
6660-
6661-
// SUBHN
6662-
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
6663-
(SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6664-
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6665-
(i32 16))))),
6666-
(SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6667-
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6668-
(i32 32))))),
6669-
(SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6670-
def : Pat<(concat_vectors (v8i8 V64:$Rd),
6671-
(trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6672-
(i32 8))))),
6673-
(SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6674-
V128:$Rn, V128:$Rm)>;
6675-
def : Pat<(concat_vectors (v4i16 V64:$Rd),
6676-
(trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6677-
(i32 16))))),
6678-
(SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6679-
V128:$Rn, V128:$Rm)>;
6680-
def : Pat<(concat_vectors (v2i32 V64:$Rd),
6681-
(trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6682-
(i32 32))))),
6683-
(SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6684-
V128:$Rn, V128:$Rm)>;
6685-
6686-
} // AddedComplexity = 10
6633+
multiclass AddSubHNPatterns<Instruction ADDHN, Instruction ADDHN2, Instruction SUBHN,
6634+
Instruction SUBHN2, ValueType VT64, ValueType VT128, int Shift> {
6635+
def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 Shift))))),
6636+
(ADDHN V128:$Rn, V128:$Rm)>;
6637+
let AddedComplexity = 10 in
6638+
def : Pat<(concat_vectors (VT64 V64:$Rd),
6639+
(trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6640+
(i32 Shift))))),
6641+
(ADDHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
6642+
6643+
def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 Shift))))),
6644+
(SUBHN V128:$Rn, V128:$Rm)>;
6645+
let AddedComplexity = 10 in
6646+
def : Pat<(concat_vectors (VT64 V64:$Rd),
6647+
(trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6648+
(i32 Shift))))),
6649+
(SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
6650+
6651+
// xor x, -1 is equivalent to sub -1, x, so select SUBHN with the all-ones vector as the first operand.
6652+
def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm), (i32 Shift))))),
6653+
(SUBHN V128:$Rm, V128:$Rn)>;
6654+
let AddedComplexity = 10 in
6655+
def : Pat<(concat_vectors (VT64 V64:$Rd),
6656+
(trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm),
6657+
(i32 Shift))))),
6658+
(SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rm, V128:$Rn)>;
6659+
}
6660+
6661+
defm : AddSubHNPatterns<ADDHNv8i16_v8i8, ADDHNv8i16_v16i8,
6662+
SUBHNv8i16_v8i8, SUBHNv8i16_v16i8,
6663+
v8i8, v8i16, 8>;
6664+
defm : AddSubHNPatterns<ADDHNv4i32_v4i16, ADDHNv4i32_v8i16,
6665+
SUBHNv4i32_v4i16, SUBHNv4i32_v8i16,
6666+
v4i16, v4i32, 16>;
6667+
defm : AddSubHNPatterns<ADDHNv2i64_v2i32, ADDHNv2i64_v4i32,
6668+
SUBHNv2i64_v2i32, SUBHNv2i64_v4i32,
6669+
v2i32, v2i64, 32>;
66876670

66886671
//----------------------------------------------------------------------------
66896672
// AdvSIMD bitwise extract from vector instruction.

llvm/test/CodeGen/AArch64/arm64-vadd.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,9 +1521,9 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
15211521
define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
15221522
; CHECK-SD-LABEL: neg_narrow_i8:
15231523
; CHECK-SD: // %bb.0:
1524-
; CHECK-SD-NEXT: mvn v1.16b, v1.16b
1525-
; CHECK-SD-NEXT: mvn v0.16b, v0.16b
1526-
; CHECK-SD-NEXT: uzp2 v0.16b, v0.16b, v1.16b
1524+
; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
1525+
; CHECK-SD-NEXT: subhn v0.8b, v2.8h, v0.8h
1526+
; CHECK-SD-NEXT: subhn2 v0.16b, v2.8h, v1.8h
15271527
; CHECK-SD-NEXT: ret
15281528
;
15291529
; CHECK-GI-LABEL: neg_narrow_i8:
@@ -1542,9 +1542,9 @@ define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
15421542
define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
15431543
; CHECK-SD-LABEL: neg_narrow_i16:
15441544
; CHECK-SD: // %bb.0:
1545-
; CHECK-SD-NEXT: mvn v1.16b, v1.16b
1546-
; CHECK-SD-NEXT: mvn v0.16b, v0.16b
1547-
; CHECK-SD-NEXT: uzp2 v0.8h, v0.8h, v1.8h
1545+
; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
1546+
; CHECK-SD-NEXT: subhn v0.4h, v2.4s, v0.4s
1547+
; CHECK-SD-NEXT: subhn2 v0.8h, v2.4s, v1.4s
15481548
; CHECK-SD-NEXT: ret
15491549
;
15501550
; CHECK-GI-LABEL: neg_narrow_i16:
@@ -1563,9 +1563,9 @@ define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
15631563
define <4 x i32> @neg_narrow_i32(<4 x i64> %a) {
15641564
; CHECK-SD-LABEL: neg_narrow_i32:
15651565
; CHECK-SD: // %bb.0:
1566-
; CHECK-SD-NEXT: mvn v1.16b, v1.16b
1567-
; CHECK-SD-NEXT: mvn v0.16b, v0.16b
1568-
; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v1.4s
1566+
; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
1567+
; CHECK-SD-NEXT: subhn v0.2s, v2.2d, v0.2d
1568+
; CHECK-SD-NEXT: subhn2 v0.4s, v2.2d, v1.2d
15691569
; CHECK-SD-NEXT: ret
15701570
;
15711571
; CHECK-GI-LABEL: neg_narrow_i32:

0 commit comments

Comments
 (0)