Skip to content

[AArch64] Add SUBHN patterns for xor variant #126100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 37 additions & 54 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -6630,60 +6630,43 @@ defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.

// Prioritize ADDHN and SUBHN over UZP2.
// Every pattern inside the braces below is given AddedComplexity = 10.
let AddedComplexity = 10 in {

// ADDHN
// 64-bit result forms: add two 128-bit vectors, logically shift each lane
// right by half the wide element width (8, 16 or 32) and truncate to the
// narrow element type, i.e. keep the high half of every sum.
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
(ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 16))))),
(ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 32))))),
(ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
// 128-bit result forms: the narrowed sum is concatenated after an existing
// 64-bit value $Rd. $Rd is wrapped with SUBREG_TO_REG on the dsub
// subregister and passed as the first operand of the selected instruction.
def : Pat<(concat_vectors (v8i8 V64:$Rd),
(trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 8))))),
(ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
(trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 16))))),
(ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
(trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 32))))),
(ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;

// SUBHN
// Same six shapes as the ADDHN patterns above, with sub in place of add.
// Operand order matters here: the instruction receives $Rn, $Rm in the same
// order as the matched (sub $Rn, $Rm).
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
(SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 16))))),
(SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 32))))),
(SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
// 128-bit result forms: $Rd supplies the first half of the concatenation.
def : Pat<(concat_vectors (v8i8 V64:$Rd),
(trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 8))))),
(SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
(trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 16))))),
(SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
(trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 32))))),
(SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10
// Selection patterns for the narrowing-high add/sub instructions, shared
// across element widths.
//
// Parameters:
//   ADDHN, SUBHN   - instructions producing the 64-bit narrowed result.
//   ADDHN2, SUBHN2 - instructions producing a 128-bit result whose first
//                    half is an existing 64-bit value ($Rd).
//   VT64           - narrow (64-bit vector) result type.
//   VT128          - wide (128-bit vector) type of the add/sub/xor.
//   Shift          - logical right-shift amount applied before the trunc;
//                    every instantiation passes the narrow element width,
//                    so the pattern keeps the high half of each wide lane.
//
// Only the concat_vectors patterns carry AddedComplexity = 10; each
// brace-less `let ... in` applies to the single def that follows it.
// NOTE(review): presumably the raised complexity makes these win over the
// UZP2 selection for the same DAG - confirm against the UZP2 patterns.
multiclass AddSubHNPatterns<Instruction ADDHN, Instruction ADDHN2, Instruction SUBHN,
Instruction SUBHN2, ValueType VT64, ValueType VT128, int Shift> {
// trunc(lshr(add Rn, Rm, Shift)) -> ADDHN Rn, Rm
def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 Shift))))),
(ADDHN V128:$Rn, V128:$Rm)>;
// concat(Rd, trunc(lshr(add Rn, Rm, Shift))) -> ADDHN2. $Rd is wrapped with
// SUBREG_TO_REG on dsub and passed as the first operand.
let AddedComplexity = 10 in
def : Pat<(concat_vectors (VT64 V64:$Rd),
(trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 Shift))))),
(ADDHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;

// trunc(lshr(sub Rn, Rm, Shift)) -> SUBHN Rn, Rm (operand order preserved).
def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 Shift))))),
(SUBHN V128:$Rn, V128:$Rm)>;
// concat(Rd, trunc(lshr(sub Rn, Rm, Shift))) -> SUBHN2.
let AddedComplexity = 10 in
def : Pat<(concat_vectors (VT64 V64:$Rd),
(trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 Shift))))),
(SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;

// xor by -1 can also be treated as sub
// Since x ^ -1 == -1 - x, the all-ones vector (bound to $Rm) is the minuend
// and $Rn the subtrahend; hence the operands are swapped in the output
// relative to the order in which they are matched.
def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm), (i32 Shift))))),
(SUBHN V128:$Rm, V128:$Rn)>;
// Same xor-as-sub rewrite for the concat form, again with swapped operands.
let AddedComplexity = 10 in
def : Pat<(concat_vectors (VT64 V64:$Rd),
(trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm),
(i32 Shift))))),
(SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rm, V128:$Rn)>;
}

// Instantiate the patterns for each supported narrowing: v8i16 -> v8i8,
// v4i32 -> v4i16 and v2i64 -> v2i32. The final argument is the shift amount,
// equal to the narrow element width in bits (8, 16, 32).
defm : AddSubHNPatterns<ADDHNv8i16_v8i8, ADDHNv8i16_v16i8,
SUBHNv8i16_v8i8, SUBHNv8i16_v16i8,
v8i8, v8i16, 8>;
defm : AddSubHNPatterns<ADDHNv4i32_v4i16, ADDHNv4i32_v8i16,
SUBHNv4i32_v4i16, SUBHNv4i32_v8i16,
v4i16, v4i32, 16>;
defm : AddSubHNPatterns<ADDHNv2i64_v2i32, ADDHNv2i64_v4i32,
SUBHNv2i64_v2i32, SUBHNv2i64_v4i32,
v2i32, v2i64, 32>;

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AArch64/arm64-vadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1521,9 +1521,9 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
; CHECK-SD-LABEL: neg_narrow_i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mvn v1.16b, v1.16b
; CHECK-SD-NEXT: mvn v0.16b, v0.16b
; CHECK-SD-NEXT: uzp2 v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-SD-NEXT: subhn v0.8b, v2.8h, v0.8h
; CHECK-SD-NEXT: subhn2 v0.16b, v2.8h, v1.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neg_narrow_i8:
Expand All @@ -1542,9 +1542,9 @@ define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
; CHECK-SD-LABEL: neg_narrow_i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mvn v1.16b, v1.16b
; CHECK-SD-NEXT: mvn v0.16b, v0.16b
; CHECK-SD-NEXT: uzp2 v0.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-SD-NEXT: subhn v0.4h, v2.4s, v0.4s
; CHECK-SD-NEXT: subhn2 v0.8h, v2.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neg_narrow_i16:
Expand All @@ -1563,9 +1563,9 @@ define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
define <4 x i32> @neg_narrow_i32(<4 x i64> %a) {
; CHECK-SD-LABEL: neg_narrow_i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mvn v1.16b, v1.16b
; CHECK-SD-NEXT: mvn v0.16b, v0.16b
; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-SD-NEXT: subhn v0.2s, v2.2d, v0.2d
; CHECK-SD-NEXT: subhn2 v0.4s, v2.2d, v1.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neg_narrow_i32:
Expand Down