-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64] Add SUBHN patterns for xor variant #126100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
`xor x, -1` can be treated as `sub -1, x`; add patterns for generating subhn as opposed to a not. Fixes llvm#123999
@llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) Changes
Fixes #123999 Full diff: https://github.com/llvm/llvm-project/pull/126100.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ce0c260b78410f..658ac7490eb338 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6630,60 +6630,43 @@ defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.
-// Prioritize ADDHN and SUBHN over UZP2.
-let AddedComplexity = 10 in {
-
-// ADDHN
-def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
- (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 16))))),
- (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 32))))),
- (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v8i8 V64:$Rd),
- (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 8))))),
- (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v4i16 V64:$Rd),
- (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 16))))),
- (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v2i32 V64:$Rd),
- (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 32))))),
- (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-
-// SUBHN
-def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
- (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 16))))),
- (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 32))))),
- (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v8i8 V64:$Rd),
- (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 8))))),
- (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v4i16 V64:$Rd),
- (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 16))))),
- (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v2i32 V64:$Rd),
- (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 32))))),
- (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-
-} // AddedComplexity = 10
+multiclass AddSubHNPatterns<Instruction ADDHN, Instruction ADDHN2, Instruction SUBHN,
+ Instruction SUBHN2, ValueType VT64, ValueType VT128, int Shift> { // *HN2 are used for the concat_vectors forms; Shift is the vlshr amount.
+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 Shift))))),
+ (ADDHN V128:$Rn, V128:$Rm)>;
+ let AddedComplexity = 10 in // Prioritize over UZP2.
+ def : Pat<(concat_vectors (VT64 V64:$Rd),
+ (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 Shift))))),
+ (ADDHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
+
+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 Shift))))),
+ (SUBHN V128:$Rn, V128:$Rm)>;
+ let AddedComplexity = 10 in // Prioritize over UZP2.
+ def : Pat<(concat_vectors (VT64 V64:$Rd),
+ (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 Shift))))),
+ (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
+
+ // xor by -1 can also be treated as sub: ~x == -1 - x, so the all-ones vector ($Rm) is the first SUBHN operand and x ($Rn) the second.
+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm), (i32 Shift))))),
+ (SUBHN V128:$Rm, V128:$Rn)>;
+ let AddedComplexity = 10 in // Prioritize over UZP2.
+ def : Pat<(concat_vectors (VT64 V64:$Rd),
+ (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm),
+ (i32 Shift))))),
+ (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rm, V128:$Rn)>;
+}
+
+defm : AddSubHNPatterns<ADDHNv8i16_v8i8, ADDHNv8i16_v16i8,
+ SUBHNv8i16_v8i8, SUBHNv8i16_v16i8,
+ v8i8, v8i16, 8>;
+defm : AddSubHNPatterns<ADDHNv4i32_v4i16, ADDHNv4i32_v8i16,
+ SUBHNv4i32_v4i16, SUBHNv4i32_v8i16,
+ v4i16, v4i32, 16>;
+defm : AddSubHNPatterns<ADDHNv2i64_v2i32, ADDHNv2i64_v4i32,
+ SUBHNv2i64_v2i32, SUBHNv2i64_v4i32,
+ v2i32, v2i64, 32>;
//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index c893138cf7a8cd..d982dbbb1f69b9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1521,9 +1521,9 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
; CHECK-SD-LABEL: neg_narrow_i8:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mvn v1.16b, v1.16b
-; CHECK-SD-NEXT: mvn v0.16b, v0.16b
-; CHECK-SD-NEXT: uzp2 v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: subhn v0.8b, v2.8h, v0.8h
+; CHECK-SD-NEXT: subhn2 v0.16b, v2.8h, v1.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neg_narrow_i8:
@@ -1542,9 +1542,9 @@ define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
; CHECK-SD-LABEL: neg_narrow_i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mvn v1.16b, v1.16b
-; CHECK-SD-NEXT: mvn v0.16b, v0.16b
-; CHECK-SD-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: subhn v0.4h, v2.4s, v0.4s
+; CHECK-SD-NEXT: subhn2 v0.8h, v2.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neg_narrow_i16:
@@ -1563,9 +1563,9 @@ define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
define <4 x i32> @neg_narrow_i32(<4 x i64> %a) {
; CHECK-SD-LABEL: neg_narrow_i32:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mvn v1.16b, v1.16b
-; CHECK-SD-NEXT: mvn v0.16b, v0.16b
-; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: subhn v0.2s, v2.2d, v0.2d
+; CHECK-SD-NEXT: subhn2 v0.4s, v2.2d, v1.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neg_narrow_i32:
|
SamTebbs33
approved these changes
Feb 7, 2025
Icohedron
pushed a commit
to Icohedron/llvm-project
that referenced
this pull request
Feb 11, 2025
`xor x, -1` can be treated as `sub -1, x`; add patterns for generating subhn as opposed to a not. Fixes llvm#123999
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
`xor x, -1` can be treated as `sub -1, x`; add patterns for generating subhn as opposed to a not. Fixes #123999