-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][GlobalISel] Full reverse shuffles. #119083
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) ChangesA full shuffle reverse needs to use EXT+REV64. This adds handling for v8s16 and v16s8 types to match SDAG. Other types should be handled by perfect shuffles. Full diff: https://github.com/llvm/llvm-project/pull/119083.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1b1d81fcd07a2b..3d2507fc1f25c4 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -131,6 +131,13 @@ def ext: GICombineRule <
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
>;
+// Combine rule: lower a G_SHUFFLE_VECTOR whose mask is a full element
+// reversal into the REV64+EXT sequence (see matchFullRev / applyFullRev).
+def fullrev: GICombineRule <
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask):$root,
+         [{ return matchFullRev(*${root}, MRI); }]),
+  (apply [{ applyFullRev(*${root}, MRI); }])
+>;
+
def insertelt_nonconst: GICombineRule <
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
@@ -163,7 +170,7 @@ def form_duplane : GICombineRule <
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
>;
-def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
+def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
form_duplane, shuf_to_ins]>;
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 56d70ffdece713..244c0686750837 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -405,6 +405,28 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MI.eraseFromParent();
}
+// Match a G_SHUFFLE_VECTOR that reverses all elements of a 128-bit vector
+// (v16s8 or v8s16), so it can be lowered to REV64 + EXT, matching what
+// SelectionDAG produces. Per the PR description, other element types are
+// expected to be handled by perfect-shuffle lowering instead.
+bool matchFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  Register V1 = MI.getOperand(1).getReg();
+  auto Mask = MI.getOperand(3).getShuffleMask();
+  // Fire only for v16s8 / v8s16, when the first source has the same type as
+  // the destination and the mask is a full reversal <N-1, N-2, ..., 1, 0>.
+  return (DstTy == LLT::fixed_vector(16, 8) ||
+          DstTy == LLT::fixed_vector(8, 16)) &&
+         DstTy == MRI.getType(V1) &&
+         ShuffleVectorInst::isReverseMask(Mask, Mask.size());
+}
+
+// Lower a matched full-reverse shuffle to REV64 followed by EXT #8,
+// the same two-instruction sequence the updated tests expect
+// (rev64 + ext ..., #8). The shuffle instruction itself is erased.
+void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  MachineIRBuilder MIRBuilder(MI);
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  // EXT byte shift of 8 — half of the 128-bit vector.
+  auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
+  auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {MRI.getType(Dst)}, {Src});
+  MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
+  MI.eraseFromParent();
+}
+
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index 0ce92a20fb3a17..a3bc2b58d708f3 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -407,11 +407,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
;
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI13_0
-; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
@@ -460,11 +459,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI15_0
-; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI15_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
index 89838391956f29..7b0e0cfe04c010 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
@@ -60,19 +60,11 @@ entry:
}
define <8 x i16> @v8i16(<8 x i16> %a) {
-; CHECK-SD-LABEL: v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI4_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i16> %V128
@@ -112,19 +104,11 @@ entry:
}
define <16 x i8> @v16i8(<16 x i8> %a) {
-; CHECK-SD-LABEL: v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.16b, v0.16b
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI7_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.16b, v0.16b
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <16 x i8> %V128
@@ -203,19 +187,11 @@ entry:
}
define <8 x half> @v8f16(<8 x half> %a) {
-; CHECK-SD-LABEL: v8f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI13_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x half> %V128
|
tschuett
reviewed
Dec 7, 2024
arsenm
reviewed
Dec 8, 2024
A full shuffle reverse needs to use EXT+REV64. This adds handling for v8s16 and v16s8 types to match SDAG. Other types should be handled by perfect shuffles.
dda0d62
to
eb4bc5e
Compare
aemerson
approved these changes
Dec 27, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
A full shuffle reverse needs to use EXT+REV64. This adds handling for v8s16 and v16s8 types to match SDAG. Other types should be handled by perfect shuffles.