-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a #115377
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a #115377
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: Konstantin Schwarz (konstantinschwarz) ChangesFull diff: https://github.com/llvm/llvm-project/pull/115377.diff 9 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 72573facf1a7fe..f6af149708bc7a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -871,6 +871,10 @@ class CombinerHelper {
/// Remove references to rhs if it is undef
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not
+ /// reference a.
+ bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Use a function which takes in a MachineIRBuilder to perform a combine.
/// By default, it erases the instruction def'd on \p MO from the function.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5928b369913916..6cf78cf013d498 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1577,6 +1577,13 @@ def combine_shuffle_undef_rhs : GICombineRule<
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
>;
+def combine_shuffle_disjoint_mask : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return Helper.matchShuffleDisjointMask(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
// match_extract_of_element and insert_vector_elt_oob must be the first!
def vector_ops_combines: GICombineGroup<[
match_extract_of_element_undef_vector,
@@ -1927,7 +1934,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
- combine_shuffle_undef_rhs]>;
+ combine_shuffle_undef_rhs,
+ combine_shuffle_disjoint_mask]>;
def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
vector_ops_combines, freeze_combines, cast_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3b648a7e3f4472..5d31ce940f31a2 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7726,3 +7726,53 @@ bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
return true;
}
+
+bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+
+ auto &Shuffle = cast<GShuffleVector>(MI);
+ // If any of the two inputs is already undef, don't check the mask again to
+ // prevent infinite loop
+ if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
+ return false;
+
+ if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
+ return false;
+
+ ArrayRef<int> Mask = Shuffle.getMask();
+ const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
+
+ const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+
+ bool TouchesSrc1 = false;
+ bool TouchesSrc2 = false;
+ const unsigned NumElems = Mask.size();
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ if (Mask[Idx] < 0)
+ continue;
+
+ if (Mask[Idx] < (int)NumSrcElems)
+ TouchesSrc1 = true;
+ else
+ TouchesSrc2 = true;
+ }
+
+ if (!(TouchesSrc1 ^ TouchesSrc2))
+ return false;
+
+ MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
+ Register Undef = B.buildUndef(Src1Ty).getReg(0);
+ Register NewSrc1;
+ Register NewSrc2;
+ if (TouchesSrc1) {
+ NewSrc1 = Shuffle.getSrc1Reg();
+ NewSrc2 = Undef;
+ } else {
+ NewSrc1 = Undef;
+ NewSrc2 = Shuffle.getSrc2Reg();
+ }
+ B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, NewSrc2, Mask);
+ };
+
+ return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index 83b5c388520eb3..e2933690c7c55a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -598,9 +598,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = COPY $x1
- ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(undef, 0, 0, 0)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 0)
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
; CHECK-NEXT: $w0 = COPY %extract(s32)
; CHECK-NEXT: RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index fa725ad0c5fb41..6b84a8488e478d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -1395,9 +1395,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %freeze_sv(<4 x s32>), %idx(s64)
; CHECK-NEXT: $w0 = COPY %extract(s32)
@@ -1422,9 +1422,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %elt:_(s32) = COPY $w0
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %elt
; CHECK-NEXT: %extract:_(<4 x s32>) = G_INSERT_VECTOR_ELT %freeze_sv, [[FREEZE]](s32), %idx(s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
new file mode 100644
index 00000000000000..e2ed8ac8e7150e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
@@ -0,0 +1,101 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
+
+---
+name: shuffle_vector_unused_lhs
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: shuffle_vector_unused_lhs
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<2 x s32>), [[COPY]], shufflemask(3, 2, 3, 2)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(3, 2, 3, 2)
+ RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_unused_rhs
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: shuffle_vector_unused_rhs
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(0, 0, 1, 1)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,0,1,1)
+ RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_both_used
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: shuffle_vector_both_used
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,2,1,3)
+ RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_undef_elems
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: shuffle_vector_undef_elems
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, 0, 1, undef)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1,0,1,-1)
+ RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_scalar
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: shuffle_vector_scalar
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY]](s64), [[COPY]](s64), [[COPY]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0, 0, 0)
+ RET_ReallyLR implicit %2
+...
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 99dfac807dcd15..956e220e7efa95 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -201,21 +201,21 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r
; CHECK-GI: // %bb.0: // %vector.header
; CHECK-GI-NEXT: and w9, w3, #0xffff
; CHECK-GI-NEXT: adrp x8, .LCPI2_0
-; CHECK-GI-NEXT: dup v1.4s, w9
+; CHECK-GI-NEXT: dup v0.4s, w9
; CHECK-GI-NEXT: mov w9, w0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NEXT: and x8, x9, #0xfffffff8
; CHECK-GI-NEXT: .LBB2_1: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w9, [x2], #16
; CHECK-GI-NEXT: subs x8, x8, #8
-; CHECK-GI-NEXT: mov v0.s[0], w9
+; CHECK-GI-NEXT: mov v2.s[0], w9
; CHECK-GI-NEXT: mov w9, w0
; CHECK-GI-NEXT: add w0, w0, #8
; CHECK-GI-NEXT: lsl x9, x9, #2
-; CHECK-GI-NEXT: tbl v3.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: mul v3.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: str q3, [x1, x9]
+; CHECK-GI-NEXT: tbl v2.16b, { v2.16b, v3.16b }, v1.16b
+; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: str q2, [x1, x9]
; CHECK-GI-NEXT: b.ne .LBB2_1
; CHECK-GI-NEXT: // %bb.2: // %for.end12
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 3c4901ade972ec..69a69dbd3b18ba 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -2385,9 +2385,8 @@ define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) {
;
; CHECK-GI-LABEL: do_stuff:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
-; CHECK-GI-NEXT: ext v2.16b, v1.16b, v2.16b, #8
+; CHECK-GI-NEXT: mov d2, v1.d[1]
; CHECK-GI-NEXT: umull v0.2d, v2.2s, v0.2s
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
index 2c5d33da93c863..177f2cafcf833a 100644
--- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
+++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -17,17 +17,11 @@ entry:
}
define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
-; CHECK-SD-LABEL: i8_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i8_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #1
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i8_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <8 x i8> %shuffle
@@ -42,8 +36,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
;
; CHECK-GISEL-LABEL: i8_off8:
; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: mov d0, v0.d[1]
; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -90,17 +83,11 @@ entry:
}
define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
-; CHECK-SD-LABEL: i16_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #2
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i16_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i16> %shuffle
@@ -140,17 +127,11 @@ entry:
}
define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
-; CHECK-SD-LABEL: i32_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #4
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i32_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 1, i32 2>
ret <2 x i32> %shuffle
@@ -228,18 +209,11 @@ entry:
}
define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
-; CHECK-SD-LABEL: i8_zero_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i8_zero_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #1
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i8_zero_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <8 x i8> %shuffle
@@ -254,9 +228,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
;
; CHECK-GISEL-LABEL: i8_zero_off8:
; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: mov d0, v0.d[1]
; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -283,8 +255,8 @@ define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
;
; CHECK-GISEL-LABEL: i8_zero_off22:
; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v1.16b, v0.16b, #6
+; CHECK-GISEL-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #6
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: ret
entry:
@@ -304,18 +276,11 @@ entry:
}
define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
-; CHECK-SD-LABEL: i16_zero_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_zero_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #2
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i16_zero_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i16> %shuffle
@@ -355,18 +320,11 @@ entry:
}
define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
-; CHECK-SD-LABEL: i32_zero_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_zero_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #4
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i32_zero_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 1, i32 2>
ret <2 x i32> %shuffle
|
NewSrc1 = Undef; | ||
NewSrc2 = Shuffle.getSrc2Reg(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could commute the mask directly, to avoid an unnecessary extra combine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. I have a couple of shuffle canonicalization/simplification combines in the pipeline, one of them is indeed commuting the mask if the LHS is undef.
I think we still need that standalone combine to catch cases that are produced through other paths, but I can certainly do the commute here and safe an extra round in the combiner
6ef7981
to
45b8786
Compare
} | ||
|
||
MatchInfo = [=, &Shuffle](MachineIRBuilder &B) { | ||
Register Undef = B.buildUndef(Src1Ty).getReg(0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
legality check for undef
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto Undef = B.buildUndef(Src1Ty);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should have a rule that undef is always legal for every type. I can't see why it would be any use to make it illegal.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nope. If you want to build something, you have to check for legality. There cannot be a curated list of opcodes with weaker rules.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please also check for the legality of the G_SHUFFLE_VECTOR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
COPY might be different. But for G_IMPLCIT_DEF state for which types it is legal (maybe all) in your legalizer. Never ever assume that something is legal.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GlobalISel is about weird devices.
To clarify for people reading this thread in future: no that's not specifically what we designed GlobalISel for. I'm not sure where this notion originated from.
This rule needs to be more precise. Implicit def must only be legal for register types. That is, for any type that is legal in any operand of any operation, implicit_def must be legal for it.
+1
I agree that is odd one. I saw something then I am allowed to build it again?
Yes. Whether or not something is legal is context independent. Note there's some subtlety here, you're allowed to make different optimization decisions in the legalizer based on context but the same instruction + type tuple cannot be illegal in one context and legal in another.
If you weren't allowed to assume that an existing tuple in the MIR implied it was ok to build another, then something simple like instruction duplication or even moving would be incorrect. It also prevents us from caching legality which we may want to do in future for compile time performance.
If you want to build something you must go through isLegalOrBeforeLegalizer .
COPY might be different. But for G_IMPLCIT_DEF state for which types it is legal (maybe all) in your legalizer. Never ever assume that something is legal
I'm not sure what makes you think that your opinion is the final word on any of this, but this isn't your decision to make unilaterally. Please cool it with this kind of language, I noted on other PRs like Craig Topper's that your wording is unnecessarily harsh. I see it again here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
COPY might be different. But for G_IMPLCIT_DEF state for which types it is legal (maybe all) in your legalizer. Never ever assume that something is legal.
And there are restrictions on what the legality rules must be. We should enforce in the verifier that G_IMPLICIT_DEF is legal for all types used in any operand, such that it should never be necessary to check if it is legal
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, for sounding harsh. I(me) find it confusing to be weak about legality. Sometimes you have to go through isLegalOrBeforeLegalizer but not always, because undef is different. Why limit it to undef?
Can we instead document guidelines for building legalizers and using/querying them. And hinting at -Rpass-missed='gisel*'
for debugging /building legalizers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sometimes you have to go through isLegalOrBeforeLegalizer but not always, because undef is different. Why limit it to undef?
See:
We don't have legality rules for COPY. COPY and [G_]IMPLICIT_DEF are a pair that at a low level need to be supported for any register value
... to elaborate further: if you can create a value with a type T, you always need to be able to create an IMPLICIT_DEF with that type. Likewise you also need to be able to COPY it (we could have introduced G_COPY and we'd be having the same discussion about that here). Implicit-def is just an expression of an undefined value of a certain type. The fact that you saw an instruction that uses or defines that type by definition means that you also should be able to have an implicit def of it.
To clean up CombinerHelper, you could move the combine to: |
45b8786
to
ee67311
Compare
// If any of the two inputs is already undef, don't check the mask again to | ||
// prevent infinite loop | ||
if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI)) | ||
return false; | ||
|
||
if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI)) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After the discussion on this PR, you can remove this again since it should be impossible for G_IMPLICIT_DEF of that type to be illegal.
ee67311
to
9716d75
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, minor nit.
TouchesSrc2 = true; | ||
} | ||
|
||
if (!(TouchesSrc1 ^ TouchesSrc2)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this not the same as if (TouchesSrc1 == TouchesSrc2)
?
…sk does not reference a
9716d75
to
1776e3b
Compare
; | ||
; CHECK-GI-LABEL: v8i8: | ||
; CHECK-GI: // %bb.0: | ||
; CHECK-GI-NEXT: mov d0, v0[1] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After rebasing this commit now also modifies this test.
I'm not an AArch64 expert, is there any drawback (or advantage) of using plain mov
over ext
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah it looks OK, they should both be about the same.
I would keep the prefixes though, to keep them in sync with how we run the other tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are no common CHECK lines left, so FileCheck was complaining:
error: no check strings found with prefix 'CHECK:'
No description provided.