[GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a #115377

konstantinschwarz · 2024-11-07T22:05:54Z

No description provided.

llvmbot · 2024-11-07T22:06:28Z

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-aarch64

Author: Konstantin Schwarz (konstantinschwarz)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/115377.diff

9 Files Affected:

(modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+4)
(modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-1)
(modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+50)
(modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir (+2-2)
(modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir (+4-4)
(added) llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir (+101)
(modified) llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll (+6-6)
(modified) llvm/test/CodeGen/AArch64/aarch64-smull.ll (+1-2)
(modified) llvm/test/CodeGen/AArch64/ext-narrow-index.ll (+34-76)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 72573facf1a7fe..f6af149708bc7a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -871,6 +871,10 @@ class CombinerHelper {
   /// Remove references to rhs if it is undef
   bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not
+  /// reference a.
+  bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo);
+
   /// Use a function which takes in a MachineIRBuilder to perform a combine.
   /// By default, it erases the instruction def'd on \p MO from the function.
   void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5928b369913916..6cf78cf013d498 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1577,6 +1577,13 @@ def combine_shuffle_undef_rhs : GICombineRule<
   (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
 >;
 
+def combine_shuffle_disjoint_mask : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+        [{ return Helper.matchShuffleDisjointMask(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
 // match_extract_of_element and insert_vector_elt_oob must be the first!
 def vector_ops_combines: GICombineGroup<[
 match_extract_of_element_undef_vector,
@@ -1927,7 +1934,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
 def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
 
 def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
-                                       combine_shuffle_undef_rhs]>;
+                                       combine_shuffle_undef_rhs,
+                                       combine_shuffle_disjoint_mask]>;
 
 def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     vector_ops_combines, freeze_combines, cast_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3b648a7e3f4472..5d31ce940f31a2 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7726,3 +7726,53 @@ bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
 
   return true;
 }
+
+bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
+                                              BuildFnTy &MatchInfo) {
+
+  auto &Shuffle = cast<GShuffleVector>(MI);
+  // If any of the two inputs is already undef, don't check the mask again to
+  // prevent infinite loop
+  if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
+    return false;
+
+  if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
+    return false;
+
+  ArrayRef<int> Mask = Shuffle.getMask();
+  const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
+
+  const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+
+  bool TouchesSrc1 = false;
+  bool TouchesSrc2 = false;
+  const unsigned NumElems = Mask.size();
+  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+    if (Mask[Idx] < 0)
+      continue;
+
+    if (Mask[Idx] < (int)NumSrcElems)
+      TouchesSrc1 = true;
+    else
+      TouchesSrc2 = true;
+  }
+
+  if (!(TouchesSrc1 ^ TouchesSrc2))
+    return false;
+
+  MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
+    Register Undef = B.buildUndef(Src1Ty).getReg(0);
+    Register NewSrc1;
+    Register NewSrc2;
+    if (TouchesSrc1) {
+      NewSrc1 = Shuffle.getSrc1Reg();
+      NewSrc2 = Undef;
+    } else {
+      NewSrc1 = Undef;
+      NewSrc2 = Shuffle.getSrc2Reg();
+    }
+    B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, NewSrc2, Mask);
+  };
+
+  return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index 83b5c388520eb3..e2933690c7c55a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -598,9 +598,9 @@ body:             |
     ; CHECK: liveins: $x0, $x1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
-    ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
     ; CHECK-NEXT: %idx:_(s64) = COPY $x1
-    ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(undef, 0, 0, 0)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 0)
     ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
     ; CHECK-NEXT: $w0 = COPY %extract(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index fa725ad0c5fb41..6b84a8488e478d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -1395,9 +1395,9 @@ body:             |
     ; CHECK: liveins: $x0, $x1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
-    ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
     ; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
     ; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
     ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %freeze_sv(<4 x s32>), %idx(s64)
     ; CHECK-NEXT: $w0 = COPY %extract(s32)
@@ -1422,9 +1422,9 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %elt:_(s32) = COPY $w0
     ; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
-    ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
     ; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
     ; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
     ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %elt
     ; CHECK-NEXT: %extract:_(<4 x s32>) = G_INSERT_VECTOR_ELT %freeze_sv, [[FREEZE]](s32), %idx(s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
new file mode 100644
index 00000000000000..e2ed8ac8e7150e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
@@ -0,0 +1,101 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios  -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
+
+---
+name: shuffle_vector_unused_lhs
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: shuffle_vector_unused_lhs
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<2 x s32>), [[COPY]], shufflemask(3, 2, 3, 2)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(3, 2, 3, 2)
+    RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_unused_rhs
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: shuffle_vector_unused_rhs
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(0, 0, 1, 1)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,0,1,1)
+    RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_both_used
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: shuffle_vector_both_used
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,2,1,3)
+    RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_undef_elems
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: shuffle_vector_undef_elems
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, 0, 1, undef)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1,0,1,-1)
+    RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_scalar
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: shuffle_vector_scalar
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY]](s64), [[COPY]](s64), [[COPY]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0, 0, 0)
+    RET_ReallyLR implicit %2
+...
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 99dfac807dcd15..956e220e7efa95 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -201,21 +201,21 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r
 ; CHECK-GI:       // %bb.0: // %vector.header
 ; CHECK-GI-NEXT:    and w9, w3, #0xffff
 ; CHECK-GI-NEXT:    adrp x8, .LCPI2_0
-; CHECK-GI-NEXT:    dup v1.4s, w9
+; CHECK-GI-NEXT:    dup v0.4s, w9
 ; CHECK-GI-NEXT:    mov w9, w0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI2_0]
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
 ; CHECK-GI-NEXT:    and x8, x9, #0xfffffff8
 ; CHECK-GI-NEXT:  .LBB2_1: // %vector.body
 ; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-GI-NEXT:    ldrh w9, [x2], #16
 ; CHECK-GI-NEXT:    subs x8, x8, #8
-; CHECK-GI-NEXT:    mov v0.s[0], w9
+; CHECK-GI-NEXT:    mov v2.s[0], w9
 ; CHECK-GI-NEXT:    mov w9, w0
 ; CHECK-GI-NEXT:    add w0, w0, #8
 ; CHECK-GI-NEXT:    lsl x9, x9, #2
-; CHECK-GI-NEXT:    tbl v3.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    mul v3.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT:    str q3, [x1, x9]
+; CHECK-GI-NEXT:    tbl v2.16b, { v2.16b, v3.16b }, v1.16b
+; CHECK-GI-NEXT:    mul v2.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    str q2, [x1, x9]
 ; CHECK-GI-NEXT:    b.ne .LBB2_1
 ; CHECK-GI-NEXT:  // %bb.2: // %for.end12
 ; CHECK-GI-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 3c4901ade972ec..69a69dbd3b18ba 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -2385,9 +2385,8 @@ define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) {
 ;
 ; CHECK-GI-LABEL: do_stuff:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
-; CHECK-GI-NEXT:    ext v2.16b, v1.16b, v2.16b, #8
+; CHECK-GI-NEXT:    mov d2, v1.d[1]
 ; CHECK-GI-NEXT:    umull v0.2d, v2.2s, v0.2s
 ; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
index 2c5d33da93c863..177f2cafcf833a 100644
--- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
+++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -17,17 +17,11 @@ entry:
 }
 
 define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
-; CHECK-SD-LABEL: i8_off1:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: i8_off1:
-; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v1.16b, #1
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: i8_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
   ret <8 x i8> %shuffle
@@ -42,8 +36,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
 ;
 ; CHECK-GISEL-LABEL: i8_off8:
 ; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT:    mov d0, v0.d[1]
 ; CHECK-GISEL-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -90,17 +83,11 @@ entry:
 }
 
 define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
-; CHECK-SD-LABEL: i16_off1:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: i16_off1:
-; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v1.16b, #2
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: i16_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x i16> %shuffle
@@ -140,17 +127,11 @@ entry:
 }
 
 define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
-; CHECK-SD-LABEL: i32_off1:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: i32_off1:
-; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: i32_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 1, i32 2>
   ret <2 x i32> %shuffle
@@ -228,18 +209,11 @@ entry:
 }
 
 define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
-; CHECK-SD-LABEL: i8_zero_off1:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: i8_zero_off1:
-; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v1.16b, #1
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: i8_zero_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
   ret <8 x i8> %shuffle
@@ -254,9 +228,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
 ;
 ; CHECK-GISEL-LABEL: i8_zero_off8:
 ; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT:    mov d0, v0.d[1]
 ; CHECK-GISEL-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -283,8 +255,8 @@ define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
 ;
 ; CHECK-GISEL-LABEL: i8_zero_off22:
 ; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT:    ext v0.16b, v1.16b, v0.16b, #6
+; CHECK-GISEL-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v0.16b, #6
 ; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
 entry:
@@ -304,18 +276,11 @@ entry:
 }
 
 define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
-; CHECK-SD-LABEL: i16_zero_off1:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: i16_zero_off1:
-; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v1.16b, #2
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: i16_zero_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x i16> %shuffle
@@ -355,18 +320,11 @@ entry:
 }
 
 define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
-; CHECK-SD-LABEL: i32_zero_off1:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: i32_zero_off1:
-; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: i32_zero_off1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 1, i32 2>
   ret <2 x i32> %shuffle

davemgreen · 2024-11-07T22:13:27Z

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+      NewSrc1 = Undef;
+      NewSrc2 = Shuffle.getSrc2Reg();


This could commute the mask directly, to avoid an unnecessary extra combine.

Right. I have a couple of shuffle canonicalization/simplification combines in the pipeline, one of them is indeed commuting the mask if the LHS is undef.
I think we still need that standalone combine to catch cases that are produced through other paths, but I can certainly do the commute here and safe an extra round in the combiner

tschuett · 2024-11-08T05:51:16Z

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+  }
+
+  MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
+    Register Undef = B.buildUndef(Src1Ty).getReg(0);


legality check for undef

auto Undef = B.buildUndef(Src1Ty);

Maybe we should have a rule that undef is always legal for every type. I can't see why it would be any use to make it illegal.

Nope. If you want to build something, you have to check for legality. There cannot be a curated list of opcodes with weaker rules.

Please also check for the legality of the G_SHUFFLE_VECTOR.

COPY might be different. But for G_IMPLCIT_DEF state for which types it is legal (maybe all) in your legalizer. Never ever assume that something is legal.

GlobalISel is about weird devices.

To clarify for people reading this thread in future: no that's not specifically what we designed GlobalISel for. I'm not sure where this notion originated from.

This rule needs to be more precise. Implicit def must only be legal for register types. That is, for any type that is legal in any operand of any operation, implicit_def must be legal for it.

+1

I agree that is odd one. I saw something then I am allowed to build it again?

Yes. Whether or not something is legal is context independent. Note there's some subtlety here, you're allowed to make different optimization decisions in the legalizer based on context but the same instruction + type tuple cannot be illegal in one context and legal in another.

If you weren't allowed to assume that an existing tuple in the MIR implied it was ok to build another, then something simple like instruction duplication or even moving would be incorrect. It also prevents us from caching legality which we may want to do in future for compile time performance.

If you want to build something you must go through isLegalOrBeforeLegalizer .
COPY might be different. But for G_IMPLCIT_DEF state for which types it is legal (maybe all) in your legalizer. Never ever assume that something is legal

I'm not sure what makes you think that your opinion is the final word on any of this, but this isn't your decision to make unilaterally. Please cool it with this kind of language, I noted on other PRs like Craig Topper's that your wording is unnecessarily harsh. I see it again here.

COPY might be different. But for G_IMPLCIT_DEF state for which types it is legal (maybe all) in your legalizer. Never ever assume that something is legal.

And there are restrictions on what the legality rules must be. We should enforce in the verifier that G_IMPLICIT_DEF is legal for all types used in any operand, such that it should never be necessary to check if it is legal

Sorry, for sounding harsh. I(me) find it confusing to be weak about legality. Sometimes you have to go through isLegalOrBeforeLegalizer but not always, because undef is different. Why limit it to undef?

Can we instead document guidelines for building legalizers and using/querying them. And hinting at -Rpass-missed='gisel*'for debugging /building legalizers.

Sometimes you have to go through isLegalOrBeforeLegalizer but not always, because undef is different. Why limit it to undef?

See:

We don't have legality rules for COPY. COPY and [G_]IMPLICIT_DEF are a pair that at a low level need to be supported for any register value

... to elaborate further: if you can create a value with a type T, you always need to be able to create an IMPLICIT_DEF with that type. Likewise you also need to be able to COPY it (we could have introduced G_COPY and we'd be having the same discussion about that here). Implicit-def is just an expression of an undefined value of a certain type. The fact that you saw an instruction that uses or defines that type by definition means that you also should be able to have an implicit def of it.

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

tschuett · 2024-11-08T15:28:39Z

To clean up CombinerHelper, you could move the combine to:
https://github.com/llvm/llvm-project/blob/main/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp

aemerson · 2024-11-12T19:41:49Z

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+  // If any of the two inputs is already undef, don't check the mask again to
+  // prevent infinite loop
+  if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
+    return false;
+
+  if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
+    return false;


After the discussion on this PR, you can remove this again since it should be impossible for G_IMPLICIT_DEF of that type to be illegal.

aemerson

LGTM, minor nit.

aemerson · 2024-11-13T00:11:58Z

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+      TouchesSrc2 = true;
+  }
+
+  if (!(TouchesSrc1 ^ TouchesSrc2))


Is this not the same as if (TouchesSrc1 == TouchesSrc2)?

…sk does not reference a

konstantinschwarz · 2024-11-14T22:05:31Z

llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll

+;
+; CHECK-GI-LABEL: v8i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d0, v0[1]


After rebasing this commit now also modifies this test.
I'm not an AArch64 expert, is there any drawback (or advantage) of using plain mov over ext?

Yeah it looks OK, they should both be about the same.

I would keep the prefixes though, to keep them in sync with how we run the other tests.

There are no common CHECK lines left, so FileCheck was complaining:

error: no check strings found with prefix 'CHECK:'

konstantinschwarz requested review from aemerson, jayfoad, arsenm and tschuett November 7, 2024 22:05

llvmbot added backend:AArch64 llvm:globalisel labels Nov 7, 2024

davemgreen reviewed Nov 7, 2024

View reviewed changes

konstantinschwarz force-pushed the kschwarz.upstream.shufflevector.disjoint.masks branch from 6ef7981 to 45b8786 Compare November 7, 2024 22:37

tschuett reviewed Nov 8, 2024

View reviewed changes

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp Show resolved Hide resolved

konstantinschwarz force-pushed the kschwarz.upstream.shufflevector.disjoint.masks branch from 45b8786 to ee67311 Compare November 8, 2024 21:21

aemerson reviewed Nov 12, 2024

View reviewed changes

konstantinschwarz force-pushed the kschwarz.upstream.shufflevector.disjoint.masks branch from ee67311 to 9716d75 Compare November 12, 2024 23:53

aemerson approved these changes Nov 13, 2024

View reviewed changes

[GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff ma…

1776e3b

…sk does not reference a

konstantinschwarz force-pushed the kschwarz.upstream.shufflevector.disjoint.masks branch from 9716d75 to 1776e3b Compare November 14, 2024 22:02

konstantinschwarz commented Nov 14, 2024

View reviewed changes

konstantinschwarz merged commit 0f0e2fe into llvm:main Nov 14, 2024
6 of 8 checks passed

konstantinschwarz deleted the kschwarz.upstream.shufflevector.disjoint.masks branch November 14, 2024 23:13

This was referenced Dec 5, 2024

[GlobalISel] Combine [s,z]ext of undef into 0 #117439

Merged

[GlobalISel] Document minimum legality requirements for G_IMPLICIT_DEF. #117609

Merged

[GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a #115377

[GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a #115377

Uh oh!

Conversation

konstantinschwarz commented Nov 7, 2024

Uh oh!

llvmbot commented Nov 7, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

tschuett Nov 12, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

tschuett commented Nov 8, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

aemerson left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Nov 7, 2024 •

edited

Loading

tschuett Nov 12, 2024 •

edited

Loading

tschuett commented Nov 8, 2024 •

edited

Loading