Skip to content

[GlobalISel] Add computeNumSignBits for G_SHUFFLE_VECTOR #139505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 20, 2025

Conversation

davemgreen
Copy link
Collaborator

The code is similar to computeKnownBits and the code in SelectionDAG::ComputeNumSignBits.

@llvmbot
Copy link
Member

llvmbot commented May 12, 2025

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

The code is similar to computeKnownBits and the code in SelectionDAG::ComputeNumSignBits.


Full diff: https://github.com/llvm/llvm-project/pull/139505.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp (+24)
  • (modified) llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll (+10-23)
  • (modified) llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll (+15-26)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 41e36e1e6640b..fb483ed962270 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -874,6 +874,30 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
                                        SrcTy.getScalarSizeInBits());
     break;
   }
+  case TargetOpcode::G_SHUFFLE_VECTOR: {
+    // Collect the minimum number of sign bits that are shared by every vector
+    // element referenced by the shuffle.
+    APInt DemandedLHS, DemandedRHS;
+    unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
+    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
+                                DemandedElts, DemandedLHS, DemandedRHS))
+      return 1;
+
+    unsigned Tmp = std::numeric_limits<unsigned>::max();
+    if (!!DemandedLHS)
+      Tmp =
+          computeNumSignBits(MI.getOperand(1).getReg(), DemandedLHS, Depth + 1);
+    if (!!DemandedRHS) {
+      unsigned Tmp2 =
+          computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
+      Tmp = std::min(Tmp, Tmp2);
+    }
+    // If we don't know anything, early out and try computeKnownBits fall-back.
+    if (Tmp == 1)
+      break;
+    assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
+    return Tmp;
+  }
   case TargetOpcode::G_INTRINSIC:
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
   case TargetOpcode::G_INTRINSIC_CONVERGENT:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index 56393142726c7..d86cbf57a65f3 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -400,9 +400,10 @@ define <8 x i16> @missing_insert(<8 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: missing_insert:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    ext v1.16b, v0.16b, v0.16b, #4
-; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ext v1.16b, v1.16b, v1.16b, #4
+; CHECK-GI-NEXT:    xtn v1.8b, v1.8h
+; CHECK-GI-NEXT:    smull v0.8h, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    ret
 entry:
   %ext.b = sext <8 x i8> %b to <8 x i16>
@@ -421,10 +422,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
 ; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
 ; CHECK-GI-NEXT:    rev64 v0.8h, v0.8h
 ; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    smull v0.8h, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 entry:
   %in = sext <8 x i8> %src to <8 x i16>
@@ -444,16 +445,9 @@ define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
 ; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
 ; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    mov x11, v1.d[1]
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    mov x10, v0.d[1]
-; CHECK-GI-NEXT:    mul x8, x8, x9
-; CHECK-GI-NEXT:    mul x9, x10, x11
-; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    mov v0.d[1], x9
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    smull v0.2d, v0.2s, v1.2s
 ; CHECK-GI-NEXT:    ret
 entry:
   %in = sext <2 x i32> %src to <2 x i64>
@@ -496,16 +490,9 @@ define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
 ; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
 ; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    mov x11, v1.d[1]
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    mov x10, v0.d[1]
-; CHECK-GI-NEXT:    mul x8, x8, x9
-; CHECK-GI-NEXT:    mul x9, x10, x11
-; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    mov v0.d[1], x9
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    smull v0.2d, v0.2s, v1.2s
 ; CHECK-GI-NEXT:    ret
 entry:
   %in = sext <2 x i32> %src to <2 x i64>
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index eee1ec0b37315..b89b422c8c5ad 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -834,30 +834,18 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
 ; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
 ; CHECK-GI-NEXT:    mov x8, xzr
 ; CHECK-GI-NEXT:    dup v0.2d, v0.d[1]
-; CHECK-GI-NEXT:    mov x9, v0.d[1]
-; CHECK-GI-NEXT:    fmov x10, d0
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-GI-NEXT:  .LBB7_1: // %loop
 ; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NEXT:    ldr q0, [x0]
+; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    subs x2, x2, #8
 ; CHECK-GI-NEXT:    add x8, x8, #8
-; CHECK-GI-NEXT:    sshll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll2 v0.2d, v0.4s, #0
-; CHECK-GI-NEXT:    fmov x11, d1
-; CHECK-GI-NEXT:    mov x12, v1.d[1]
-; CHECK-GI-NEXT:    fmov x13, d0
-; CHECK-GI-NEXT:    mov x14, v0.d[1]
-; CHECK-GI-NEXT:    mul x11, x11, x10
-; CHECK-GI-NEXT:    mul x13, x13, x10
-; CHECK-GI-NEXT:    mul x12, x12, x9
-; CHECK-GI-NEXT:    mov v0.d[0], x11
-; CHECK-GI-NEXT:    mul x11, x14, x9
-; CHECK-GI-NEXT:    mov v1.d[0], x13
-; CHECK-GI-NEXT:    mov v0.d[1], x12
-; CHECK-GI-NEXT:    mov v1.d[1], x11
-; CHECK-GI-NEXT:    shrn v0.2s, v0.2d, #15
-; CHECK-GI-NEXT:    shrn2 v0.4s, v1.2d, #15
-; CHECK-GI-NEXT:    str q0, [x0], #32
+; CHECK-GI-NEXT:    mov d2, v1.d[1]
+; CHECK-GI-NEXT:    smull v1.2d, v1.2s, v0.2s
+; CHECK-GI-NEXT:    smull v2.2d, v2.2s, v0.2s
+; CHECK-GI-NEXT:    shrn v1.2s, v1.2d, #15
+; CHECK-GI-NEXT:    shrn2 v1.4s, v2.2d, #15
+; CHECK-GI-NEXT:    str q1, [x0], #32
 ; CHECK-GI-NEXT:    b.ne .LBB7_1
 ; CHECK-GI-NEXT:  // %bb.2: // %exit
 ; CHECK-GI-NEXT:    ret
@@ -971,18 +959,19 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sshll2 v0.8h, v0.16b, #0
 ; CHECK-GI-NEXT:    mov x8, xzr
+; CHECK-GI-NEXT:    dup v0.8h, v0.h[2]
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-GI-NEXT:  .LBB9_1: // %loop
 ; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    subs x2, x2, #8
 ; CHECK-GI-NEXT:    add x8, x8, #8
-; CHECK-GI-NEXT:    sshll v2.8h, v1.8b, #0
-; CHECK-GI-NEXT:    sshll2 v1.8h, v1.16b, #0
-; CHECK-GI-NEXT:    mul v2.8h, v2.8h, v0.h[2]
-; CHECK-GI-NEXT:    mul v1.8h, v1.8h, v0.h[2]
-; CHECK-GI-NEXT:    sshr v2.8h, v2.8h, #15
+; CHECK-GI-NEXT:    mov d2, v1.d[1]
+; CHECK-GI-NEXT:    smull v1.8h, v1.8b, v0.8b
+; CHECK-GI-NEXT:    smull v2.8h, v2.8b, v0.8b
 ; CHECK-GI-NEXT:    sshr v1.8h, v1.8h, #15
-; CHECK-GI-NEXT:    uzp1 v1.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT:    sshr v2.8h, v2.8h, #15
+; CHECK-GI-NEXT:    uzp1 v1.16b, v1.16b, v2.16b
 ; CHECK-GI-NEXT:    str q1, [x0], #32
 ; CHECK-GI-NEXT:    b.ne .LBB9_1
 ; CHECK-GI-NEXT:  // %bb.2: // %exit

@llvmbot
Copy link
Member

llvmbot commented May 12, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: David Green (davemgreen)

Changes

The code is similar to computeKnownBits and the code in SelectionDAG::ComputeNumSignBits.


Full diff: https://github.com/llvm/llvm-project/pull/139505.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp (+24)
  • (modified) llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll (+10-23)
  • (modified) llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll (+15-26)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 41e36e1e6640b..fb483ed962270 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -874,6 +874,30 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
                                        SrcTy.getScalarSizeInBits());
     break;
   }
+  case TargetOpcode::G_SHUFFLE_VECTOR: {
+    // Collect the minimum number of sign bits that are shared by every vector
+    // element referenced by the shuffle.
+    APInt DemandedLHS, DemandedRHS;
+    unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
+    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
+                                DemandedElts, DemandedLHS, DemandedRHS))
+      return 1;
+
+    unsigned Tmp = std::numeric_limits<unsigned>::max();
+    if (!!DemandedLHS)
+      Tmp =
+          computeNumSignBits(MI.getOperand(1).getReg(), DemandedLHS, Depth + 1);
+    if (!!DemandedRHS) {
+      unsigned Tmp2 =
+          computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
+      Tmp = std::min(Tmp, Tmp2);
+    }
+    // If we don't know anything, early out and try computeKnownBits fall-back.
+    if (Tmp == 1)
+      break;
+    assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
+    return Tmp;
+  }
   case TargetOpcode::G_INTRINSIC:
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
   case TargetOpcode::G_INTRINSIC_CONVERGENT:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index 56393142726c7..d86cbf57a65f3 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -400,9 +400,10 @@ define <8 x i16> @missing_insert(<8 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: missing_insert:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    ext v1.16b, v0.16b, v0.16b, #4
-; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ext v1.16b, v1.16b, v1.16b, #4
+; CHECK-GI-NEXT:    xtn v1.8b, v1.8h
+; CHECK-GI-NEXT:    smull v0.8h, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    ret
 entry:
   %ext.b = sext <8 x i8> %b to <8 x i16>
@@ -421,10 +422,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
 ; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
 ; CHECK-GI-NEXT:    rev64 v0.8h, v0.8h
 ; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-GI-NEXT:    smull v0.8h, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 entry:
   %in = sext <8 x i8> %src to <8 x i16>
@@ -444,16 +445,9 @@ define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
 ; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
 ; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    mov x11, v1.d[1]
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    mov x10, v0.d[1]
-; CHECK-GI-NEXT:    mul x8, x8, x9
-; CHECK-GI-NEXT:    mul x9, x10, x11
-; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    mov v0.d[1], x9
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    smull v0.2d, v0.2s, v1.2s
 ; CHECK-GI-NEXT:    ret
 entry:
   %in = sext <2 x i32> %src to <2 x i64>
@@ -496,16 +490,9 @@ define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
 ; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
 ; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    mov x11, v1.d[1]
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    mov x10, v0.d[1]
-; CHECK-GI-NEXT:    mul x8, x8, x9
-; CHECK-GI-NEXT:    mul x9, x10, x11
-; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    mov v0.d[1], x9
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    smull v0.2d, v0.2s, v1.2s
 ; CHECK-GI-NEXT:    ret
 entry:
   %in = sext <2 x i32> %src to <2 x i64>
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index eee1ec0b37315..b89b422c8c5ad 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -834,30 +834,18 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
 ; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
 ; CHECK-GI-NEXT:    mov x8, xzr
 ; CHECK-GI-NEXT:    dup v0.2d, v0.d[1]
-; CHECK-GI-NEXT:    mov x9, v0.d[1]
-; CHECK-GI-NEXT:    fmov x10, d0
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-GI-NEXT:  .LBB7_1: // %loop
 ; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NEXT:    ldr q0, [x0]
+; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    subs x2, x2, #8
 ; CHECK-GI-NEXT:    add x8, x8, #8
-; CHECK-GI-NEXT:    sshll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll2 v0.2d, v0.4s, #0
-; CHECK-GI-NEXT:    fmov x11, d1
-; CHECK-GI-NEXT:    mov x12, v1.d[1]
-; CHECK-GI-NEXT:    fmov x13, d0
-; CHECK-GI-NEXT:    mov x14, v0.d[1]
-; CHECK-GI-NEXT:    mul x11, x11, x10
-; CHECK-GI-NEXT:    mul x13, x13, x10
-; CHECK-GI-NEXT:    mul x12, x12, x9
-; CHECK-GI-NEXT:    mov v0.d[0], x11
-; CHECK-GI-NEXT:    mul x11, x14, x9
-; CHECK-GI-NEXT:    mov v1.d[0], x13
-; CHECK-GI-NEXT:    mov v0.d[1], x12
-; CHECK-GI-NEXT:    mov v1.d[1], x11
-; CHECK-GI-NEXT:    shrn v0.2s, v0.2d, #15
-; CHECK-GI-NEXT:    shrn2 v0.4s, v1.2d, #15
-; CHECK-GI-NEXT:    str q0, [x0], #32
+; CHECK-GI-NEXT:    mov d2, v1.d[1]
+; CHECK-GI-NEXT:    smull v1.2d, v1.2s, v0.2s
+; CHECK-GI-NEXT:    smull v2.2d, v2.2s, v0.2s
+; CHECK-GI-NEXT:    shrn v1.2s, v1.2d, #15
+; CHECK-GI-NEXT:    shrn2 v1.4s, v2.2d, #15
+; CHECK-GI-NEXT:    str q1, [x0], #32
 ; CHECK-GI-NEXT:    b.ne .LBB7_1
 ; CHECK-GI-NEXT:  // %bb.2: // %exit
 ; CHECK-GI-NEXT:    ret
@@ -971,18 +959,19 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sshll2 v0.8h, v0.16b, #0
 ; CHECK-GI-NEXT:    mov x8, xzr
+; CHECK-GI-NEXT:    dup v0.8h, v0.h[2]
+; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-GI-NEXT:  .LBB9_1: // %loop
 ; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    subs x2, x2, #8
 ; CHECK-GI-NEXT:    add x8, x8, #8
-; CHECK-GI-NEXT:    sshll v2.8h, v1.8b, #0
-; CHECK-GI-NEXT:    sshll2 v1.8h, v1.16b, #0
-; CHECK-GI-NEXT:    mul v2.8h, v2.8h, v0.h[2]
-; CHECK-GI-NEXT:    mul v1.8h, v1.8h, v0.h[2]
-; CHECK-GI-NEXT:    sshr v2.8h, v2.8h, #15
+; CHECK-GI-NEXT:    mov d2, v1.d[1]
+; CHECK-GI-NEXT:    smull v1.8h, v1.8b, v0.8b
+; CHECK-GI-NEXT:    smull v2.8h, v2.8b, v0.8b
 ; CHECK-GI-NEXT:    sshr v1.8h, v1.8h, #15
-; CHECK-GI-NEXT:    uzp1 v1.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT:    sshr v2.8h, v2.8h, #15
+; CHECK-GI-NEXT:    uzp1 v1.16b, v1.16b, v2.16b
 ; CHECK-GI-NEXT:    str q1, [x0], #32
 ; CHECK-GI-NEXT:    b.ne .LBB9_1
 ; CHECK-GI-NEXT:  // %bb.2: // %exit

@jayfoad jayfoad changed the title [GlobalISel] Add computeKnownBits for G_SHUFFLE_VECTOR [GlobalISel] Add computeNumSignBits for G_SHUFFLE_VECTOR May 12, 2025
@davemgreen davemgreen changed the base branch from users/davemgreen/gh-gi-ashr to main May 13, 2025 05:19
@davemgreen davemgreen force-pushed the gh-gi-shufflenumsignbits branch from 68fc0c4 to d4c41e8 Compare May 13, 2025 05:56
The code is similar to computeKnownBits and the code in
SelectionDAG::ComputeNumSignBits
@davemgreen davemgreen force-pushed the gh-gi-shufflenumsignbits branch from d4c41e8 to b619709 Compare May 17, 2025 11:28
@davemgreen davemgreen merged commit d644597 into llvm:main May 20, 2025
9 of 11 checks passed
@davemgreen davemgreen deleted the gh-gi-shufflenumsignbits branch May 20, 2025 07:25
@llvm-ci
Copy link
Collaborator

llvm-ci commented May 20, 2025

LLVM Buildbot has detected a new failure on builder lldb-x86_64-debian running on lldb-x86_64-debian while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/22751

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
UNSUPPORTED: lldb-shell :: ScriptInterpreter/Python/Crashlog/parser_json.test (2942 of 2953)
UNSUPPORTED: lldb-shell :: ScriptInterpreter/Python/Crashlog/no_threadState.test (2943 of 2953)
UNSUPPORTED: lldb-shell :: ScriptInterpreter/Lua/persistent_state.test (2944 of 2953)
UNSUPPORTED: lldb-shell :: ScriptInterpreter/Lua/breakpoint_oneline_callback.test (2945 of 2953)
UNSUPPORTED: lldb-shell :: ScriptInterpreter/Lua/nested_sessions.test (2946 of 2953)
UNSUPPORTED: lldb-shell :: ScriptInterpreter/Lua/io.test (2947 of 2953)
UNSUPPORTED: lldb-shell :: ScriptInterpreter/Lua/breakpoint_function_callback.test (2948 of 2953)
UNSUPPORTED: lldb-shell :: ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test (2949 of 2953)
PASS: lldb-api :: terminal/TestEditlineCompletions.py (2950 of 2953)
UNRESOLVED: lldb-api :: tools/lldb-dap/launch/TestDAP_launch.py (2951 of 2953)
******************** TEST 'lldb-api :: tools/lldb-dap/launch/TestDAP_launch.py' FAILED ********************
Script:
--
/usr/bin/python3 /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./lib --env LLVM_INCLUDE_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/include --env LLVM_TOOLS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./bin --arch x86_64 --build-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex --lldb-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/lldb --compiler /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/clang --dsymutil /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./bin --lldb-obj-root /home/worker/2.0.1/lldb-x86_64-debian/build/tools/lldb --lldb-libs-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./lib -t /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/tools/lldb-dap/launch -p TestDAP_launch.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 21.0.0git (https://github.com/llvm/llvm-project.git revision d644597f57050d575212bd7b20abad892a07e6de)
  clang revision d644597f57050d575212bd7b20abad892a07e6de
  llvm revision d644597f57050d575212bd7b20abad892a07e6de
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
Change dir to: /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/tools/lldb-dap/launch
runCmd: settings clear --all

output: 

runCmd: settings set symbols.enable-external-lookup false

output: 

runCmd: settings set target.inherit-tcc true

output: 

runCmd: settings set target.disable-aslr false

output: 

runCmd: settings set target.detach-on-error false

output: 

runCmd: settings set target.auto-apply-fixits false

sivan-shani pushed a commit to sivan-shani/llvm-project that referenced this pull request Jun 3, 2025
The code is similar to computeKnownBits and the code in
SelectionDAG::ComputeNumSignBits.
ajaden-codes pushed a commit to Jaddyen/llvm-project that referenced this pull request Jun 6, 2025
The code is similar to computeKnownBits and the code in
SelectionDAG::ComputeNumSignBits.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants