Skip to content

[GlobalISel] Add computeNumSignBits for G_SHUFFLE_VECTOR #139505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,28 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
}
break;
}
case TargetOpcode::G_SHUFFLE_VECTOR: {
// Collect the minimum number of sign bits that are shared by every vector
// element referenced by the shuffle.
//
// Operand layout as used below: operands 1 and 2 are the two source
// vectors and operand 3 carries the shuffle mask.
APInt DemandedLHS, DemandedRHS;
Register Src1 = MI.getOperand(1).getReg();
// Element count of the first source vector; passed to the helper below
// together with the mask.
unsigned NumElts = MRI.getType(Src1).getNumElements();
// Presumably translates the demanded result lanes (DemandedElts) through
// the mask into the lanes demanded from each source, written to
// DemandedLHS / DemandedRHS -- confirm against getShuffleDemandedElts.
// If the helper reports failure, give up and return 1.
if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
DemandedElts, DemandedLHS, DemandedRHS))
return 1;

// Query the first source only when at least one of its lanes is demanded
// (`!!` tests the APInt for being non-zero).
if (!!DemandedLHS)
FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1);
// If we don't know anything, early out and try computeKnownBits fall-back.
// NOTE(review): when DemandedLHS is empty, FirstAnswer has not been assigned
// in this case, so this test sees whatever value it held on entry. If that
// value is 1, a shuffle that reads only the second source breaks out here
// without ever querying it -- confirm against FirstAnswer's initialisation,
// which is outside this hunk.
if (FirstAnswer == 1)
break;
// The result can have no more sign bits than the weaker of the two sources,
// so fold the second source in with a minimum.
if (!!DemandedRHS) {
unsigned Tmp2 =
computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
}
break;
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC_CONVERGENT:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/GlobalISel/knownbits-shuffle.mir
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ body: |
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1
; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:9
; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:9
%0:_(<2 x s8>) = COPY $h0
%1:_(<2 x s16>) = COPY $s1
%2:_(<2 x s16>) = G_SEXT %0
Expand Down
48 changes: 14 additions & 34 deletions llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-GI-LABEL: dupsext_v4i16_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxth w8, w0
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: dup v1.4s, w8
; CHECK-GI-NEXT: mul v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
; CHECK-GI-NEXT: smull v0.4s, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
entry:
%in = sext i16 %src to i32
Expand Down Expand Up @@ -108,16 +108,9 @@ define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT: sxtw x8, w0
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: dup v1.2d, x8
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: mov x11, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov x10, v1.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x10, x11
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v0.d[1], x9
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
; CHECK-GI-NEXT: smull v0.2d, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
entry:
%in = sext i32 %src to i64
Expand Down Expand Up @@ -399,9 +392,10 @@ define <8 x i16> @missing_insert(<8 x i8> %b) {
;
; CHECK-GI-LABEL: missing_insert:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #4
; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
; CHECK-GI-NEXT: ext v1.16b, v1.16b, v1.16b, #4
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
; CHECK-GI-NEXT: smull v0.8h, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
entry:
%ext.b = sext <8 x i8> %b to <8 x i16>
Expand All @@ -420,10 +414,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-GI-NEXT: smull v0.8h, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
entry:
%in = sext <8 x i8> %src to <8 x i16>
Expand All @@ -443,16 +437,9 @@ define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: mov x11, v1.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: mov x10, v0.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x10, x11
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v0.d[1], x9
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: ret
entry:
%in = sext <2 x i32> %src to <2 x i64>
Expand Down Expand Up @@ -495,16 +482,9 @@ define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: mov x11, v1.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: mov x10, v0.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x10, x11
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v0.d[1], x9
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: ret
entry:
%in = sext <2 x i32> %src to <2 x i64>
Expand Down
87 changes: 37 additions & 50 deletions llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
Original file line number Diff line number Diff line change
Expand Up @@ -108,22 +108,21 @@ define void @matrix_mul_signed(i32 %N, ptr nocapture %C, ptr nocapture readonly
;
; CHECK-GI-LABEL: matrix_mul_signed:
; CHECK-GI: // %bb.0: // %vector.header
; CHECK-GI-NEXT: sxth w9, w3
; CHECK-GI-NEXT: sxth w8, w3
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: sxtw x8, w0
; CHECK-GI-NEXT: dup v0.4s, w9
; CHECK-GI-NEXT: and x8, x8, #0xfffffff8
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: .LBB1_1: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: add x9, x2, w0, sxtw #1
; CHECK-GI-NEXT: subs x8, x8, #8
; CHECK-GI-NEXT: ldp d1, d2, [x9]
; CHECK-GI-NEXT: add x9, x1, w0, sxtw #2
; CHECK-GI-NEXT: add w0, w0, #8
; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-GI-NEXT: smull v2.4s, v0.4h, v2.4h
; CHECK-GI-NEXT: stp q1, q2, [x9]
; CHECK-GI-NEXT: b.ne .LBB1_1
; CHECK-GI-NEXT: // %bb.2: // %for.end12
Expand Down Expand Up @@ -305,40 +304,39 @@ define void @larger_smull(ptr nocapture noundef readonly %x, i16 noundef %y, ptr
; CHECK-GI-NEXT: b.le .LBB3_7
; CHECK-GI-NEXT: // %bb.1: // %for.body.preheader
; CHECK-GI-NEXT: sxth w8, w1
; CHECK-GI-NEXT: mov x9, xzr
; CHECK-GI-NEXT: mov x10, xzr
; CHECK-GI-NEXT: cmp w3, #16
; CHECK-GI-NEXT: mov w10, w3
; CHECK-GI-NEXT: mov w9, w3
; CHECK-GI-NEXT: b.lo .LBB3_5
; CHECK-GI-NEXT: // %bb.2: // %vector.ph
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: and x9, x10, #0xfffffff0
; CHECK-GI-NEXT: and x10, x9, #0xfffffff0
; CHECK-GI-NEXT: add x11, x2, #32
; CHECK-GI-NEXT: add x12, x0, #16
; CHECK-GI-NEXT: mov x13, x9
; CHECK-GI-NEXT: mov x13, x10
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: .LBB3_3: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldp q1, q2, [x12, #-16]
; CHECK-GI-NEXT: mov x14, x11
; CHECK-GI-NEXT: subs x13, x13, #16
; CHECK-GI-NEXT: add x12, x12, #32
; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: sshll v4.4s, v2.4h, #0
; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0
; CHECK-GI-NEXT: mul v3.4s, v0.4s, v3.4s
; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: mul v4.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: stp q3, q1, [x14, #-32]!
; CHECK-GI-NEXT: stp q4, q2, [x11], #64
; CHECK-GI-NEXT: mov d3, v1.d[1]
; CHECK-GI-NEXT: mov d4, v2.d[1]
; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-GI-NEXT: smull v2.4s, v0.4h, v2.4h
; CHECK-GI-NEXT: smull v3.4s, v0.4h, v3.4h
; CHECK-GI-NEXT: smull v4.4s, v0.4h, v4.4h
; CHECK-GI-NEXT: stp q1, q3, [x14, #-32]!
; CHECK-GI-NEXT: stp q2, q4, [x11], #64
; CHECK-GI-NEXT: b.ne .LBB3_3
; CHECK-GI-NEXT: // %bb.4: // %middle.block
; CHECK-GI-NEXT: cmp x9, x10
; CHECK-GI-NEXT: cmp x10, x9
; CHECK-GI-NEXT: b.eq .LBB3_7
; CHECK-GI-NEXT: .LBB3_5: // %for.body.preheader1
; CHECK-GI-NEXT: add x11, x2, x9, lsl #2
; CHECK-GI-NEXT: add x12, x0, x9, lsl #1
; CHECK-GI-NEXT: sub x9, x10, x9
; CHECK-GI-NEXT: add x11, x2, x10, lsl #2
; CHECK-GI-NEXT: add x12, x0, x10, lsl #1
; CHECK-GI-NEXT: sub x9, x9, x10
; CHECK-GI-NEXT: .LBB3_6: // %for.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrsh w10, [x12], #2
Expand Down Expand Up @@ -834,30 +832,18 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: mov x8, xzr
; CHECK-GI-NEXT: dup v0.2d, v0.d[1]
; CHECK-GI-NEXT: mov x9, v0.d[1]
; CHECK-GI-NEXT: fmov x10, d0
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: .LBB7_1: // %loop
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: subs x2, x2, #8
; CHECK-GI-NEXT: add x8, x8, #8
; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
; CHECK-GI-NEXT: sshll2 v0.2d, v0.4s, #0
; CHECK-GI-NEXT: fmov x11, d1
; CHECK-GI-NEXT: mov x12, v1.d[1]
; CHECK-GI-NEXT: fmov x13, d0
; CHECK-GI-NEXT: mov x14, v0.d[1]
; CHECK-GI-NEXT: mul x11, x11, x10
; CHECK-GI-NEXT: mul x13, x13, x10
; CHECK-GI-NEXT: mul x12, x12, x9
; CHECK-GI-NEXT: mov v0.d[0], x11
; CHECK-GI-NEXT: mul x11, x14, x9
; CHECK-GI-NEXT: mov v1.d[0], x13
; CHECK-GI-NEXT: mov v0.d[1], x12
; CHECK-GI-NEXT: mov v1.d[1], x11
; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #15
; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #15
; CHECK-GI-NEXT: str q0, [x0], #32
; CHECK-GI-NEXT: mov d2, v1.d[1]
; CHECK-GI-NEXT: smull v1.2d, v1.2s, v0.2s
; CHECK-GI-NEXT: smull v2.2d, v2.2s, v0.2s
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #15
; CHECK-GI-NEXT: shrn2 v1.4s, v2.2d, #15
; CHECK-GI-NEXT: str q1, [x0], #32
; CHECK-GI-NEXT: b.ne .LBB7_1
; CHECK-GI-NEXT: // %bb.2: // %exit
; CHECK-GI-NEXT: ret
Expand Down Expand Up @@ -971,18 +957,19 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
; CHECK-GI-NEXT: mov x8, xzr
; CHECK-GI-NEXT: dup v0.8h, v0.h[2]
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-GI-NEXT: .LBB9_1: // %loop
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: subs x2, x2, #8
; CHECK-GI-NEXT: add x8, x8, #8
; CHECK-GI-NEXT: sshll v2.8h, v1.8b, #0
; CHECK-GI-NEXT: sshll2 v1.8h, v1.16b, #0
; CHECK-GI-NEXT: mul v2.8h, v2.8h, v0.h[2]
; CHECK-GI-NEXT: mul v1.8h, v1.8h, v0.h[2]
; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
; CHECK-GI-NEXT: mov d2, v1.d[1]
; CHECK-GI-NEXT: smull v1.8h, v1.8b, v0.8b
; CHECK-GI-NEXT: smull v2.8h, v2.8b, v0.8b
; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
; CHECK-GI-NEXT: uzp1 v1.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: str q1, [x0], #32
; CHECK-GI-NEXT: b.ne .LBB9_1
; CHECK-GI-NEXT: // %bb.2: // %exit
Expand Down