Skip to content

Commit 68fc0c4

Browse files
committed
[GlobalISel] Add computeNumSignBits for G_SHUFFLE_VECTOR
The code is similar to computeKnownBits and the code in SelectionDAG::ComputeNumSignBits
1 parent 153da95 commit 68fc0c4

File tree

3 files changed

+49
-49
lines changed

3 files changed

+49
-49
lines changed

llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,30 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
874874
SrcTy.getScalarSizeInBits());
875875
break;
876876
}
877+
case TargetOpcode::G_SHUFFLE_VECTOR: {
878+
// Collect the minimum number of sign bits that are shared by every vector
879+
// element referenced by the shuffle.
880+
APInt DemandedLHS, DemandedRHS;
881+
unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
882+
if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
883+
DemandedElts, DemandedLHS, DemandedRHS))
884+
return 1;
885+
886+
unsigned Tmp = std::numeric_limits<unsigned>::max();
887+
if (!!DemandedLHS)
888+
Tmp =
889+
computeNumSignBits(MI.getOperand(1).getReg(), DemandedLHS, Depth + 1);
890+
if (!!DemandedRHS) {
891+
unsigned Tmp2 =
892+
computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
893+
Tmp = std::min(Tmp, Tmp2);
894+
}
895+
// If we don't know anything, early out and try computeKnownBits fall-back.
896+
if (Tmp == 1)
897+
break;
898+
assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
899+
return Tmp;
900+
}
877901
case TargetOpcode::G_INTRINSIC:
878902
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
879903
case TargetOpcode::G_INTRINSIC_CONVERGENT:

llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -400,9 +400,10 @@ define <8 x i16> @missing_insert(<8 x i8> %b) {
400400
;
401401
; CHECK-GI-LABEL: missing_insert:
402402
; CHECK-GI: // %bb.0: // %entry
403-
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
404-
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #4
405-
; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
403+
; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
404+
; CHECK-GI-NEXT: ext v1.16b, v1.16b, v1.16b, #4
405+
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
406+
; CHECK-GI-NEXT: smull v0.8h, v1.8b, v0.8b
406407
; CHECK-GI-NEXT: ret
407408
entry:
408409
%ext.b = sext <8 x i8> %b to <8 x i16>
@@ -421,10 +422,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
421422
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
422423
; CHECK-GI: // %bb.0: // %entry
423424
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
424-
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
425425
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
426426
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
427-
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
427+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
428+
; CHECK-GI-NEXT: smull v0.8h, v0.8b, v1.8b
428429
; CHECK-GI-NEXT: ret
429430
entry:
430431
%in = sext <8 x i8> %src to <8 x i16>
@@ -444,16 +445,9 @@ define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
444445
; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
445446
; CHECK-GI: // %bb.0: // %entry
446447
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
447-
; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
448448
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
449-
; CHECK-GI-NEXT: fmov x9, d1
450-
; CHECK-GI-NEXT: mov x11, v1.d[1]
451-
; CHECK-GI-NEXT: fmov x8, d0
452-
; CHECK-GI-NEXT: mov x10, v0.d[1]
453-
; CHECK-GI-NEXT: mul x8, x8, x9
454-
; CHECK-GI-NEXT: mul x9, x10, x11
455-
; CHECK-GI-NEXT: mov v0.d[0], x8
456-
; CHECK-GI-NEXT: mov v0.d[1], x9
449+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
450+
; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
457451
; CHECK-GI-NEXT: ret
458452
entry:
459453
%in = sext <2 x i32> %src to <2 x i64>
@@ -496,16 +490,9 @@ define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
496490
; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
497491
; CHECK-GI: // %bb.0: // %entry
498492
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
499-
; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
500493
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
501-
; CHECK-GI-NEXT: fmov x9, d1
502-
; CHECK-GI-NEXT: mov x11, v1.d[1]
503-
; CHECK-GI-NEXT: fmov x8, d0
504-
; CHECK-GI-NEXT: mov x10, v0.d[1]
505-
; CHECK-GI-NEXT: mul x8, x8, x9
506-
; CHECK-GI-NEXT: mul x9, x10, x11
507-
; CHECK-GI-NEXT: mov v0.d[0], x8
508-
; CHECK-GI-NEXT: mov v0.d[1], x9
494+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
495+
; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
509496
; CHECK-GI-NEXT: ret
510497
entry:
511498
%in = sext <2 x i32> %src to <2 x i64>

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -834,30 +834,18 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
834834
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
835835
; CHECK-GI-NEXT: mov x8, xzr
836836
; CHECK-GI-NEXT: dup v0.2d, v0.d[1]
837-
; CHECK-GI-NEXT: mov x9, v0.d[1]
838-
; CHECK-GI-NEXT: fmov x10, d0
837+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
839838
; CHECK-GI-NEXT: .LBB7_1: // %loop
840839
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
841-
; CHECK-GI-NEXT: ldr q0, [x0]
840+
; CHECK-GI-NEXT: ldr q1, [x0]
842841
; CHECK-GI-NEXT: subs x2, x2, #8
843842
; CHECK-GI-NEXT: add x8, x8, #8
844-
; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
845-
; CHECK-GI-NEXT: sshll2 v0.2d, v0.4s, #0
846-
; CHECK-GI-NEXT: fmov x11, d1
847-
; CHECK-GI-NEXT: mov x12, v1.d[1]
848-
; CHECK-GI-NEXT: fmov x13, d0
849-
; CHECK-GI-NEXT: mov x14, v0.d[1]
850-
; CHECK-GI-NEXT: mul x11, x11, x10
851-
; CHECK-GI-NEXT: mul x13, x13, x10
852-
; CHECK-GI-NEXT: mul x12, x12, x9
853-
; CHECK-GI-NEXT: mov v0.d[0], x11
854-
; CHECK-GI-NEXT: mul x11, x14, x9
855-
; CHECK-GI-NEXT: mov v1.d[0], x13
856-
; CHECK-GI-NEXT: mov v0.d[1], x12
857-
; CHECK-GI-NEXT: mov v1.d[1], x11
858-
; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #15
859-
; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #15
860-
; CHECK-GI-NEXT: str q0, [x0], #32
843+
; CHECK-GI-NEXT: mov d2, v1.d[1]
844+
; CHECK-GI-NEXT: smull v1.2d, v1.2s, v0.2s
845+
; CHECK-GI-NEXT: smull v2.2d, v2.2s, v0.2s
846+
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #15
847+
; CHECK-GI-NEXT: shrn2 v1.4s, v2.2d, #15
848+
; CHECK-GI-NEXT: str q1, [x0], #32
861849
; CHECK-GI-NEXT: b.ne .LBB7_1
862850
; CHECK-GI-NEXT: // %bb.2: // %exit
863851
; CHECK-GI-NEXT: ret
@@ -971,18 +959,19 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
971959
; CHECK-GI: // %bb.0: // %entry
972960
; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
973961
; CHECK-GI-NEXT: mov x8, xzr
962+
; CHECK-GI-NEXT: dup v0.8h, v0.h[2]
963+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
974964
; CHECK-GI-NEXT: .LBB9_1: // %loop
975965
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
976966
; CHECK-GI-NEXT: ldr q1, [x0]
977967
; CHECK-GI-NEXT: subs x2, x2, #8
978968
; CHECK-GI-NEXT: add x8, x8, #8
979-
; CHECK-GI-NEXT: sshll v2.8h, v1.8b, #0
980-
; CHECK-GI-NEXT: sshll2 v1.8h, v1.16b, #0
981-
; CHECK-GI-NEXT: mul v2.8h, v2.8h, v0.h[2]
982-
; CHECK-GI-NEXT: mul v1.8h, v1.8h, v0.h[2]
983-
; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
969+
; CHECK-GI-NEXT: mov d2, v1.d[1]
970+
; CHECK-GI-NEXT: smull v1.8h, v1.8b, v0.8b
971+
; CHECK-GI-NEXT: smull v2.8h, v2.8b, v0.8b
984972
; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
985-
; CHECK-GI-NEXT: uzp1 v1.16b, v2.16b, v1.16b
973+
; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
974+
; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v2.16b
986975
; CHECK-GI-NEXT: str q1, [x0], #32
987976
; CHECK-GI-NEXT: b.ne .LBB9_1
988977
; CHECK-GI-NEXT: // %bb.2: // %exit

0 commit comments

Comments
 (0)