Skip to content

Commit d4c41e8

Browse files
committed
[GlobalISel] Add computeNumSignBits for G_SHUFFLE_VECTOR
The implementation is modeled on the corresponding shuffle handling in computeKnownBits and in SelectionDAG::ComputeNumSignBits.
1 parent e618a79 commit d4c41e8

File tree

3 files changed

+51
-57
lines changed

3 files changed

+51
-57
lines changed

llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
1515
#include "llvm/ADT/StringExtras.h"
1616
#include "llvm/Analysis/ValueTracking.h"
17+
#include "llvm/Analysis/VectorUtils.h"
1718
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
1819
#include "llvm/CodeGen/GlobalISel/Utils.h"
1920
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -836,6 +837,28 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
836837
return TyBits - 1; // Every always-zero bit is a sign bit.
837838
break;
838839
}
840+
case TargetOpcode::G_SHUFFLE_VECTOR: {
841+
// Collect the minimum number of sign bits that are shared by every vector
842+
// element referenced by the shuffle.
843+
APInt DemandedLHS, DemandedRHS;
844+
unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
845+
if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
846+
DemandedElts, DemandedLHS, DemandedRHS))
847+
return 1;
848+
849+
if (!!DemandedLHS)
850+
FirstAnswer =
851+
computeNumSignBits(MI.getOperand(1).getReg(), DemandedLHS, Depth + 1);
852+
// If we don't know anything, early out and try computeKnownBits fall-back.
853+
if (FirstAnswer == 1)
854+
break;
855+
if (!!DemandedRHS) {
856+
unsigned Tmp2 =
857+
computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
858+
FirstAnswer = std::min(FirstAnswer, Tmp2);
859+
}
860+
break;
861+
}
839862
case TargetOpcode::G_INTRINSIC:
840863
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
841864
case TargetOpcode::G_INTRINSIC_CONVERGENT:

llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll

Lines changed: 13 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -240,14 +240,9 @@ define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
240240
; CHECK-GI-NEXT: and x8, x0, #0xffff
241241
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
242242
; CHECK-GI-NEXT: dup v1.2d, x8
243-
; CHECK-GI-NEXT: fmov x8, d1
244-
; CHECK-GI-NEXT: fmov x9, d0
245-
; CHECK-GI-NEXT: mov x10, v1.d[1]
246-
; CHECK-GI-NEXT: mov x11, v0.d[1]
247-
; CHECK-GI-NEXT: mul x8, x8, x9
248-
; CHECK-GI-NEXT: mul x9, x10, x11
249-
; CHECK-GI-NEXT: mov v0.d[0], x8
250-
; CHECK-GI-NEXT: mov v0.d[1], x9
243+
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
244+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
245+
; CHECK-GI-NEXT: smull v0.2d, v1.2s, v0.2s
251246
; CHECK-GI-NEXT: ret
252247
entry:
253248
%in = zext i16 %src to i64
@@ -419,9 +414,10 @@ define <8 x i16> @missing_insert(<8 x i8> %b) {
419414
;
420415
; CHECK-GI-LABEL: missing_insert:
421416
; CHECK-GI: // %bb.0: // %entry
422-
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
423-
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #4
424-
; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
417+
; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
418+
; CHECK-GI-NEXT: ext v1.16b, v1.16b, v1.16b, #4
419+
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
420+
; CHECK-GI-NEXT: smull v0.8h, v1.8b, v0.8b
425421
; CHECK-GI-NEXT: ret
426422
entry:
427423
%ext.b = sext <8 x i8> %b to <8 x i16>
@@ -440,10 +436,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
440436
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
441437
; CHECK-GI: // %bb.0: // %entry
442438
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
443-
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
444439
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
445440
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
446-
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
441+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
442+
; CHECK-GI-NEXT: smull v0.8h, v0.8b, v1.8b
447443
; CHECK-GI-NEXT: ret
448444
entry:
449445
%in = sext <8 x i8> %src to <8 x i16>
@@ -463,16 +459,9 @@ define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
463459
; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
464460
; CHECK-GI: // %bb.0: // %entry
465461
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
466-
; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
467462
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
468-
; CHECK-GI-NEXT: fmov x9, d1
469-
; CHECK-GI-NEXT: mov x11, v1.d[1]
470-
; CHECK-GI-NEXT: fmov x8, d0
471-
; CHECK-GI-NEXT: mov x10, v0.d[1]
472-
; CHECK-GI-NEXT: mul x8, x8, x9
473-
; CHECK-GI-NEXT: mul x9, x10, x11
474-
; CHECK-GI-NEXT: mov v0.d[0], x8
475-
; CHECK-GI-NEXT: mov v0.d[1], x9
463+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
464+
; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
476465
; CHECK-GI-NEXT: ret
477466
entry:
478467
%in = sext <2 x i32> %src to <2 x i64>
@@ -515,16 +504,9 @@ define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
515504
; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
516505
; CHECK-GI: // %bb.0: // %entry
517506
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
518-
; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
519507
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
520-
; CHECK-GI-NEXT: fmov x9, d1
521-
; CHECK-GI-NEXT: mov x11, v1.d[1]
522-
; CHECK-GI-NEXT: fmov x8, d0
523-
; CHECK-GI-NEXT: mov x10, v0.d[1]
524-
; CHECK-GI-NEXT: mul x8, x8, x9
525-
; CHECK-GI-NEXT: mul x9, x10, x11
526-
; CHECK-GI-NEXT: mov v0.d[0], x8
527-
; CHECK-GI-NEXT: mov v0.d[1], x9
508+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
509+
; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
528510
; CHECK-GI-NEXT: ret
529511
entry:
530512
%in = sext <2 x i32> %src to <2 x i64>

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -843,30 +843,18 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
843843
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
844844
; CHECK-GI-NEXT: mov x8, xzr
845845
; CHECK-GI-NEXT: dup v0.2d, v0.d[1]
846-
; CHECK-GI-NEXT: mov x9, v0.d[1]
847-
; CHECK-GI-NEXT: fmov x10, d0
846+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
848847
; CHECK-GI-NEXT: .LBB7_1: // %loop
849848
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
850-
; CHECK-GI-NEXT: ldr q0, [x0]
849+
; CHECK-GI-NEXT: ldr q1, [x0]
851850
; CHECK-GI-NEXT: subs x2, x2, #8
852851
; CHECK-GI-NEXT: add x8, x8, #8
853-
; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
854-
; CHECK-GI-NEXT: sshll2 v0.2d, v0.4s, #0
855-
; CHECK-GI-NEXT: fmov x11, d1
856-
; CHECK-GI-NEXT: mov x12, v1.d[1]
857-
; CHECK-GI-NEXT: fmov x13, d0
858-
; CHECK-GI-NEXT: mov x14, v0.d[1]
859-
; CHECK-GI-NEXT: mul x11, x11, x10
860-
; CHECK-GI-NEXT: mul x13, x13, x10
861-
; CHECK-GI-NEXT: mul x12, x12, x9
862-
; CHECK-GI-NEXT: mov v0.d[0], x11
863-
; CHECK-GI-NEXT: mul x11, x14, x9
864-
; CHECK-GI-NEXT: mov v1.d[0], x13
865-
; CHECK-GI-NEXT: mov v0.d[1], x12
866-
; CHECK-GI-NEXT: mov v1.d[1], x11
867-
; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #15
868-
; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #15
869-
; CHECK-GI-NEXT: str q0, [x0], #32
852+
; CHECK-GI-NEXT: mov d2, v1.d[1]
853+
; CHECK-GI-NEXT: smull v1.2d, v1.2s, v0.2s
854+
; CHECK-GI-NEXT: smull v2.2d, v2.2s, v0.2s
855+
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #15
856+
; CHECK-GI-NEXT: shrn2 v1.4s, v2.2d, #15
857+
; CHECK-GI-NEXT: str q1, [x0], #32
870858
; CHECK-GI-NEXT: b.ne .LBB7_1
871859
; CHECK-GI-NEXT: // %bb.2: // %exit
872860
; CHECK-GI-NEXT: ret
@@ -979,18 +967,19 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
979967
; CHECK-GI: // %bb.0: // %entry
980968
; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
981969
; CHECK-GI-NEXT: mov x8, xzr
970+
; CHECK-GI-NEXT: dup v0.8h, v0.h[2]
971+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
982972
; CHECK-GI-NEXT: .LBB9_1: // %loop
983973
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
984974
; CHECK-GI-NEXT: ldr q1, [x0]
985975
; CHECK-GI-NEXT: subs x2, x2, #8
986976
; CHECK-GI-NEXT: add x8, x8, #8
987-
; CHECK-GI-NEXT: sshll v2.8h, v1.8b, #0
988-
; CHECK-GI-NEXT: sshll2 v1.8h, v1.16b, #0
989-
; CHECK-GI-NEXT: mul v2.8h, v2.8h, v0.h[2]
990-
; CHECK-GI-NEXT: mul v1.8h, v1.8h, v0.h[2]
991-
; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
977+
; CHECK-GI-NEXT: mov d2, v1.d[1]
978+
; CHECK-GI-NEXT: smull v1.8h, v1.8b, v0.8b
979+
; CHECK-GI-NEXT: smull v2.8h, v2.8b, v0.8b
992980
; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
993-
; CHECK-GI-NEXT: uzp1 v1.16b, v2.16b, v1.16b
981+
; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
982+
; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v2.16b
994983
; CHECK-GI-NEXT: str q1, [x0], #32
995984
; CHECK-GI-NEXT: b.ne .LBB9_1
996985
; CHECK-GI-NEXT: // %bb.2: // %exit

0 commit comments

Comments
 (0)