Skip to content

Commit 722385e

Browse files
authored
[GlobalISel] Add G_SHUFFLE_VECTOR computeKnownBits (#139501)
The implementation is ported from the ISD::VECTOR_SHUFFLE case of SelectionDAG::computeKnownBits.
1 parent c5ec668 commit 722385e

File tree

5 files changed

+158
-100
lines changed

5 files changed

+158
-100
lines changed

llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
1515
#include "llvm/ADT/StringExtras.h"
1616
#include "llvm/Analysis/ValueTracking.h"
17+
#include "llvm/Analysis/VectorUtils.h"
1718
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
1819
#include "llvm/CodeGen/GlobalISel/Utils.h"
1920
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -629,6 +630,33 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
629630
Known.Zero.setBitsFrom(LowBits);
630631
break;
631632
}
633+
case TargetOpcode::G_SHUFFLE_VECTOR: {
634+
APInt DemandedLHS, DemandedRHS;
635+
// Collect the known bits that are shared by every vector element referenced
636+
// by the shuffle.
637+
unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
638+
if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
639+
DemandedElts, DemandedLHS, DemandedRHS))
640+
break;
641+
642+
// Known bits are the values that are shared by every demanded element.
643+
Known.Zero.setAllBits();
644+
Known.One.setAllBits();
645+
if (!!DemandedLHS) {
646+
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS,
647+
Depth + 1);
648+
Known = Known.intersectWith(Known2);
649+
}
650+
// If we don't know any bits, early out.
651+
if (Known.isUnknown())
652+
break;
653+
if (!!DemandedRHS) {
654+
computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS,
655+
Depth + 1);
656+
Known = Known.intersectWith(Known2);
657+
}
658+
break;
659+
}
632660
}
633661

634662
LLVM_DEBUG(dumpResult(MI, Known, Depth));
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple aarch64 -passes="print<gisel-value-tracking>" %s -filetype=null 2>&1 | FileCheck %s
3+
4+
---
5+
name: lane0
6+
body: |
7+
bb.1:
8+
; CHECK-LABEL: name: @lane0
9+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
10+
; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1
11+
; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1
12+
%0:_(<2 x s8>) = COPY $h0
13+
%1:_(<2 x s8>) = COPY $h1
14+
%2:_(<2 x s8>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0)
15+
...
16+
---
17+
name: zext_known
18+
body: |
19+
bb.1:
20+
; CHECK-LABEL: name: @zext_known
21+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
22+
; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1
23+
; CHECK-NEXT: %2:_ KnownBits:00000000???????? SignBits:8
24+
; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8
25+
%0:_(<2 x s8>) = COPY $h0
26+
%1:_(<2 x s16>) = COPY $s1
27+
%2:_(<2 x s16>) = G_ZEXT %0
28+
%3:_(<2 x s16>) = G_SHUFFLE_VECTOR %2, %1, shufflemask(0, 0)
29+
...
30+
---
31+
name: zext_unknown
32+
body: |
33+
bb.1:
34+
; CHECK-LABEL: name: @zext_unknown
35+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
36+
; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1
37+
; CHECK-NEXT: %2:_ KnownBits:00000000???????? SignBits:8
38+
; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
39+
%0:_(<2 x s8>) = COPY $h0
40+
%1:_(<2 x s16>) = COPY $s1
41+
%2:_(<2 x s16>) = G_ZEXT %0
42+
%3:_(<2 x s16>) = G_SHUFFLE_VECTOR %2, %1, shufflemask(0, 2)
43+
...
44+
---
45+
name: sext_known
46+
body: |
47+
bb.1:
48+
; CHECK-LABEL: name: @sext_known
49+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
50+
; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1
51+
; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:9
52+
; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
53+
%0:_(<2 x s8>) = COPY $h0
54+
%1:_(<2 x s16>) = COPY $s1
55+
%2:_(<2 x s16>) = G_SEXT %0
56+
%3:_(<2 x s16>) = G_SHUFFLE_VECTOR %2, %1, shufflemask(0, 0)
57+
...
58+
---
59+
name: sext_unknown
60+
body: |
61+
bb.1:
62+
; CHECK-LABEL: name: @sext_unknown
63+
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
64+
; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1
65+
; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:9
66+
; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
67+
%0:_(<2 x s8>) = COPY $h0
68+
%1:_(<2 x s16>) = COPY $s1
69+
%2:_(<2 x s16>) = G_SEXT %0
70+
%3:_(<2 x s16>) = G_SHUFFLE_VECTOR %2, %1, shufflemask(0, 2)
71+
...

llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll

Lines changed: 15 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
3838
; CHECK-GI-LABEL: dupzext_v8i8_v8i16:
3939
; CHECK-GI: // %bb.0: // %entry
4040
; CHECK-GI-NEXT: and w8, w0, #0xff
41-
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
4241
; CHECK-GI-NEXT: dup v1.8h, w8
43-
; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
42+
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
43+
; CHECK-GI-NEXT: umull v0.8h, v1.8b, v0.8b
4444
; CHECK-GI-NEXT: ret
4545
entry:
4646
%in = zext i8 %src to i16
@@ -84,9 +84,9 @@ define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
8484
; CHECK-GI-LABEL: dupzext_v4i16_v4i32:
8585
; CHECK-GI: // %bb.0: // %entry
8686
; CHECK-GI-NEXT: and w8, w0, #0xffff
87-
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
8887
; CHECK-GI-NEXT: dup v1.4s, w8
89-
; CHECK-GI-NEXT: mul v0.4s, v1.4s, v0.4s
88+
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
89+
; CHECK-GI-NEXT: umull v0.4s, v1.4h, v0.4h
9090
; CHECK-GI-NEXT: ret
9191
entry:
9292
%in = zext i16 %src to i32
@@ -138,16 +138,9 @@ define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
138138
; CHECK-GI-LABEL: dupzext_v2i32_v2i64:
139139
; CHECK-GI: // %bb.0: // %entry
140140
; CHECK-GI-NEXT: mov w8, w0
141-
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
142141
; CHECK-GI-NEXT: dup v1.2d, x8
143-
; CHECK-GI-NEXT: fmov x9, d0
144-
; CHECK-GI-NEXT: mov x11, v0.d[1]
145-
; CHECK-GI-NEXT: fmov x8, d1
146-
; CHECK-GI-NEXT: mov x10, v1.d[1]
147-
; CHECK-GI-NEXT: mul x8, x8, x9
148-
; CHECK-GI-NEXT: mul x9, x10, x11
149-
; CHECK-GI-NEXT: mov v0.d[0], x8
150-
; CHECK-GI-NEXT: mov v0.d[1], x9
142+
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
143+
; CHECK-GI-NEXT: umull v0.2d, v1.2s, v0.2s
151144
; CHECK-GI-NEXT: ret
152145
entry:
153146
%in = zext i32 %src to i64
@@ -169,16 +162,9 @@ define <2 x i32> @dupzext_v2i32_v2i64_trunc(i32 %src, <2 x i32> %b) {
169162
; CHECK-GI-LABEL: dupzext_v2i32_v2i64_trunc:
170163
; CHECK-GI: // %bb.0: // %entry
171164
; CHECK-GI-NEXT: mov w8, w0
172-
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
173165
; CHECK-GI-NEXT: dup v1.2d, x8
174-
; CHECK-GI-NEXT: fmov x9, d0
175-
; CHECK-GI-NEXT: mov x11, v0.d[1]
176-
; CHECK-GI-NEXT: fmov x8, d1
177-
; CHECK-GI-NEXT: mov x10, v1.d[1]
178-
; CHECK-GI-NEXT: mul x8, x8, x9
179-
; CHECK-GI-NEXT: mul x9, x10, x11
180-
; CHECK-GI-NEXT: mov v0.d[0], x8
181-
; CHECK-GI-NEXT: mov v0.d[1], x9
166+
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
167+
; CHECK-GI-NEXT: umull v0.2d, v1.2s, v0.2s
182168
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
183169
; CHECK-GI-NEXT: ret
184170
entry:
@@ -240,14 +226,9 @@ define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
240226
; CHECK-GI-NEXT: and x8, x0, #0xffff
241227
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
242228
; CHECK-GI-NEXT: dup v1.2d, x8
243-
; CHECK-GI-NEXT: fmov x8, d1
244-
; CHECK-GI-NEXT: fmov x9, d0
245-
; CHECK-GI-NEXT: mov x10, v1.d[1]
246-
; CHECK-GI-NEXT: mov x11, v0.d[1]
247-
; CHECK-GI-NEXT: mul x8, x8, x9
248-
; CHECK-GI-NEXT: mul x9, x10, x11
249-
; CHECK-GI-NEXT: mov v0.d[0], x8
250-
; CHECK-GI-NEXT: mov v0.d[1], x9
229+
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
230+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
231+
; CHECK-GI-NEXT: umull v0.2d, v1.2s, v0.2s
251232
; CHECK-GI-NEXT: ret
252233
entry:
253234
%in = zext i16 %src to i64
@@ -491,10 +472,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
491472
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
492473
; CHECK-GI: // %bb.0: // %entry
493474
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
494-
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
495475
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
496476
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
497-
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
477+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
478+
; CHECK-GI-NEXT: umull v0.8h, v0.8b, v1.8b
498479
; CHECK-GI-NEXT: ret
499480
entry:
500481
%in = zext <8 x i8> %src to <8 x i16>
@@ -545,8 +526,8 @@ define <8 x i16> @shufzext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x
545526
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
546527
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
547528
; CHECK-GI-NEXT: trn1 v0.8h, v0.8h, v1.8h
548-
; CHECK-GI-NEXT: ushll v1.8h, v2.8b, #0
549-
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
529+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
530+
; CHECK-GI-NEXT: umull v0.8h, v0.8b, v2.8b
550531
; CHECK-GI-NEXT: ret
551532
entry:
552533
%in1 = zext <8 x i8> %src1 to <8 x i16>

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Lines changed: 34 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,16 @@ define void @matrix_mul_unsigned(i32 %N, ptr nocapture %C, ptr nocapture readonl
2828
; CHECK-GI-NEXT: dup v0.4s, w8
2929
; CHECK-GI-NEXT: mov w8, w0
3030
; CHECK-GI-NEXT: and x8, x8, #0xfffffff8
31+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
3132
; CHECK-GI-NEXT: .LBB0_1: // %vector.body
3233
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
3334
; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1
3435
; CHECK-GI-NEXT: subs x8, x8, #8
3536
; CHECK-GI-NEXT: ldp d1, d2, [x9]
3637
; CHECK-GI-NEXT: add x9, x1, w0, uxtw #2
3738
; CHECK-GI-NEXT: add w0, w0, #8
38-
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
39-
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
40-
; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
41-
; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
39+
; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
40+
; CHECK-GI-NEXT: umull v2.4s, v0.4h, v2.4h
4241
; CHECK-GI-NEXT: stp q1, q2, [x9]
4342
; CHECK-GI-NEXT: b.ne .LBB0_1
4443
; CHECK-GI-NEXT: // %bb.2: // %for.end12
@@ -478,22 +477,21 @@ define void @larger_umull(ptr nocapture noundef readonly %x, i16 noundef %y, ptr
478477
; CHECK-GI-NEXT: mov x12, x8
479478
; CHECK-GI-NEXT: .LBB4_3: // %vector.body
480479
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
481-
; CHECK-GI-NEXT: ldp q0, q1, [x11, #-16]
482480
; CHECK-GI-NEXT: and w13, w1, #0xffff
483-
; CHECK-GI-NEXT: dup v2.4s, w13
481+
; CHECK-GI-NEXT: ldp q1, q2, [x11, #-16]
482+
; CHECK-GI-NEXT: dup v0.4s, w13
484483
; CHECK-GI-NEXT: mov x13, x10
485484
; CHECK-GI-NEXT: subs x12, x12, #16
486485
; CHECK-GI-NEXT: add x11, x11, #32
487-
; CHECK-GI-NEXT: ushll v3.4s, v0.4h, #0
488-
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
489-
; CHECK-GI-NEXT: ushll v4.4s, v1.4h, #0
490-
; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
491-
; CHECK-GI-NEXT: mul v3.4s, v2.4s, v3.4s
492-
; CHECK-GI-NEXT: mul v0.4s, v2.4s, v0.4s
493-
; CHECK-GI-NEXT: mul v4.4s, v2.4s, v4.4s
494-
; CHECK-GI-NEXT: mul v1.4s, v2.4s, v1.4s
495-
; CHECK-GI-NEXT: stp q3, q0, [x13, #-32]!
496-
; CHECK-GI-NEXT: stp q4, q1, [x10], #64
486+
; CHECK-GI-NEXT: mov d3, v1.d[1]
487+
; CHECK-GI-NEXT: mov d4, v2.d[1]
488+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
489+
; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
490+
; CHECK-GI-NEXT: umull v3.4s, v0.4h, v3.4h
491+
; CHECK-GI-NEXT: umull v2.4s, v0.4h, v2.4h
492+
; CHECK-GI-NEXT: umull v0.4s, v0.4h, v4.4h
493+
; CHECK-GI-NEXT: stp q1, q3, [x13, #-32]!
494+
; CHECK-GI-NEXT: stp q2, q0, [x10], #64
497495
; CHECK-GI-NEXT: b.ne .LBB4_3
498496
; CHECK-GI-NEXT: // %bb.4: // %middle.block
499497
; CHECK-GI-NEXT: cmp x8, x9
@@ -775,22 +773,15 @@ define void @sink_v2z64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
775773
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
776774
; CHECK-GI-NEXT: mov x8, xzr
777775
; CHECK-GI-NEXT: dup v0.2d, v0.d[1]
778-
; CHECK-GI-NEXT: mov x9, v0.d[1]
779-
; CHECK-GI-NEXT: fmov x10, d0
776+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
780777
; CHECK-GI-NEXT: .LBB6_1: // %loop
781778
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
782-
; CHECK-GI-NEXT: ldr d0, [x0]
779+
; CHECK-GI-NEXT: ldr d1, [x0]
783780
; CHECK-GI-NEXT: subs x2, x2, #8
784781
; CHECK-GI-NEXT: add x8, x8, #8
785-
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
786-
; CHECK-GI-NEXT: fmov x11, d0
787-
; CHECK-GI-NEXT: mov x12, v0.d[1]
788-
; CHECK-GI-NEXT: mul x11, x11, x10
789-
; CHECK-GI-NEXT: mul x12, x12, x9
790-
; CHECK-GI-NEXT: mov v0.d[0], x11
791-
; CHECK-GI-NEXT: mov v0.d[1], x12
792-
; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #15
793-
; CHECK-GI-NEXT: str d0, [x0], #32
782+
; CHECK-GI-NEXT: umull v1.2d, v1.2s, v0.2s
783+
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #15
784+
; CHECK-GI-NEXT: str d1, [x0], #32
794785
; CHECK-GI-NEXT: b.ne .LBB6_1
795786
; CHECK-GI-NEXT: // %bb.2: // %exit
796787
; CHECK-GI-NEXT: ret
@@ -917,13 +908,14 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
917908
; CHECK-GI: // %bb.0: // %entry
918909
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
919910
; CHECK-GI-NEXT: mov x8, xzr
911+
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
912+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
920913
; CHECK-GI-NEXT: .LBB8_1: // %loop
921914
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
922915
; CHECK-GI-NEXT: ldr d1, [x0]
923916
; CHECK-GI-NEXT: subs x2, x2, #8
924917
; CHECK-GI-NEXT: add x8, x8, #8
925-
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
926-
; CHECK-GI-NEXT: mul v1.8h, v1.8h, v0.h[0]
918+
; CHECK-GI-NEXT: umull v1.8h, v1.8b, v0.8b
927919
; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
928920
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
929921
; CHECK-GI-NEXT: str d1, [x0], #32
@@ -1046,17 +1038,16 @@ define void @matrix_mul_unsigned_and(i32 %N, ptr nocapture %C, ptr nocapture rea
10461038
; CHECK-GI-NEXT: dup v0.4s, w8
10471039
; CHECK-GI-NEXT: mov w8, w0
10481040
; CHECK-GI-NEXT: and x8, x8, #0xfffffff8
1041+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
10491042
; CHECK-GI-NEXT: .LBB10_1: // %vector.body
10501043
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
10511044
; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1
10521045
; CHECK-GI-NEXT: subs x8, x8, #8
10531046
; CHECK-GI-NEXT: ldp d1, d2, [x9]
10541047
; CHECK-GI-NEXT: add x9, x1, w0, uxtw #2
10551048
; CHECK-GI-NEXT: add w0, w0, #8
1056-
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
1057-
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
1058-
; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
1059-
; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
1049+
; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
1050+
; CHECK-GI-NEXT: umull v2.4s, v0.4h, v2.4h
10601051
; CHECK-GI-NEXT: stp q1, q2, [x9]
10611052
; CHECK-GI-NEXT: b.ne .LBB10_1
10621053
; CHECK-GI-NEXT: // %bb.2: // %for.end12
@@ -1135,6 +1126,7 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt
11351126
; CHECK-GI-NEXT: dup v0.4s, w8
11361127
; CHECK-GI-NEXT: mov w8, w0
11371128
; CHECK-GI-NEXT: and x8, x8, #0xfffffff0
1129+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
11381130
; CHECK-GI-NEXT: .LBB11_1: // %vector.body
11391131
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
11401132
; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1
@@ -1143,16 +1135,14 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt
11431135
; CHECK-GI-NEXT: ldur q2, [x9, #8]
11441136
; CHECK-GI-NEXT: add x9, x1, w0, uxtw #2
11451137
; CHECK-GI-NEXT: add w0, w0, #16
1146-
; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
1147-
; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
1148-
; CHECK-GI-NEXT: ushll v4.4s, v2.4h, #0
1149-
; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0
1150-
; CHECK-GI-NEXT: mul v3.4s, v0.4s, v3.4s
1151-
; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
1152-
; CHECK-GI-NEXT: mul v4.4s, v0.4s, v4.4s
1153-
; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
1154-
; CHECK-GI-NEXT: stp q3, q1, [x9]
1155-
; CHECK-GI-NEXT: stp q4, q2, [x9, #32]!
1138+
; CHECK-GI-NEXT: mov d3, v1.d[1]
1139+
; CHECK-GI-NEXT: mov d4, v2.d[1]
1140+
; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
1141+
; CHECK-GI-NEXT: umull v2.4s, v0.4h, v2.4h
1142+
; CHECK-GI-NEXT: umull v3.4s, v0.4h, v3.4h
1143+
; CHECK-GI-NEXT: umull v4.4s, v0.4h, v4.4h
1144+
; CHECK-GI-NEXT: stp q1, q3, [x9]
1145+
; CHECK-GI-NEXT: stp q2, q4, [x9, #32]!
11561146
; CHECK-GI-NEXT: b.ne .LBB11_1
11571147
; CHECK-GI-NEXT: // %bb.2: // %for.end12
11581148
; CHECK-GI-NEXT: ret

0 commit comments

Comments
 (0)