Skip to content

Commit 148883c

Browse files
committed
Fix Cortex-A510 scheduling latencies for SIMD max/min reductions and shift-accumulate (SSRA/USRA) instructions. Update tests.
Change-Id: I65897290c47e2e9c9f8401427e344d7d80a3c060
1 parent 904fd85 commit 148883c

File tree

9 files changed

+98
-75
lines changed

9 files changed

+98
-75
lines changed

llvm/lib/Target/AArch64/AArch64SchedA510.td

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -440,7 +440,7 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "(AND|EOR|NOT|
440440
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>;
441441
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU](MIN|MAX)P?v(16i8|4i32|8i16)")>;
442442
// SIMD max/min, reduce
443-
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU](MAX|MIN)Vv")>;
443+
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU](MAX|MIN)Vv")>;
444444
// ASIMD multiply, by element
445445
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "MULv(2i32|4i16|4i32|8i16)_indexed$",
446446
"SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>;
@@ -469,8 +469,8 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs PMULLv8i8, PMULLv
469469
// ASIMD pairwise add and accumulate
470470
def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "[SU]ADALPv")>;
471471
// ASIMD shift accumulate
472-
def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>;
473-
def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>;
472+
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>;
473+
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>;
474474
// ASIMD shift accumulate #2
475475
def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "[SU]RSRA[vd]")>;
476476
// ASIMD shift by immed

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -140,10 +140,10 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
140140
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
141141
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
142142
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
143+
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
143144
; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
144145
; GISEL-NEXT: usra v1.8h, v0.8h, #1
145-
; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
146-
; GISEL-NEXT: neg v0.8h, v0.8h
146+
; GISEL-NEXT: neg v0.8h, v2.8h
147147
; GISEL-NEXT: ushl v0.8h, v1.8h, v0.8h
148148
; GISEL-NEXT: ret
149149
%1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>

llvm/test/CodeGen/AArch64/arm64-vhadd.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -903,10 +903,10 @@ define <2 x i16> @hadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
903903
; CHECK: // %bb.0:
904904
; CHECK-NEXT: shl.2s v0, v0, #24
905905
; CHECK-NEXT: shl.2s v1, v1, #24
906+
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
906907
; CHECK-NEXT: sshr.2s v0, v0, #24
907908
; CHECK-NEXT: ssra.2s v0, v1, #24
908-
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
909-
; CHECK-NEXT: and.8b v0, v0, v1
909+
; CHECK-NEXT: and.8b v0, v0, v2
910910
; CHECK-NEXT: ushr.2s v0, v0, #1
911911
; CHECK-NEXT: ret
912912
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
@@ -968,10 +968,10 @@ define <4 x i16> @rhadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
968968
; CHECK: // %bb.0:
969969
; CHECK-NEXT: shl.4h v0, v0, #8
970970
; CHECK-NEXT: shl.4h v1, v1, #8
971+
; CHECK-NEXT: movi.4h v2, #1
971972
; CHECK-NEXT: sshr.4h v0, v0, #8
972973
; CHECK-NEXT: ssra.4h v0, v1, #8
973-
; CHECK-NEXT: movi.4h v1, #1
974-
; CHECK-NEXT: add.4h v0, v0, v1
974+
; CHECK-NEXT: add.4h v0, v0, v2
975975
; CHECK-NEXT: ushr.4h v0, v0, #1
976976
; CHECK-NEXT: ret
977977
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>

llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -111,16 +111,16 @@ define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind {
111111
; CHECK-LABEL: test_srem_odd_undef1:
112112
; CHECK: // %bb.0:
113113
; CHECK-NEXT: mov w8, #34079 // =0x851f
114+
; CHECK-NEXT: movi v3.4s, #25
114115
; CHECK-NEXT: movk w8, #20971, lsl #16
115116
; CHECK-NEXT: dup v1.4s, w8
116117
; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
117118
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
118119
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s
119120
; CHECK-NEXT: sshr v2.4s, v1.4s, #3
120121
; CHECK-NEXT: usra v2.4s, v1.4s, #31
121-
; CHECK-NEXT: movi v1.4s, #25
122-
; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
123122
; CHECK-NEXT: movi v1.4s, #1
123+
; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s
124124
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
125125
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
126126
; CHECK-NEXT: ret
@@ -134,16 +134,16 @@ define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind {
134134
; CHECK-LABEL: test_srem_even_undef1:
135135
; CHECK: // %bb.0:
136136
; CHECK-NEXT: mov w8, #34079 // =0x851f
137+
; CHECK-NEXT: movi v3.4s, #100
137138
; CHECK-NEXT: movk w8, #20971, lsl #16
138139
; CHECK-NEXT: dup v1.4s, w8
139140
; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
140141
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
141142
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s
142143
; CHECK-NEXT: sshr v2.4s, v1.4s, #5
143144
; CHECK-NEXT: usra v2.4s, v1.4s, #31
144-
; CHECK-NEXT: movi v1.4s, #100
145-
; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
146145
; CHECK-NEXT: movi v1.4s, #1
146+
; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s
147147
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
148148
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
149149
; CHECK-NEXT: ret
@@ -201,11 +201,11 @@ define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
201201
define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
202202
; CHECK-LABEL: test_srem_int_min:
203203
; CHECK: // %bb.0:
204-
; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
205-
; CHECK-NEXT: mov v2.16b, v0.16b
206-
; CHECK-NEXT: usra v2.4s, v1.4s, #1
204+
; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
205+
; CHECK-NEXT: mov v3.16b, v0.16b
207206
; CHECK-NEXT: movi v1.4s, #128, lsl #24
208-
; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
207+
; CHECK-NEXT: usra v3.4s, v2.4s, #1
208+
; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
209209
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
210210
; CHECK-NEXT: movi v1.4s, #1
211211
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0

llvm/test/CodeGen/AArch64/srem-vector-lkk.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -245,15 +245,15 @@ define <4 x i32> @fold_srem_v4i32(<4 x i32> %x) {
245245
; CHECK-LABEL: fold_srem_v4i32:
246246
; CHECK: // %bb.0:
247247
; CHECK-NEXT: mov w8, #26215 // =0x6667
248+
; CHECK-NEXT: movi v3.4s, #10
248249
; CHECK-NEXT: movk w8, #26214, lsl #16
249250
; CHECK-NEXT: dup v1.4s, w8
250251
; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
251252
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
252253
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s
253254
; CHECK-NEXT: sshr v2.4s, v1.4s, #2
254255
; CHECK-NEXT: usra v2.4s, v1.4s, #31
255-
; CHECK-NEXT: movi v1.4s, #10
256-
; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
256+
; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s
257257
; CHECK-NEXT: ret
258258
%1 = srem <4 x i32> %x, <i32 10, i32 10, i32 10, i32 10>
259259
ret <4 x i32> %1

llvm/test/CodeGen/AArch64/uadd_sat_vec.ll

Lines changed: 33 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -145,16 +145,39 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
145145
}
146146

147147
define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
148-
; CHECK-LABEL: v4i8:
149-
; CHECK: // %bb.0:
150-
; CHECK-NEXT: ldr s0, [x0]
151-
; CHECK-NEXT: ldr s1, [x1]
152-
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
153-
; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
154-
; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
155-
; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
156-
; CHECK-NEXT: str s0, [x2]
157-
; CHECK-NEXT: ret
148+
; CHECK-SD-LABEL: v4i8:
149+
; CHECK-SD: // %bb.0:
150+
; CHECK-SD-NEXT: ldr s0, [x0]
151+
; CHECK-SD-NEXT: ldr s1, [x1]
152+
; CHECK-SD-NEXT: movi d2, #0xff00ff00ff00ff
153+
; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
154+
; CHECK-SD-NEXT: umin v0.4h, v0.4h, v2.4h
155+
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
156+
; CHECK-SD-NEXT: str s0, [x2]
157+
; CHECK-SD-NEXT: ret
158+
;
159+
; CHECK-GI-LABEL: v4i8:
160+
; CHECK-GI: // %bb.0:
161+
; CHECK-GI-NEXT: ldr w8, [x0]
162+
; CHECK-GI-NEXT: ldr w9, [x1]
163+
; CHECK-GI-NEXT: fmov s0, w8
164+
; CHECK-GI-NEXT: fmov s1, w9
165+
; CHECK-GI-NEXT: mov b2, v0.b[1]
166+
; CHECK-GI-NEXT: mov b3, v1.b[1]
167+
; CHECK-GI-NEXT: mov b4, v0.b[2]
168+
; CHECK-GI-NEXT: mov b5, v0.b[3]
169+
; CHECK-GI-NEXT: mov b6, v1.b[3]
170+
; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
171+
; CHECK-GI-NEXT: mov b2, v1.b[2]
172+
; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
173+
; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
174+
; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
175+
; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
176+
; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
177+
; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b
178+
; CHECK-GI-NEXT: fmov w8, s0
179+
; CHECK-GI-NEXT: str w8, [x2]
180+
; CHECK-GI-NEXT: ret
158181
%x = load <4 x i8>, ptr %px
159182
%y = load <4 x i8>, ptr %py
160183
%z = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y)

llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -46,17 +46,17 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
4646
; CHECK-LABEL: t32_6_part0:
4747
; CHECK: // %bb.0:
4848
; CHECK-NEXT: adrp x8, .LCPI2_0
49-
; CHECK-NEXT: movi v2.16b, #170
5049
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
5150
; CHECK-NEXT: mov w8, #43691 // =0xaaab
5251
; CHECK-NEXT: movk w8, #43690, lsl #16
5352
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
5453
; CHECK-NEXT: dup v1.4s, w8
5554
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
56-
; CHECK-NEXT: shl v1.4s, v0.4s, #31
57-
; CHECK-NEXT: usra v1.4s, v0.4s, #1
58-
; CHECK-NEXT: fneg v0.4s, v2.4s
59-
; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s
55+
; CHECK-NEXT: movi v1.16b, #170
56+
; CHECK-NEXT: shl v2.4s, v0.4s, #31
57+
; CHECK-NEXT: fneg v1.4s, v1.4s
58+
; CHECK-NEXT: usra v2.4s, v0.4s, #1
59+
; CHECK-NEXT: cmhs v0.4s, v1.4s, v2.4s
6060
; CHECK-NEXT: xtn v0.4h, v0.4s
6161
; CHECK-NEXT: ret
6262
%urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>

llvm/test/CodeGen/AArch64/vecreduce-add.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3534,21 +3534,21 @@ entry:
35343534
define i64 @add_pair_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y) {
35353535
; CHECK-SD-LABEL: add_pair_v4i8_v4i64_sext:
35363536
; CHECK-SD: // %bb.0: // %entry
3537-
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
35383537
; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0
3539-
; CHECK-SD-NEXT: ushll v2.2d, v0.2s, #0
3540-
; CHECK-SD-NEXT: ushll v3.2d, v1.2s, #0
3538+
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
3539+
; CHECK-SD-NEXT: ushll v2.2d, v1.2s, #0
3540+
; CHECK-SD-NEXT: ushll v3.2d, v0.2s, #0
35413541
; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0
35423542
; CHECK-SD-NEXT: ushll2 v1.2d, v1.4s, #0
3543-
; CHECK-SD-NEXT: shl v2.2d, v2.2d, #56
35443543
; CHECK-SD-NEXT: shl v3.2d, v3.2d, #56
3544+
; CHECK-SD-NEXT: shl v2.2d, v2.2d, #56
35453545
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
35463546
; CHECK-SD-NEXT: shl v1.2d, v1.2d, #56
3547-
; CHECK-SD-NEXT: sshr v2.2d, v2.2d, #56
35483547
; CHECK-SD-NEXT: sshr v3.2d, v3.2d, #56
3549-
; CHECK-SD-NEXT: ssra v2.2d, v0.2d, #56
3550-
; CHECK-SD-NEXT: ssra v3.2d, v1.2d, #56
3551-
; CHECK-SD-NEXT: add v0.2d, v2.2d, v3.2d
3548+
; CHECK-SD-NEXT: sshr v2.2d, v2.2d, #56
3549+
; CHECK-SD-NEXT: ssra v3.2d, v0.2d, #56
3550+
; CHECK-SD-NEXT: ssra v2.2d, v1.2d, #56
3551+
; CHECK-SD-NEXT: add v0.2d, v3.2d, v2.2d
35523552
; CHECK-SD-NEXT: addp d0, v0.2d
35533553
; CHECK-SD-NEXT: fmov x0, d0
35543554
; CHECK-SD-NEXT: ret

llvm/test/tools/llvm-mca/AArch64/Cortex/A510-neon-instructions.s

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1800,14 +1800,14 @@ zip2 v0.8h, v0.8h, v0.8h
18001800
# CHECK-NEXT: 1 3 0.50 sshr v0.4s, v0.4s, #3
18011801
# CHECK-NEXT: 1 3 0.50 sshr v0.8b, v0.8b, #3
18021802
# CHECK-NEXT: 1 3 0.50 sshr v0.8h, v0.8h, #3
1803-
# CHECK-NEXT: 1 7 1.00 ssra d18, d12, #21
1804-
# CHECK-NEXT: 1 7 1.00 ssra v0.16b, v0.16b, #3
1805-
# CHECK-NEXT: 1 7 1.00 ssra v0.2d, v0.2d, #3
1806-
# CHECK-NEXT: 1 7 1.00 ssra v0.2s, v0.2s, #3
1807-
# CHECK-NEXT: 1 7 1.00 ssra v0.4h, v0.4h, #3
1808-
# CHECK-NEXT: 1 7 1.00 ssra v0.4s, v0.4s, #3
1809-
# CHECK-NEXT: 1 7 1.00 ssra v0.8b, v0.8b, #3
1810-
# CHECK-NEXT: 1 7 1.00 ssra v0.8h, v0.8h, #3
1803+
# CHECK-NEXT: 1 3 0.50 ssra d18, d12, #21
1804+
# CHECK-NEXT: 1 3 0.50 ssra v0.16b, v0.16b, #3
1805+
# CHECK-NEXT: 1 3 0.50 ssra v0.2d, v0.2d, #3
1806+
# CHECK-NEXT: 1 3 0.50 ssra v0.2s, v0.2s, #3
1807+
# CHECK-NEXT: 1 3 0.50 ssra v0.4h, v0.4h, #3
1808+
# CHECK-NEXT: 1 3 0.50 ssra v0.4s, v0.4s, #3
1809+
# CHECK-NEXT: 1 3 0.50 ssra v0.8b, v0.8b, #3
1810+
# CHECK-NEXT: 1 3 0.50 ssra v0.8h, v0.8h, #3
18111811
# CHECK-NEXT: 1 3 0.50 ssubl v0.2d, v0.2s, v0.2s
18121812
# CHECK-NEXT: 1 3 0.50 ssubl v0.4s, v0.4h, v0.4h
18131813
# CHECK-NEXT: 1 3 0.50 ssubl v0.8h, v0.8b, v0.8b
@@ -2075,14 +2075,14 @@ zip2 v0.8h, v0.8h, v0.8h
20752075
# CHECK-NEXT: 1 4 0.50 usqadd v0.4s, v0.4s
20762076
# CHECK-NEXT: 1 4 0.50 usqadd v0.8b, v0.8b
20772077
# CHECK-NEXT: 1 4 0.50 usqadd v0.8h, v0.8h
2078-
# CHECK-NEXT: 1 7 1.00 usra d20, d13, #61
2079-
# CHECK-NEXT: 1 7 1.00 usra v0.16b, v0.16b, #3
2080-
# CHECK-NEXT: 1 7 1.00 usra v0.2d, v0.2d, #3
2081-
# CHECK-NEXT: 1 7 1.00 usra v0.2s, v0.2s, #3
2082-
# CHECK-NEXT: 1 7 1.00 usra v0.4h, v0.4h, #3
2083-
# CHECK-NEXT: 1 7 1.00 usra v0.4s, v0.4s, #3
2084-
# CHECK-NEXT: 1 7 1.00 usra v0.8b, v0.8b, #3
2085-
# CHECK-NEXT: 1 7 1.00 usra v0.8h, v0.8h, #3
2078+
# CHECK-NEXT: 1 3 0.50 usra d20, d13, #61
2079+
# CHECK-NEXT: 1 3 0.50 usra v0.16b, v0.16b, #3
2080+
# CHECK-NEXT: 1 3 0.50 usra v0.2d, v0.2d, #3
2081+
# CHECK-NEXT: 1 3 0.50 usra v0.2s, v0.2s, #3
2082+
# CHECK-NEXT: 1 3 0.50 usra v0.4h, v0.4h, #3
2083+
# CHECK-NEXT: 1 3 0.50 usra v0.4s, v0.4s, #3
2084+
# CHECK-NEXT: 1 3 0.50 usra v0.8b, v0.8b, #3
2085+
# CHECK-NEXT: 1 3 0.50 usra v0.8h, v0.8h, #3
20862086
# CHECK-NEXT: 1 3 0.50 usubl v0.2d, v0.2s, v0.2s
20872087
# CHECK-NEXT: 1 3 0.50 usubl v0.4s, v0.4h, v0.4h
20882088
# CHECK-NEXT: 1 3 0.50 usubl v0.8h, v0.8b, v0.8b
@@ -2148,7 +2148,7 @@ zip2 v0.8h, v0.8h, v0.8h
21482148

21492149
# CHECK: Resource pressure per iteration:
21502150
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] [7] [8] [9] [10.0] [10.1] [11]
2151-
# CHECK-NEXT: - - - - - 39.00 91.00 - - 509.00 509.00 3.00 3.00 197.00
2151+
# CHECK-NEXT: - - - - - 39.00 91.00 - - 501.00 501.00 3.00 3.00 197.00
21522152

21532153
# CHECK: Resource pressure by instruction:
21542154
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] [7] [8] [9] [10.0] [10.1] [11] Instructions:
@@ -2882,14 +2882,14 @@ zip2 v0.8h, v0.8h, v0.8h
28822882
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - sshr v0.4s, v0.4s, #3
28832883
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - sshr v0.8b, v0.8b, #3
28842884
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - sshr v0.8h, v0.8h, #3
2885-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - ssra d18, d12, #21
2886-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - ssra v0.16b, v0.16b, #3
2887-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - ssra v0.2d, v0.2d, #3
2888-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - ssra v0.2s, v0.2s, #3
2889-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - ssra v0.4h, v0.4h, #3
2890-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - ssra v0.4s, v0.4s, #3
2891-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - ssra v0.8b, v0.8b, #3
2892-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - ssra v0.8h, v0.8h, #3
2885+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssra d18, d12, #21
2886+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssra v0.16b, v0.16b, #3
2887+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssra v0.2d, v0.2d, #3
2888+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssra v0.2s, v0.2s, #3
2889+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssra v0.4h, v0.4h, #3
2890+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssra v0.4s, v0.4s, #3
2891+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssra v0.8b, v0.8b, #3
2892+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssra v0.8h, v0.8h, #3
28932893
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssubl v0.2d, v0.2s, v0.2s
28942894
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssubl v0.4s, v0.4h, v0.4h
28952895
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - ssubl v0.8h, v0.8b, v0.8b
@@ -3157,14 +3157,14 @@ zip2 v0.8h, v0.8h, v0.8h
31573157
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usqadd v0.4s, v0.4s
31583158
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usqadd v0.8b, v0.8b
31593159
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usqadd v0.8h, v0.8h
3160-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - usra d20, d13, #61
3161-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - usra v0.16b, v0.16b, #3
3162-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - usra v0.2d, v0.2d, #3
3163-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - usra v0.2s, v0.2s, #3
3164-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - usra v0.4h, v0.4h, #3
3165-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - usra v0.4s, v0.4s, #3
3166-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - usra v0.8b, v0.8b, #3
3167-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - usra v0.8h, v0.8h, #3
3160+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usra d20, d13, #61
3161+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usra v0.16b, v0.16b, #3
3162+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usra v0.2d, v0.2d, #3
3163+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usra v0.2s, v0.2s, #3
3164+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usra v0.4h, v0.4h, #3
3165+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usra v0.4s, v0.4s, #3
3166+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usra v0.8b, v0.8b, #3
3167+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usra v0.8h, v0.8h, #3
31683168
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usubl v0.2d, v0.2s, v0.2s
31693169
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usubl v0.4s, v0.4h, v0.4h
31703170
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - usubl v0.8h, v0.8b, v0.8b

0 commit comments

Comments
 (0)