Skip to content

Commit a947be5

Browse files
committed
[ARM] Various tests for MVE and FP16 codegen. NFC
1 parent f76c424 commit a947be5

File tree

4 files changed

+391
-1
lines changed

4 files changed

+391
-1
lines changed

llvm/test/CodeGen/ARM/fp16-bitcast.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,41 @@ entry:
4747
%tmp4.0.insert.ext = zext i16 %2 to i32
4848
ret i32 %tmp4.0.insert.ext
4949
}
50+
51+
; Loads an i16 through %hp, reinterprets its bits as a half (bitcast, not a
; numeric conversion), adds 1.0 in half precision and returns the sum.
; Expected output is recorded under two check prefixes (VFPV4 and FP16); the
; RUN lines that select them are not part of this hunk.
; NOTE(review): the VFPV4 sequence shows no f32->f16 conversion before the
; result is moved to r0, and the FP16 sequence loads from [r1] and stores the
; result to [r0] -- confirm this is the intended lowering being pinned.
define half @load_i16(i16 *%hp) {
52+
; CHECK-VFPV4-LABEL: load_i16:
53+
; CHECK-VFPV4: @ %bb.0: @ %entry
54+
; CHECK-VFPV4-NEXT: vmov.f32 s0, #1.000000e+00
55+
; CHECK-VFPV4-NEXT: ldrh r0, [r0]
56+
; CHECK-VFPV4-NEXT: vmov s2, r0
57+
; CHECK-VFPV4-NEXT: vcvtb.f32.f16 s2, s2
58+
; CHECK-VFPV4-NEXT: vadd.f32 s0, s2, s0
59+
; CHECK-VFPV4-NEXT: vmov r0, s0
60+
; CHECK-VFPV4-NEXT: bx lr
61+
;
62+
; CHECK-FP16-LABEL: load_i16:
63+
; CHECK-FP16: @ %bb.0: @ %entry
64+
; CHECK-FP16-NEXT: vldr.16 s2, [r1]
65+
; CHECK-FP16-NEXT: vmov.f16 s0, #1.000000e+00
66+
; CHECK-FP16-NEXT: vadd.f16 s0, s2, s0
67+
; CHECK-FP16-NEXT: vstr.16 s0, [r0]
68+
; CHECK-FP16-NEXT: bx lr
69+
entry:
70+
%h = load i16, i16 *%hp, align 2
71+
%hc = bitcast i16 %h to half
72+
%add = fadd half %hc, 1.0
73+
ret half %add
74+
}
75+
76+
; Mirror of the i16->half case: loads a half through %hp, reinterprets its
; bits as an i16 (bitcast), adds integer 1 and returns the i16.
; The expected output is a plain integer sequence (ldrh + adds) with no
; floating-point instructions.
define i16 @load_f16(half *%hp) {
77+
; CHECK-LABEL: load_f16:
78+
; CHECK: @ %bb.0: @ %entry
79+
; CHECK-NEXT: ldrh r0, [r0]
80+
; CHECK-NEXT: adds r0, #1
81+
; CHECK-NEXT: bx lr
82+
entry:
83+
%h = load half, half *%hp, align 2
84+
%hc = bitcast half %h to i16
85+
%add = add i16 %hc, 1
86+
ret i16 %add
87+
}

llvm/test/CodeGen/Thumb2/mve-vaddqr.ll

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
33

44
define arm_aapcs_vfpcc <4 x i32> @vaddqr_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) {
55
; CHECK-LABEL: vaddqr_v4i32:
@@ -72,3 +72,123 @@ entry:
7272
%c = add <16 x i8> %sp, %src
7373
ret <16 x i8> %c
7474
}
75+
76+
; Adds a splat of the scalar float %src2 to %src, with %src as the first fadd
; operand. The splat is built in the float domain: insertelement into lane 0
; followed by a shufflevector with an all-zero mask.
; The expected output moves the scalar to r0 and uses the form of vadd.f32
; that takes a general-purpose register as its last operand.
; %a and %b are not used by the body.
define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
77+
; CHECK-LABEL: vaddqr_v4f32:
78+
; CHECK: @ %bb.0: @ %entry
79+
; CHECK-NEXT: vmov r0, s4
80+
; CHECK-NEXT: vadd.f32 q0, q0, r0
81+
; CHECK-NEXT: bx lr
82+
entry:
83+
%i = insertelement <4 x float> undef, float %src2, i32 0
84+
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
85+
%c = fadd <4 x float> %src, %sp
86+
ret <4 x float> %c
87+
}
88+
89+
; Half-precision variant: the scalar is loaded from %src2p, splatted across
; an <8 x half> (insertelement into lane 0 + zero-mask shufflevector) and
; added to %src, with %src as the first fadd operand.
; The expected output loads the 16-bit value with ldrh and feeds r0 directly
; to vadd.f16 as its last operand.
; %a and %b are not used by the body.
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
90+
; CHECK-LABEL: vaddqr_v8f16:
91+
; CHECK: @ %bb.0: @ %entry
92+
; CHECK-NEXT: ldrh r0, [r0]
93+
; CHECK-NEXT: vadd.f16 q0, q0, r0
94+
; CHECK-NEXT: bx lr
95+
entry:
96+
%src2 = load half, half *%src2p, align 2
97+
%i = insertelement <8 x half> undef, half %src2, i32 0
98+
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
99+
%c = fadd <8 x half> %src, %sp
100+
ret <8 x half> %c
101+
}
102+
103+
; Commuted form of the float-domain splat add: the splat of %src2 is the
; FIRST fadd operand and %src the second.
; The expected output is the same two instructions as the uncommuted case
; (vmov r0, s4 then vadd.f32 with r0 as the last operand).
; %a and %b are not used by the body.
define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_2(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
104+
; CHECK-LABEL: vaddqr_v4f32_2:
105+
; CHECK: @ %bb.0: @ %entry
106+
; CHECK-NEXT: vmov r0, s4
107+
; CHECK-NEXT: vadd.f32 q0, q0, r0
108+
; CHECK-NEXT: bx lr
109+
entry:
110+
%i = insertelement <4 x float> undef, float %src2, i32 0
111+
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
112+
%c = fadd <4 x float> %sp, %src
113+
ret <4 x float> %c
114+
}
115+
116+
; Commuted half-precision form: the scalar loaded from %src2p is splatted in
; the half domain and used as the FIRST fadd operand, with %src second.
; The expected output is the same as the uncommuted case (ldrh, then
; vadd.f16 with r0 as the last operand).
; %a and %b are not used by the body.
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
117+
; CHECK-LABEL: vaddqr_v8f16_2:
118+
; CHECK: @ %bb.0: @ %entry
119+
; CHECK-NEXT: ldrh r0, [r0]
120+
; CHECK-NEXT: vadd.f16 q0, q0, r0
121+
; CHECK-NEXT: bx lr
122+
entry:
123+
%src2 = load half, half *%src2p, align 2
124+
%i = insertelement <8 x half> undef, half %src2, i32 0
125+
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
126+
%c = fadd <8 x half> %sp, %src
127+
ret <8 x half> %c
128+
}
129+
130+
; Splat built in the INTEGER domain: %src2 is bitcast to i32, splatted as a
; <4 x i32> (insertelement + zero-mask shufflevector), and the whole vector
; is bitcast back to <4 x float> before being added to %src (%src first).
; The expected output for this form keeps an explicit vdup.32 into q1
; followed by a register-register vadd.f32, rather than passing r0 to the
; add directly.
; %a and %b are not used by the body.
define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_3(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
131+
; CHECK-LABEL: vaddqr_v4f32_3:
132+
; CHECK: @ %bb.0: @ %entry
133+
; CHECK-NEXT: vmov r0, s4
134+
; CHECK-NEXT: vdup.32 q1, r0
135+
; CHECK-NEXT: vadd.f32 q0, q0, q1
136+
; CHECK-NEXT: bx lr
137+
entry:
138+
%src2bc = bitcast float %src2 to i32
139+
%i = insertelement <4 x i32> undef, i32 %src2bc, i32 0
140+
%spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
141+
%sp = bitcast <4 x i32> %spbc to <4 x float>
142+
%c = fadd <4 x float> %src, %sp
143+
ret <4 x float> %c
144+
}
145+
146+
; Half-precision splat built in the INTEGER domain: the half loaded from
; %src2p is bitcast to i16, splatted as an <8 x i16>, and the vector is
; bitcast back to <8 x half> before being added to %src (%src first).
; The expected output for this form keeps an explicit vdup.16 into q1
; followed by a register-register vadd.f16.
; %a and %b are not used by the body.
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
147+
; CHECK-LABEL: vaddqr_v8f16_3:
148+
; CHECK: @ %bb.0: @ %entry
149+
; CHECK-NEXT: ldrh r0, [r0]
150+
; CHECK-NEXT: vdup.16 q1, r0
151+
; CHECK-NEXT: vadd.f16 q0, q0, q1
152+
; CHECK-NEXT: bx lr
153+
entry:
154+
%src2 = load half, half *%src2p, align 2
155+
%src2bc = bitcast half %src2 to i16
156+
%i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
157+
%spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
158+
%sp = bitcast <8 x i16> %spbc to <8 x half>
159+
%c = fadd <8 x half> %src, %sp
160+
ret <8 x half> %c
161+
}
162+
163+
; Commuted form of the integer-domain float splat add: %src2 is bitcast to
; i32, splatted as <4 x i32>, bitcast back to <4 x float>, and used as the
; FIRST fadd operand with %src second.
; The expected output keeps an explicit vdup.32 into q1 and a
; register-register vadd.f32 whose source operands appear as q1, q0.
; %a and %b are not used by the body.
define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_4(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
164+
; CHECK-LABEL: vaddqr_v4f32_4:
165+
; CHECK: @ %bb.0: @ %entry
166+
; CHECK-NEXT: vmov r0, s4
167+
; CHECK-NEXT: vdup.32 q1, r0
168+
; CHECK-NEXT: vadd.f32 q0, q1, q0
169+
; CHECK-NEXT: bx lr
170+
entry:
171+
%src2bc = bitcast float %src2 to i32
172+
%i = insertelement <4 x i32> undef, i32 %src2bc, i32 0
173+
%spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
174+
%sp = bitcast <4 x i32> %spbc to <4 x float>
175+
%c = fadd <4 x float> %sp, %src
176+
ret <4 x float> %c
177+
}
178+
179+
; Commuted form of the integer-domain half splat add: the half loaded from
; %src2p is bitcast to i16, splatted as <8 x i16>, bitcast back to
; <8 x half>, and used as the FIRST fadd operand with %src second.
; The expected output keeps an explicit vdup.16 into q1 and a
; register-register vadd.f16 whose source operands appear as q1, q0.
; %a and %b are not used by the body.
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
180+
; CHECK-LABEL: vaddqr_v8f16_4:
181+
; CHECK: @ %bb.0: @ %entry
182+
; CHECK-NEXT: ldrh r0, [r0]
183+
; CHECK-NEXT: vdup.16 q1, r0
184+
; CHECK-NEXT: vadd.f16 q0, q1, q0
185+
; CHECK-NEXT: bx lr
186+
entry:
187+
%src2 = load half, half *%src2p, align 2
188+
%src2bc = bitcast half %src2 to i16
189+
%i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
190+
%spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
191+
%sp = bitcast <8 x i16> %spbc to <8 x half>
192+
%c = fadd <8 x half> %sp, %src
193+
ret <8 x half> %c
194+
}

llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5192,3 +5192,138 @@ entry:
51925192
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
51935193
ret <8 x half> %s
51945194
}
5195+
5196+
5197+
5198+
; Lane-wise "ordered equal" compare of %src against a splat of the half
; loaded from %src2p, then a select that yields %a in lanes where the
; compare is true and %b elsewhere.
; The splat is built in the INTEGER domain: the loaded half is bitcast to
; i16, splatted as <8 x i16> (insertelement + zero-mask shufflevector), and
; the vector is bitcast back to <8 x half> before the fcmp.
; Two sets of expected output are recorded:
;  - under the MVE prefix, a lane-by-lane sequence (scalar vcmp.f16 on
;    s-registers, vseleq.f16, vmov.16 lane inserts) that saves and restores
;    d8-d11;
;  - under the MVEFP prefix, vdup.16 + a vector vcmp.f16 eq + vpsel.
; The RUN lines that select these prefixes are not part of this hunk.
define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
5199+
; CHECK-MVE-LABEL: vcmp_oeq_v8f16_bc:
5200+
; CHECK-MVE: @ %bb.0: @ %entry
5201+
; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11}
5202+
; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11}
5203+
; CHECK-MVE-NEXT: ldrh r0, [r0]
5204+
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
5205+
; CHECK-MVE-NEXT: movs r2, #0
5206+
; CHECK-MVE-NEXT: movs r1, #0
5207+
; CHECK-MVE-NEXT: vdup.16 q4, r0
5208+
; CHECK-MVE-NEXT: movs r0, #0
5209+
; CHECK-MVE-NEXT: vmovx.f16 s14, s16
5210+
; CHECK-MVE-NEXT: vmovx.f16 s22, s17
5211+
; CHECK-MVE-NEXT: vcmp.f16 s12, s14
5212+
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
5213+
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
5214+
; CHECK-MVE-NEXT: it eq
5215+
; CHECK-MVE-NEXT: moveq r0, #1
5216+
; CHECK-MVE-NEXT: cmp r0, #0
5217+
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
5218+
; CHECK-MVE-NEXT: cset r0, ne
5219+
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
5220+
; CHECK-MVE-NEXT: lsls r0, r0, #31
5221+
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
5222+
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
5223+
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
5224+
; CHECK-MVE-NEXT: it eq
5225+
; CHECK-MVE-NEXT: moveq r2, #1
5226+
; CHECK-MVE-NEXT: cmp r2, #0
5227+
; CHECK-MVE-NEXT: cset r2, ne
5228+
; CHECK-MVE-NEXT: vmov r0, s12
5229+
; CHECK-MVE-NEXT: lsls r2, r2, #31
5230+
; CHECK-MVE-NEXT: vcmp.f16 s1, s17
5231+
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
5232+
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
5233+
; CHECK-MVE-NEXT: vmov r2, s12
5234+
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
5235+
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
5236+
; CHECK-MVE-NEXT: mov.w r0, #0
5237+
; CHECK-MVE-NEXT: it eq
5238+
; CHECK-MVE-NEXT: moveq r0, #1
5239+
; CHECK-MVE-NEXT: cmp r0, #0
5240+
; CHECK-MVE-NEXT: cset r0, ne
5241+
; CHECK-MVE-NEXT: lsls r0, r0, #31
5242+
; CHECK-MVE-NEXT: vseleq.f16 s20, s9, s5
5243+
; CHECK-MVE-NEXT: vmov r0, s20
5244+
; CHECK-MVE-NEXT: vmovx.f16 s20, s1
5245+
; CHECK-MVE-NEXT: vcmp.f16 s20, s22
5246+
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
5247+
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
5248+
; CHECK-MVE-NEXT: mov.w r0, #0
5249+
; CHECK-MVE-NEXT: it eq
5250+
; CHECK-MVE-NEXT: moveq r0, #1
5251+
; CHECK-MVE-NEXT: cmp r0, #0
5252+
; CHECK-MVE-NEXT: cset r0, ne
5253+
; CHECK-MVE-NEXT: vmovx.f16 s20, s5
5254+
; CHECK-MVE-NEXT: vmovx.f16 s22, s9
5255+
; CHECK-MVE-NEXT: lsls r0, r0, #31
5256+
; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20
5257+
; CHECK-MVE-NEXT: vcmp.f16 s2, s18
5258+
; CHECK-MVE-NEXT: vmov r0, s20
5259+
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
5260+
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
5261+
; CHECK-MVE-NEXT: mov.w r0, #0
5262+
; CHECK-MVE-NEXT: it eq
5263+
; CHECK-MVE-NEXT: moveq r0, #1
5264+
; CHECK-MVE-NEXT: cmp r0, #0
5265+
; CHECK-MVE-NEXT: cset r0, ne
5266+
; CHECK-MVE-NEXT: vmovx.f16 s22, s18
5267+
; CHECK-MVE-NEXT: lsls r0, r0, #31
5268+
; CHECK-MVE-NEXT: vseleq.f16 s20, s10, s6
5269+
; CHECK-MVE-NEXT: vmov r0, s20
5270+
; CHECK-MVE-NEXT: vmovx.f16 s20, s2
5271+
; CHECK-MVE-NEXT: vcmp.f16 s20, s22
5272+
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
5273+
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
5274+
; CHECK-MVE-NEXT: mov.w r0, #0
5275+
; CHECK-MVE-NEXT: it eq
5276+
; CHECK-MVE-NEXT: moveq r0, #1
5277+
; CHECK-MVE-NEXT: cmp r0, #0
5278+
; CHECK-MVE-NEXT: cset r0, ne
5279+
; CHECK-MVE-NEXT: vmovx.f16 s20, s6
5280+
; CHECK-MVE-NEXT: vmovx.f16 s22, s10
5281+
; CHECK-MVE-NEXT: lsls r0, r0, #31
5282+
; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20
5283+
; CHECK-MVE-NEXT: vcmp.f16 s3, s19
5284+
; CHECK-MVE-NEXT: vmov r0, s20
5285+
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
5286+
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
5287+
; CHECK-MVE-NEXT: mov.w r0, #0
5288+
; CHECK-MVE-NEXT: it eq
5289+
; CHECK-MVE-NEXT: moveq r0, #1
5290+
; CHECK-MVE-NEXT: cmp r0, #0
5291+
; CHECK-MVE-NEXT: cset r0, ne
5292+
; CHECK-MVE-NEXT: vmovx.f16 s2, s19
5293+
; CHECK-MVE-NEXT: vcmp.f16 s0, s2
5294+
; CHECK-MVE-NEXT: lsls r0, r0, #31
5295+
; CHECK-MVE-NEXT: vseleq.f16 s20, s11, s7
5296+
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
5297+
; CHECK-MVE-NEXT: it eq
5298+
; CHECK-MVE-NEXT: moveq r1, #1
5299+
; CHECK-MVE-NEXT: vmov r0, s20
5300+
; CHECK-MVE-NEXT: cmp r1, #0
5301+
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
5302+
; CHECK-MVE-NEXT: cset r0, ne
5303+
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
5304+
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
5305+
; CHECK-MVE-NEXT: lsls r0, r0, #31
5306+
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
5307+
; CHECK-MVE-NEXT: vmov r0, s0
5308+
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
5309+
; CHECK-MVE-NEXT: vmov q0, q3
5310+
; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11}
5311+
; CHECK-MVE-NEXT: bx lr
5312+
;
5313+
; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16_bc:
5314+
; CHECK-MVEFP: @ %bb.0: @ %entry
5315+
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
5316+
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
5317+
; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, q3
5318+
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
5319+
; CHECK-MVEFP-NEXT: bx lr
5320+
entry:
5321+
%src2 = load half, half* %src2p
5322+
%src2bc = bitcast half %src2 to i16
5323+
%i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
5324+
%spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
5325+
%sp = bitcast <8 x i16> %spbc to <8 x half>
5326+
%c = fcmp oeq <8 x half> %src, %sp
5327+
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
5328+
ret <8 x half> %s
5329+
}

0 commit comments

Comments
 (0)