Skip to content

Commit 4c2d1b4

Browse files
committed
[AArch64] Add test for scalar copysign. NFC
1 parent 42429fe commit 4c2d1b4

File tree

1 file changed

+228
-0
lines changed

1 file changed

+228
-0
lines changed

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll

Lines changed: 228 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,234 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
88

99
target triple = "aarch64-unknown-linux-gnu"
1010

11+
; Scalar half-precision copysign test: loads one half through %ap and one
; through %bp, calls @llvm.copysign.f16 on the pair, and stores the result
; back through %ap.
; Expected assembly is pinned under three FileCheck prefixes (SVE, SVE2 and
; NONEON-NOSVE). The SVE and SVE2 expectations are identical here: two
; constant-pool loads followed by a `bif` bit-select. The NONEON-NOSVE
; expectation widens %a to single precision with fcvt, tests bit 7 of the
; second byte of %b after spilling it to the stack, selects between fabs and
; its fneg with fcsel, and narrows the result back to half.
; NOTE(review): the check lines look script-generated - presumably they should
; be regenerated rather than hand-edited; confirm against the file header,
; which is outside this view.
define void @test_copysign_f16(ptr %ap, ptr %bp) {
12+
; SVE-LABEL: test_copysign_f16:
13+
; SVE: // %bb.0:
14+
; SVE-NEXT: adrp x8, .LCPI0_0
15+
; SVE-NEXT: ldr h1, [x0]
16+
; SVE-NEXT: ldr h2, [x1]
17+
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
18+
; SVE-NEXT: adrp x8, .LCPI0_1
19+
; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
20+
; SVE-NEXT: mov z3.d, z0.d
21+
; SVE-NEXT: fmov s0, s1
22+
; SVE-NEXT: fmov s3, s2
23+
; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
24+
; SVE-NEXT: str h0, [x0]
25+
; SVE-NEXT: ret
26+
;
27+
; SVE2-LABEL: test_copysign_f16:
28+
; SVE2: // %bb.0:
29+
; SVE2-NEXT: adrp x8, .LCPI0_0
30+
; SVE2-NEXT: ldr h1, [x0]
31+
; SVE2-NEXT: ldr h2, [x1]
32+
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
33+
; SVE2-NEXT: adrp x8, .LCPI0_1
34+
; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
35+
; SVE2-NEXT: mov z3.d, z0.d
36+
; SVE2-NEXT: fmov s0, s1
37+
; SVE2-NEXT: fmov s3, s2
38+
; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
39+
; SVE2-NEXT: str h0, [x0]
40+
; SVE2-NEXT: ret
41+
;
42+
; NONEON-NOSVE-LABEL: test_copysign_f16:
43+
; NONEON-NOSVE: // %bb.0:
44+
; NONEON-NOSVE-NEXT: sub sp, sp, #16
45+
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
46+
; NONEON-NOSVE-NEXT: ldr h0, [x0]
47+
; NONEON-NOSVE-NEXT: ldr h1, [x1]
48+
; NONEON-NOSVE-NEXT: fcvt s0, h0
49+
; NONEON-NOSVE-NEXT: str h1, [sp, #12]
50+
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
51+
; NONEON-NOSVE-NEXT: tst w8, #0x80
52+
; NONEON-NOSVE-NEXT: fabs s0, s0
53+
; NONEON-NOSVE-NEXT: fneg s1, s0
54+
; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
55+
; NONEON-NOSVE-NEXT: fcvt h0, s0
56+
; NONEON-NOSVE-NEXT: str h0, [x0]
57+
; NONEON-NOSVE-NEXT: add sp, sp, #16
58+
; NONEON-NOSVE-NEXT: ret
59+
; IR under test: magnitude comes from %a, sign from %b; result overwrites *%ap.
%a = load half, ptr %ap
60+
%b = load half, ptr %bp
61+
%r = call half @llvm.copysign.f16(half %a, half %b)
62+
store half %r, ptr %ap
63+
ret void
64+
}
65+
66+
; Scalar bfloat copysign test: loads one bfloat through %ap and one through
; %bp, calls @llvm.copysign.bf16 on the pair, and stores the result back
; through %ap.
; The SVE and SVE2 expectations are identical here and mirror the f16 case
; (h-register loads, two constant-pool loads, `bif`). The NONEON-NOSVE
; expectation uses an 80-byte stack frame: it shifts the 16-bit lanes left by
; 16 through stack round trips, tests bit 7 of the second byte of the spilled
; %b, selects between fabs and its fneg with fcsel, and shifts the 32-bit
; result right by 16 before storing a half-width value.
; NOTE(review): the NONEON-NOSVE expectation contains a vector
; `ushll v0.4s, v0.4h, #0`, which looks at odds with what the prefix name
; suggests - confirm whether that codegen is intended.
define void @test_copysign_bf16(ptr %ap, ptr %bp) {
67+
; SVE-LABEL: test_copysign_bf16:
68+
; SVE: // %bb.0:
69+
; SVE-NEXT: adrp x8, .LCPI1_0
70+
; SVE-NEXT: ldr h1, [x0]
71+
; SVE-NEXT: ldr h2, [x1]
72+
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
73+
; SVE-NEXT: adrp x8, .LCPI1_1
74+
; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
75+
; SVE-NEXT: mov z3.d, z0.d
76+
; SVE-NEXT: fmov s0, s1
77+
; SVE-NEXT: fmov s3, s2
78+
; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
79+
; SVE-NEXT: str h0, [x0]
80+
; SVE-NEXT: ret
81+
;
82+
; SVE2-LABEL: test_copysign_bf16:
83+
; SVE2: // %bb.0:
84+
; SVE2-NEXT: adrp x8, .LCPI1_0
85+
; SVE2-NEXT: ldr h1, [x0]
86+
; SVE2-NEXT: ldr h2, [x1]
87+
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
88+
; SVE2-NEXT: adrp x8, .LCPI1_1
89+
; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
90+
; SVE2-NEXT: mov z3.d, z0.d
91+
; SVE2-NEXT: fmov s0, s1
92+
; SVE2-NEXT: fmov s3, s2
93+
; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
94+
; SVE2-NEXT: str h0, [x0]
95+
; SVE2-NEXT: ret
96+
;
97+
; NONEON-NOSVE-LABEL: test_copysign_bf16:
98+
; NONEON-NOSVE: // %bb.0:
99+
; NONEON-NOSVE-NEXT: sub sp, sp, #80
100+
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
101+
; NONEON-NOSVE-NEXT: ldr h0, [x0]
102+
; NONEON-NOSVE-NEXT: ldr h1, [x1]
103+
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
104+
; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
105+
; NONEON-NOSVE-NEXT: str h1, [sp, #76]
106+
; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
107+
; NONEON-NOSVE-NEXT: str q0, [sp]
108+
; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
109+
; NONEON-NOSVE-NEXT: lsl w9, w8, #16
110+
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
111+
; NONEON-NOSVE-NEXT: lsl w8, w8, #16
112+
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
113+
; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
114+
; NONEON-NOSVE-NEXT: lsl w9, w8, #16
115+
; NONEON-NOSVE-NEXT: ldr w8, [sp]
116+
; NONEON-NOSVE-NEXT: lsl w8, w8, #16
117+
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
118+
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #77]
119+
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
120+
; NONEON-NOSVE-NEXT: tst w8, #0x80
121+
; NONEON-NOSVE-NEXT: str q0, [sp, #48]
122+
; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
123+
; NONEON-NOSVE-NEXT: fabs s0, s0
124+
; NONEON-NOSVE-NEXT: fneg s1, s0
125+
; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
126+
; NONEON-NOSVE-NEXT: fmov w8, s0
127+
; NONEON-NOSVE-NEXT: lsr w8, w8, #16
128+
; NONEON-NOSVE-NEXT: fmov s0, w8
129+
; NONEON-NOSVE-NEXT: str h0, [x0]
130+
; NONEON-NOSVE-NEXT: add sp, sp, #80
131+
; NONEON-NOSVE-NEXT: ret
132+
; IR under test: magnitude comes from %a, sign from %b; result overwrites *%ap.
%a = load bfloat, ptr %ap
133+
%b = load bfloat, ptr %bp
134+
%r = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %b)
135+
store bfloat %r, ptr %ap
136+
ret void
137+
}
138+
139+
; Scalar single-precision copysign test: loads one float through %ap and one
; through %bp, calls @llvm.copysign.f32 on the pair, and stores the result
; back through %ap.
; The SVE and SVE2 expectations are identical here: s-register loads, two
; constant-pool loads, then a `bif` bit-select. The NONEON-NOSVE expectation
; needs no stack frame: it loads %b as a 32-bit integer, tests bit 31
; (#0x80000000), and uses fcsel to pick between fabs of %a and its fneg.
define void @test_copysign_f32(ptr %ap, ptr %bp) {
140+
; SVE-LABEL: test_copysign_f32:
141+
; SVE: // %bb.0:
142+
; SVE-NEXT: adrp x8, .LCPI2_0
143+
; SVE-NEXT: ldr s1, [x0]
144+
; SVE-NEXT: ldr s2, [x1]
145+
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
146+
; SVE-NEXT: adrp x8, .LCPI2_1
147+
; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
148+
; SVE-NEXT: mov z3.d, z0.d
149+
; SVE-NEXT: fmov s0, s1
150+
; SVE-NEXT: fmov s3, s2
151+
; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
152+
; SVE-NEXT: str s0, [x0]
153+
; SVE-NEXT: ret
154+
;
155+
; SVE2-LABEL: test_copysign_f32:
156+
; SVE2: // %bb.0:
157+
; SVE2-NEXT: adrp x8, .LCPI2_0
158+
; SVE2-NEXT: ldr s1, [x0]
159+
; SVE2-NEXT: ldr s2, [x1]
160+
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
161+
; SVE2-NEXT: adrp x8, .LCPI2_1
162+
; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
163+
; SVE2-NEXT: mov z3.d, z0.d
164+
; SVE2-NEXT: fmov s0, s1
165+
; SVE2-NEXT: fmov s3, s2
166+
; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
167+
; SVE2-NEXT: str s0, [x0]
168+
; SVE2-NEXT: ret
169+
;
170+
; NONEON-NOSVE-LABEL: test_copysign_f32:
171+
; NONEON-NOSVE: // %bb.0:
172+
; NONEON-NOSVE-NEXT: ldr s0, [x0]
173+
; NONEON-NOSVE-NEXT: ldr w8, [x1]
174+
; NONEON-NOSVE-NEXT: fabs s0, s0
175+
; NONEON-NOSVE-NEXT: tst w8, #0x80000000
176+
; NONEON-NOSVE-NEXT: fneg s1, s0
177+
; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
178+
; NONEON-NOSVE-NEXT: str s0, [x0]
179+
; NONEON-NOSVE-NEXT: ret
180+
; IR under test: magnitude comes from %a, sign from %b; result overwrites *%ap.
%a = load float, ptr %ap
181+
%b = load float, ptr %bp
182+
%r = call float @llvm.copysign.f32(float %a, float %b)
183+
store float %r, ptr %ap
184+
ret void
185+
}
186+
187+
; Scalar double-precision copysign test: loads one double through %ap and one
; through %bp, calls @llvm.copysign.f64 on the pair, and stores the result
; back through %ap.
; The SVE and SVE2 expectations are identical here and differ in shape from
; the narrower types: a constant-pool value is negated with a predicated
; `fneg z0.d` (predicate from `ptrue p0.d, vl2`) and the final select is a
; `bsl` rather than a `bif`. The NONEON-NOSVE expectation loads %b as a 64-bit
; integer, tests bit 63 (#0x8000000000000000), and uses fcsel to pick between
; fabs of %a and its fneg.
define void @test_copysign_f64(ptr %ap, ptr %bp) {
188+
; SVE-LABEL: test_copysign_f64:
189+
; SVE: // %bb.0:
190+
; SVE-NEXT: adrp x8, .LCPI3_1
191+
; SVE-NEXT: ptrue p0.d, vl2
192+
; SVE-NEXT: ldr d2, [x0]
193+
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
194+
; SVE-NEXT: adrp x8, .LCPI3_0
195+
; SVE-NEXT: ldr d3, [x1]
196+
; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
197+
; SVE-NEXT: fneg z0.d, p0/m, z0.d
198+
; SVE-NEXT: mov z4.d, z1.d
199+
; SVE-NEXT: fmov d1, d2
200+
; SVE-NEXT: fmov d4, d3
201+
; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b
202+
; SVE-NEXT: str d0, [x0]
203+
; SVE-NEXT: ret
204+
;
205+
; SVE2-LABEL: test_copysign_f64:
206+
; SVE2: // %bb.0:
207+
; SVE2-NEXT: adrp x8, .LCPI3_1
208+
; SVE2-NEXT: ptrue p0.d, vl2
209+
; SVE2-NEXT: ldr d2, [x0]
210+
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
211+
; SVE2-NEXT: adrp x8, .LCPI3_0
212+
; SVE2-NEXT: ldr d3, [x1]
213+
; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
214+
; SVE2-NEXT: fneg z0.d, p0/m, z0.d
215+
; SVE2-NEXT: mov z4.d, z1.d
216+
; SVE2-NEXT: fmov d1, d2
217+
; SVE2-NEXT: fmov d4, d3
218+
; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b
219+
; SVE2-NEXT: str d0, [x0]
220+
; SVE2-NEXT: ret
221+
;
222+
; NONEON-NOSVE-LABEL: test_copysign_f64:
223+
; NONEON-NOSVE: // %bb.0:
224+
; NONEON-NOSVE-NEXT: ldr d0, [x0]
225+
; NONEON-NOSVE-NEXT: ldr x8, [x1]
226+
; NONEON-NOSVE-NEXT: fabs d0, d0
227+
; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
228+
; NONEON-NOSVE-NEXT: fneg d1, d0
229+
; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
230+
; NONEON-NOSVE-NEXT: str d0, [x0]
231+
; NONEON-NOSVE-NEXT: ret
232+
; IR under test: magnitude comes from %a, sign from %b; result overwrites *%ap.
%a = load double, ptr %ap
233+
%b = load double, ptr %bp
234+
%r = call double @llvm.copysign.f64(double %a, double %b)
235+
store double %r, ptr %ap
236+
ret void
237+
}
238+
11239
;============ f16
12240

13241
define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) {

0 commit comments

Comments
 (0)