@@ -8,6 +8,234 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
8
8
9
9
target triple = "aarch64-unknown-linux-gnu"
10
10
11
; test_copysign_f16: loads one half from %ap and one from %bp, passes them to
; @llvm.copysign.f16 as (%a, %b), and stores the result back through %ap.
;
; The SVE and SVE2 expectations below are instruction-for-instruction the
; same: both operands are loaded as h registers, two 128-bit constants are
; read from the constant pool (.LCPI0_0 and .LCPI0_1), and a single `bif`
; produces the value that is stored.
;
; The NONEON-NOSVE expectation takes a different route: %b is spilled to
; [sp, #12], the byte at [sp, #13] is reloaded and bit 0x80 of it is tested
; (the upper byte of the spilled half; the datalayout string is
; little-endian), while %a is widened with fcvt, run through fabs/fneg, and
; the final value is chosen with fcsel before being narrowed back with fcvt.
;
; NOTE(review): the check lines have the shape of update_llc_test_checks
; output; presumably they should be regenerated rather than edited by hand --
; confirm against the file header, which is not visible here.
+ define void @test_copysign_f16 (ptr %ap , ptr %bp ) {
12
+ ; SVE-LABEL: test_copysign_f16:
13
+ ; SVE: // %bb.0:
14
+ ; SVE-NEXT: adrp x8, .LCPI0_0
15
+ ; SVE-NEXT: ldr h1, [x0]
16
+ ; SVE-NEXT: ldr h2, [x1]
17
+ ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
18
+ ; SVE-NEXT: adrp x8, .LCPI0_1
19
+ ; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
20
+ ; SVE-NEXT: mov z3.d, z0.d
21
+ ; SVE-NEXT: fmov s0, s1
22
+ ; SVE-NEXT: fmov s3, s2
23
+ ; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
24
+ ; SVE-NEXT: str h0, [x0]
25
+ ; SVE-NEXT: ret
26
+ ;
27
+ ; SVE2-LABEL: test_copysign_f16:
28
+ ; SVE2: // %bb.0:
29
+ ; SVE2-NEXT: adrp x8, .LCPI0_0
30
+ ; SVE2-NEXT: ldr h1, [x0]
31
+ ; SVE2-NEXT: ldr h2, [x1]
32
+ ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
33
+ ; SVE2-NEXT: adrp x8, .LCPI0_1
34
+ ; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
35
+ ; SVE2-NEXT: mov z3.d, z0.d
36
+ ; SVE2-NEXT: fmov s0, s1
37
+ ; SVE2-NEXT: fmov s3, s2
38
+ ; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
39
+ ; SVE2-NEXT: str h0, [x0]
40
+ ; SVE2-NEXT: ret
41
+ ;
42
+ ; NONEON-NOSVE-LABEL: test_copysign_f16:
43
+ ; NONEON-NOSVE: // %bb.0:
44
+ ; NONEON-NOSVE-NEXT: sub sp, sp, #16
45
+ ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
46
+ ; NONEON-NOSVE-NEXT: ldr h0, [x0]
47
+ ; NONEON-NOSVE-NEXT: ldr h1, [x1]
48
+ ; NONEON-NOSVE-NEXT: fcvt s0, h0
49
+ ; NONEON-NOSVE-NEXT: str h1, [sp, #12]
50
+ ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
51
+ ; NONEON-NOSVE-NEXT: tst w8, #0x80
52
+ ; NONEON-NOSVE-NEXT: fabs s0, s0
53
+ ; NONEON-NOSVE-NEXT: fneg s1, s0
54
+ ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
55
+ ; NONEON-NOSVE-NEXT: fcvt h0, s0
56
+ ; NONEON-NOSVE-NEXT: str h0, [x0]
57
+ ; NONEON-NOSVE-NEXT: add sp, sp, #16
58
+ ; NONEON-NOSVE-NEXT: ret
59
+ %a = load half , ptr %ap
60
+ %b = load half , ptr %bp
61
+ %r = call half @llvm.copysign.f16 (half %a , half %b )
62
+ store half %r , ptr %ap
63
+ ret void
64
+ }
65
+
66
; test_copysign_bf16: loads one bfloat from %ap and one from %bp, passes them
; to @llvm.copysign.bf16 as (%a, %b), and stores the result back through %ap.
;
; The SVE and SVE2 expectations below are instruction-for-instruction the
; same: both operands are loaded as h registers, two 128-bit constants are
; read from the constant pool (.LCPI1_0 and .LCPI1_1), and a single `bif`
; produces the value that is stored.
;
; The NONEON-NOSVE expectation reserves an 80-byte frame and works through
; the stack: %a is spilled and reloaded, each 32-bit element is shifted left
; by 16 (`lsl ..., #16`), and the first element is then run through
; fabs/fneg. %b is spilled to [sp, #76] and bit 0x80 of the byte at
; [sp, #77] drives the fcsel. The selected value is moved to w8, shifted
; right by 16, and stored as a 16-bit quantity.
;
; NOTE(review): the check lines have the shape of update_llc_test_checks
; output; presumably they should be regenerated rather than edited by hand --
; confirm against the file header, which is not visible here.
+ define void @test_copysign_bf16 (ptr %ap , ptr %bp ) {
67
+ ; SVE-LABEL: test_copysign_bf16:
68
+ ; SVE: // %bb.0:
69
+ ; SVE-NEXT: adrp x8, .LCPI1_0
70
+ ; SVE-NEXT: ldr h1, [x0]
71
+ ; SVE-NEXT: ldr h2, [x1]
72
+ ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
73
+ ; SVE-NEXT: adrp x8, .LCPI1_1
74
+ ; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
75
+ ; SVE-NEXT: mov z3.d, z0.d
76
+ ; SVE-NEXT: fmov s0, s1
77
+ ; SVE-NEXT: fmov s3, s2
78
+ ; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
79
+ ; SVE-NEXT: str h0, [x0]
80
+ ; SVE-NEXT: ret
81
+ ;
82
+ ; SVE2-LABEL: test_copysign_bf16:
83
+ ; SVE2: // %bb.0:
84
+ ; SVE2-NEXT: adrp x8, .LCPI1_0
85
+ ; SVE2-NEXT: ldr h1, [x0]
86
+ ; SVE2-NEXT: ldr h2, [x1]
87
+ ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
88
+ ; SVE2-NEXT: adrp x8, .LCPI1_1
89
+ ; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
90
+ ; SVE2-NEXT: mov z3.d, z0.d
91
+ ; SVE2-NEXT: fmov s0, s1
92
+ ; SVE2-NEXT: fmov s3, s2
93
+ ; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
94
+ ; SVE2-NEXT: str h0, [x0]
95
+ ; SVE2-NEXT: ret
96
+ ;
97
+ ; NONEON-NOSVE-LABEL: test_copysign_bf16:
98
+ ; NONEON-NOSVE: // %bb.0:
99
+ ; NONEON-NOSVE-NEXT: sub sp, sp, #80
100
+ ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
101
+ ; NONEON-NOSVE-NEXT: ldr h0, [x0]
102
+ ; NONEON-NOSVE-NEXT: ldr h1, [x1]
103
+ ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
104
+ ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
105
+ ; NONEON-NOSVE-NEXT: str h1, [sp, #76]
106
+ ; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
107
+ ; NONEON-NOSVE-NEXT: str q0, [sp]
108
+ ; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
109
+ ; NONEON-NOSVE-NEXT: lsl w9, w8, #16
110
+ ; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
111
+ ; NONEON-NOSVE-NEXT: lsl w8, w8, #16
112
+ ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
113
+ ; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
114
+ ; NONEON-NOSVE-NEXT: lsl w9, w8, #16
115
+ ; NONEON-NOSVE-NEXT: ldr w8, [sp]
116
+ ; NONEON-NOSVE-NEXT: lsl w8, w8, #16
117
+ ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
118
+ ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #77]
119
+ ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
120
+ ; NONEON-NOSVE-NEXT: tst w8, #0x80
121
+ ; NONEON-NOSVE-NEXT: str q0, [sp, #48]
122
+ ; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
123
+ ; NONEON-NOSVE-NEXT: fabs s0, s0
124
+ ; NONEON-NOSVE-NEXT: fneg s1, s0
125
+ ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
126
+ ; NONEON-NOSVE-NEXT: fmov w8, s0
127
+ ; NONEON-NOSVE-NEXT: lsr w8, w8, #16
128
+ ; NONEON-NOSVE-NEXT: fmov s0, w8
129
+ ; NONEON-NOSVE-NEXT: str h0, [x0]
130
+ ; NONEON-NOSVE-NEXT: add sp, sp, #80
131
+ ; NONEON-NOSVE-NEXT: ret
132
+ %a = load bfloat, ptr %ap
133
+ %b = load bfloat, ptr %bp
134
+ %r = call bfloat @llvm.copysign.bf16 (bfloat %a , bfloat %b )
135
+ store bfloat %r , ptr %ap
136
+ ret void
137
+ }
138
+
139
; test_copysign_f32: loads one float from %ap and one from %bp, passes them
; to @llvm.copysign.f32 as (%a, %b), and stores the result back through %ap.
;
; The SVE and SVE2 expectations below are instruction-for-instruction the
; same: both operands are loaded as s registers, two 128-bit constants are
; read from the constant pool (.LCPI2_0 and .LCPI2_1), and a single `bif`
; produces the value that is stored.
;
; The NONEON-NOSVE expectation needs no stack frame: %b is loaded straight
; into the integer register w8 and tested against #0x80000000, %a goes
; through fabs/fneg, and fcsel picks between the two on the `ne` condition.
;
; NOTE(review): the check lines have the shape of update_llc_test_checks
; output; presumably they should be regenerated rather than edited by hand --
; confirm against the file header, which is not visible here.
+ define void @test_copysign_f32 (ptr %ap , ptr %bp ) {
140
+ ; SVE-LABEL: test_copysign_f32:
141
+ ; SVE: // %bb.0:
142
+ ; SVE-NEXT: adrp x8, .LCPI2_0
143
+ ; SVE-NEXT: ldr s1, [x0]
144
+ ; SVE-NEXT: ldr s2, [x1]
145
+ ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
146
+ ; SVE-NEXT: adrp x8, .LCPI2_1
147
+ ; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
148
+ ; SVE-NEXT: mov z3.d, z0.d
149
+ ; SVE-NEXT: fmov s0, s1
150
+ ; SVE-NEXT: fmov s3, s2
151
+ ; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
152
+ ; SVE-NEXT: str s0, [x0]
153
+ ; SVE-NEXT: ret
154
+ ;
155
+ ; SVE2-LABEL: test_copysign_f32:
156
+ ; SVE2: // %bb.0:
157
+ ; SVE2-NEXT: adrp x8, .LCPI2_0
158
+ ; SVE2-NEXT: ldr s1, [x0]
159
+ ; SVE2-NEXT: ldr s2, [x1]
160
+ ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
161
+ ; SVE2-NEXT: adrp x8, .LCPI2_1
162
+ ; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
163
+ ; SVE2-NEXT: mov z3.d, z0.d
164
+ ; SVE2-NEXT: fmov s0, s1
165
+ ; SVE2-NEXT: fmov s3, s2
166
+ ; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
167
+ ; SVE2-NEXT: str s0, [x0]
168
+ ; SVE2-NEXT: ret
169
+ ;
170
+ ; NONEON-NOSVE-LABEL: test_copysign_f32:
171
+ ; NONEON-NOSVE: // %bb.0:
172
+ ; NONEON-NOSVE-NEXT: ldr s0, [x0]
173
+ ; NONEON-NOSVE-NEXT: ldr w8, [x1]
174
+ ; NONEON-NOSVE-NEXT: fabs s0, s0
175
+ ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
176
+ ; NONEON-NOSVE-NEXT: fneg s1, s0
177
+ ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
178
+ ; NONEON-NOSVE-NEXT: str s0, [x0]
179
+ ; NONEON-NOSVE-NEXT: ret
180
+ %a = load float , ptr %ap
181
+ %b = load float , ptr %bp
182
+ %r = call float @llvm.copysign.f32 (float %a , float %b )
183
+ store float %r , ptr %ap
184
+ ret void
185
+ }
186
+
187
; test_copysign_f64: loads one double from %ap and one from %bp, passes them
; to @llvm.copysign.f64 as (%a, %b), and stores the result back through %ap.
;
; The SVE and SVE2 expectations below are instruction-for-instruction the
; same, and differ in shape from the narrower scalar tests in this file: the
; constant loaded from .LCPI3_1 is passed through a predicated
; `fneg z0.d, p0/m, z0.d` (with p0 set up by `ptrue p0.d, vl2`) before it is
; used, and the final combine is a `bsl` rather than a `bif`.
;
; The NONEON-NOSVE expectation needs no stack frame: %b is loaded straight
; into the integer register x8 and tested against #0x8000000000000000, %a
; goes through fabs/fneg, and fcsel picks between the two on the `ne`
; condition.
;
; NOTE(review): the check lines have the shape of update_llc_test_checks
; output; presumably they should be regenerated rather than edited by hand --
; confirm against the file header, which is not visible here.
+ define void @test_copysign_f64 (ptr %ap , ptr %bp ) {
188
+ ; SVE-LABEL: test_copysign_f64:
189
+ ; SVE: // %bb.0:
190
+ ; SVE-NEXT: adrp x8, .LCPI3_1
191
+ ; SVE-NEXT: ptrue p0.d, vl2
192
+ ; SVE-NEXT: ldr d2, [x0]
193
+ ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
194
+ ; SVE-NEXT: adrp x8, .LCPI3_0
195
+ ; SVE-NEXT: ldr d3, [x1]
196
+ ; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
197
+ ; SVE-NEXT: fneg z0.d, p0/m, z0.d
198
+ ; SVE-NEXT: mov z4.d, z1.d
199
+ ; SVE-NEXT: fmov d1, d2
200
+ ; SVE-NEXT: fmov d4, d3
201
+ ; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b
202
+ ; SVE-NEXT: str d0, [x0]
203
+ ; SVE-NEXT: ret
204
+ ;
205
+ ; SVE2-LABEL: test_copysign_f64:
206
+ ; SVE2: // %bb.0:
207
+ ; SVE2-NEXT: adrp x8, .LCPI3_1
208
+ ; SVE2-NEXT: ptrue p0.d, vl2
209
+ ; SVE2-NEXT: ldr d2, [x0]
210
+ ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
211
+ ; SVE2-NEXT: adrp x8, .LCPI3_0
212
+ ; SVE2-NEXT: ldr d3, [x1]
213
+ ; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
214
+ ; SVE2-NEXT: fneg z0.d, p0/m, z0.d
215
+ ; SVE2-NEXT: mov z4.d, z1.d
216
+ ; SVE2-NEXT: fmov d1, d2
217
+ ; SVE2-NEXT: fmov d4, d3
218
+ ; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b
219
+ ; SVE2-NEXT: str d0, [x0]
220
+ ; SVE2-NEXT: ret
221
+ ;
222
+ ; NONEON-NOSVE-LABEL: test_copysign_f64:
223
+ ; NONEON-NOSVE: // %bb.0:
224
+ ; NONEON-NOSVE-NEXT: ldr d0, [x0]
225
+ ; NONEON-NOSVE-NEXT: ldr x8, [x1]
226
+ ; NONEON-NOSVE-NEXT: fabs d0, d0
227
+ ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
228
+ ; NONEON-NOSVE-NEXT: fneg d1, d0
229
+ ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
230
+ ; NONEON-NOSVE-NEXT: str d0, [x0]
231
+ ; NONEON-NOSVE-NEXT: ret
232
+ %a = load double , ptr %ap
233
+ %b = load double , ptr %bp
234
+ %r = call double @llvm.copysign.f64 (double %a , double %b )
235
+ store double %r , ptr %ap
236
+ ret void
237
+ }
238
+
11
239
;============ f16
12
240
13
241
define void @test_copysign_v4f16_v4f16 (ptr %ap , ptr %bp ) {
0 commit comments