3
3
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 \
4
4
; RUN: | FileCheck %s --check-prefixes=CHECK,Z16
5
5
;
6
- ; Test copysign intrinsics with half.
6
+ ; Test copysign intrinsics.
7
7
8
8
declare half @llvm.copysign.f16 (half , half )
9
9
declare float @llvm.copysign.f32 (float , float )
@@ -43,53 +43,25 @@ define half @f2(half %a, double %b) {
43
43
}
44
44
45
45
; Test copysign with an f16 result and f128 sign argument.
46
- ; TODO: Let the DAGCombiner remove the fp_round.
47
46
define half @f3 (half %a , fp128 %b ) {
48
47
; Z10-LABEL: f3:
49
48
; Z10: # %bb.0:
50
- ; Z10-NEXT: stmg %r14, %r15, 112(%r15)
51
- ; Z10-NEXT: .cfi_offset %r14, -48
52
- ; Z10-NEXT: .cfi_offset %r15, -40
53
- ; Z10-NEXT: aghi %r15, -184
54
- ; Z10-NEXT: .cfi_def_cfa_offset 344
55
- ; Z10-NEXT: std %f8, 176(%r15) # 8-byte Spill
56
- ; Z10-NEXT: .cfi_offset %f8, -168
57
49
; Z10-NEXT: ld %f1, 0(%r2)
58
50
; Z10-NEXT: ld %f3, 8(%r2)
59
- ; Z10-NEXT: ler %f8, %f0
60
- ; Z10-NEXT: la %r2, 160(%r15)
61
- ; Z10-NEXT: std %f1, 160(%r15)
62
- ; Z10-NEXT: std %f3, 168(%r15)
63
- ; Z10-NEXT: brasl %r14, __trunctfhf2@PLT
64
- ; Z10-NEXT: cpsdr %f0, %f0, %f8
65
- ; Z10-NEXT: ld %f8, 176(%r15) # 8-byte Reload
66
- ; Z10-NEXT: lmg %r14, %r15, 296(%r15)
51
+ ; Z10-NEXT: cpsdr %f0, %f1, %f0
67
52
; Z10-NEXT: br %r14
68
53
;
69
54
; Z16-LABEL: f3:
70
55
; Z16: # %bb.0:
71
- ; Z16-NEXT: stmg %r14, %r15, 112(%r15)
72
- ; Z16-NEXT: .cfi_offset %r14, -48
73
- ; Z16-NEXT: .cfi_offset %r15, -40
74
- ; Z16-NEXT: aghi %r15, -184
75
- ; Z16-NEXT: .cfi_def_cfa_offset 344
76
- ; Z16-NEXT: std %f8, 176(%r15) # 8-byte Spill
77
- ; Z16-NEXT: .cfi_offset %f8, -168
78
- ; Z16-NEXT: ldr %f8, %f0
79
- ; Z16-NEXT: vl %v0, 0(%r2), 3
80
- ; Z16-NEXT: la %r2, 160(%r15)
81
- ; Z16-NEXT: vst %v0, 160(%r15), 3
82
- ; Z16-NEXT: brasl %r14, __trunctfhf2@PLT
83
- ; Z16-NEXT: cpsdr %f0, %f0, %f8
84
- ; Z16-NEXT: ld %f8, 176(%r15) # 8-byte Reload
85
- ; Z16-NEXT: lmg %r14, %r15, 296(%r15)
56
+ ; Z16-NEXT: vl %v1, 0(%r2), 3
57
+ ; Z16-NEXT: cpsdr %f0, %f1, %f0
86
58
; Z16-NEXT: br %r14
87
59
%bh = fptrunc fp128 %b to half
88
60
%res = call half @llvm.copysign.f16 (half %a , half %bh )
89
61
ret half %res
90
62
}
91
63
92
- ; Test copysign with an f32 result and half sign argument.
64
+ ; Test copysign with an f32 result and f16 sign argument.
93
65
define float @f4 (float %a , half %b ) {
94
66
; CHECK-LABEL: f4:
95
67
; CHECK: # %bb.0:
@@ -100,20 +72,100 @@ define float @f4(float %a, half %b) {
100
72
ret float %res
101
73
}
102
74
103
- ; Test copysign with an f64 result and half sign argument.
104
- define double @f5 (double %a , half %b ) {
75
+ ; Test copysign with an f32 result and f32 sign argument.
76
+ define float @f5 (float %a , float %b ) {
105
77
; CHECK-LABEL: f5:
106
78
; CHECK: # %bb.0:
107
79
; CHECK-NEXT: cpsdr %f0, %f2, %f0
80
+ ; CHECK-NEXT: br %r14
81
+ %res = call float @llvm.copysign.f32 (float %a , float %b )
82
+ ret float %res
83
+ }
84
+
85
+ ; Test copysign with an f32 result and f64 sign argument.
86
+ define float @f6 (float %a , double %b ) {
87
+ ; CHECK-LABEL: f6:
88
+ ; CHECK: # %bb.0:
89
+ ; CHECK-NEXT: cpsdr %f0, %f2, %f0
90
+ ; CHECK-NEXT: br %r14
91
+ %bf = fptrunc double %b to float
92
+ %res = call float @llvm.copysign.f32 (float %a , float %bf )
93
+ ret float %res
94
+ }
95
+
96
+ ; Test copysign with an f32 result and f128 sign argument.
97
+ define float @f7 (float %a , fp128 %b ) {
98
+ ; Z10-LABEL: f7:
99
+ ; Z10: # %bb.0:
100
+ ; Z10-NEXT: ld %f1, 0(%r2)
101
+ ; Z10-NEXT: ld %f3, 8(%r2)
102
+ ; Z10-NEXT: cpsdr %f0, %f1, %f0
103
+ ; Z10-NEXT: br %r14
104
+ ;
105
+ ; Z16-LABEL: f7:
106
+ ; Z16: # %bb.0:
107
+ ; Z16-NEXT: vl %v1, 0(%r2), 3
108
+ ; Z16-NEXT: cpsdr %f0, %f1, %f0
109
+ ; Z16-NEXT: br %r14
110
+ %bf = fptrunc fp128 %b to float
111
+ %res = call float @llvm.copysign.f32 (float %a , float %bf )
112
+ ret float %res
113
+ }
114
+
115
+ ; Test copysign with an f64 result and f16 sign argument.
116
+ define double @f8 (double %a , half %b ) {
117
+ ; CHECK-LABEL: f8:
118
+ ; CHECK: # %bb.0:
119
+ ; CHECK-NEXT: cpsdr %f0, %f2, %f0
108
120
; CHECK-NEXT: br %r14
109
121
%bd = fpext half %b to double
110
122
%res = call double @llvm.copysign.f64 (double %a , double %bd )
111
123
ret double %res
112
124
}
113
125
114
- ; Test copysign with an f128 result and half sign argument.
115
- define fp128 @f6 (fp128 %a , half %b ) {
116
- ; Z10-LABEL: f6:
126
+ ; Test copysign with an f64 result and f32 sign argument.
127
+ define double @f9 (double %a , float %b ) {
128
+ ; CHECK-LABEL: f9:
129
+ ; CHECK: # %bb.0:
130
+ ; CHECK-NEXT: cpsdr %f0, %f2, %f0
131
+ ; CHECK-NEXT: br %r14
132
+ %bd = fpext float %b to double
133
+ %res = call double @llvm.copysign.f64 (double %a , double %bd )
134
+ ret double %res
135
+ }
136
+
137
+ ; Test copysign with an f64 result and f64 sign argument.
138
+ define double @f10 (double %a , double %b ) {
139
+ ; CHECK-LABEL: f10:
140
+ ; CHECK: # %bb.0:
141
+ ; CHECK-NEXT: cpsdr %f0, %f2, %f0
142
+ ; CHECK-NEXT: br %r14
143
+ %res = call double @llvm.copysign.f64 (double %a , double %b )
144
+ ret double %res
145
+ }
146
+
147
+ ; Test copysign with an f64 result and f128 sign argument.
148
+ define double @f11 (double %a , fp128 %b ) {
149
+ ; Z10-LABEL: f11:
150
+ ; Z10: # %bb.0:
151
+ ; Z10-NEXT: ld %f1, 0(%r2)
152
+ ; Z10-NEXT: ld %f3, 8(%r2)
153
+ ; Z10-NEXT: cpsdr %f0, %f1, %f0
154
+ ; Z10-NEXT: br %r14
155
+ ;
156
+ ; Z16-LABEL: f11:
157
+ ; Z16: # %bb.0:
158
+ ; Z16-NEXT: vl %v1, 0(%r2), 3
159
+ ; Z16-NEXT: cpsdr %f0, %f1, %f0
160
+ ; Z16-NEXT: br %r14
161
+ %bd = fptrunc fp128 %b to double
162
+ %res = call double @llvm.copysign.f64 (double %a , double %bd )
163
+ ret double %res
164
+ }
165
+
166
+ ; Test copysign with an f128 result and f16 sign argument.
167
+ define fp128 @f12 (fp128 %a , half %b ) {
168
+ ; Z10-LABEL: f12:
117
169
; Z10: # %bb.0:
118
170
; Z10-NEXT: ld %f1, 0(%r3)
119
171
; Z10-NEXT: ld %f3, 8(%r3)
@@ -122,24 +174,117 @@ define fp128 @f6(fp128 %a, half %b) {
122
174
; Z10-NEXT: std %f3, 8(%r2)
123
175
; Z10-NEXT: br %r14
124
176
;
125
- ; Z16-LABEL: f6:
177
+ ; Z16-LABEL: f12:
126
178
; Z16: # %bb.0:
127
179
; Z16-NEXT: aghi %r15, -168
128
180
; Z16-NEXT: .cfi_def_cfa_offset 328
129
181
; Z16-NEXT: vl %v1, 0(%r3), 3
130
182
; Z16-NEXT: vsteh %v0, 164(%r15), 0
131
183
; Z16-NEXT: tm 164(%r15), 128
132
- ; Z16-NEXT: je .LBB6_2
184
+ ; Z16-NEXT: je .LBB12_2
133
185
; Z16-NEXT: # %bb.1:
134
186
; Z16-NEXT: wflnxb %v0, %v1
135
- ; Z16-NEXT: j .LBB6_3
136
- ; Z16-NEXT: .LBB6_2:
187
+ ; Z16-NEXT: j .LBB12_3
188
+ ; Z16-NEXT: .LBB12_2:
137
189
; Z16-NEXT: wflpxb %v0, %v1
138
- ; Z16-NEXT: .LBB6_3:
190
+ ; Z16-NEXT: .LBB12_3:
139
191
; Z16-NEXT: vst %v0, 0(%r2), 3
140
192
; Z16-NEXT: aghi %r15, 168
141
193
; Z16-NEXT: br %r14
142
- %bd = fpext half %b to fp128
143
- %res = call fp128 @llvm.copysign.f128 (fp128 %a , fp128 %bd )
194
+ %b128 = fpext half %b to fp128
195
+ %res = call fp128 @llvm.copysign.f128 (fp128 %a , fp128 %b128 )
196
+ ret fp128 %res
197
+ }
198
+
199
+ ; Test copysign with an f128 result and f32 sign argument.
200
+ define fp128 @f13 (fp128 %a , float %b ) {
201
+ ; Z10-LABEL: f13:
202
+ ; Z10: # %bb.0:
203
+ ; Z10-NEXT: ld %f1, 0(%r3)
204
+ ; Z10-NEXT: ld %f3, 8(%r3)
205
+ ; Z10-NEXT: cpsdr %f1, %f0, %f1
206
+ ; Z10-NEXT: std %f1, 0(%r2)
207
+ ; Z10-NEXT: std %f3, 8(%r2)
208
+ ; Z10-NEXT: br %r14
209
+ ;
210
+ ; Z16-LABEL: f13:
211
+ ; Z16: # %bb.0:
212
+ ; Z16-NEXT: vl %v1, 0(%r3), 3
213
+ ; Z16-NEXT: vlgvf %r0, %v0, 0
214
+ ; Z16-NEXT: tmlh %r0, 32768
215
+ ; Z16-NEXT: je .LBB13_2
216
+ ; Z16-NEXT: # %bb.1:
217
+ ; Z16-NEXT: wflnxb %v0, %v1
218
+ ; Z16-NEXT: vst %v0, 0(%r2), 3
219
+ ; Z16-NEXT: br %r14
220
+ ; Z16-NEXT: .LBB13_2:
221
+ ; Z16-NEXT: wflpxb %v0, %v1
222
+ ; Z16-NEXT: vst %v0, 0(%r2), 3
223
+ ; Z16-NEXT: br %r14
224
+ %b128 = fpext float %b to fp128
225
+ %res = call fp128 @llvm.copysign.f128 (fp128 %a , fp128 %b128 )
226
+ ret fp128 %res
227
+ }
228
+
229
+ ; Test copysign with an f128 result and f64 sign argument.
230
+ define fp128 @f14 (fp128 %a , double %b ) {
231
+ ; Z10-LABEL: f14:
232
+ ; Z10: # %bb.0:
233
+ ; Z10-NEXT: ld %f1, 0(%r3)
234
+ ; Z10-NEXT: ld %f3, 8(%r3)
235
+ ; Z10-NEXT: cpsdr %f1, %f0, %f1
236
+ ; Z10-NEXT: std %f1, 0(%r2)
237
+ ; Z10-NEXT: std %f3, 8(%r2)
238
+ ; Z10-NEXT: br %r14
239
+ ;
240
+ ; Z16-LABEL: f14:
241
+ ; Z16: # %bb.0:
242
+ ; Z16-NEXT: vl %v1, 0(%r3), 3
243
+ ; Z16-NEXT: lgdr %r0, %f0
244
+ ; Z16-NEXT: tmhh %r0, 32768
245
+ ; Z16-NEXT: je .LBB14_2
246
+ ; Z16-NEXT: # %bb.1:
247
+ ; Z16-NEXT: wflnxb %v0, %v1
248
+ ; Z16-NEXT: vst %v0, 0(%r2), 3
249
+ ; Z16-NEXT: br %r14
250
+ ; Z16-NEXT: .LBB14_2:
251
+ ; Z16-NEXT: wflpxb %v0, %v1
252
+ ; Z16-NEXT: vst %v0, 0(%r2), 3
253
+ ; Z16-NEXT: br %r14
254
+ %b128 = fpext double %b to fp128
255
+ %res = call fp128 @llvm.copysign.f128 (fp128 %a , fp128 %b128 )
256
+ ret fp128 %res
257
+ }
258
+
259
+ ; Test copysign with an f128 result and f128 sign argument.
260
+ define fp128 @f15 (fp128 %a , fp128 %b ) {
261
+ ; Z10-LABEL: f15:
262
+ ; Z10: # %bb.0:
263
+ ; Z10-NEXT: ld %f0, 0(%r3)
264
+ ; Z10-NEXT: ld %f2, 8(%r3)
265
+ ; Z10-NEXT: ld %f1, 0(%r4)
266
+ ; Z10-NEXT: ld %f3, 8(%r4)
267
+ ; Z10-NEXT: cpsdr %f0, %f1, %f0
268
+ ; Z10-NEXT: std %f0, 0(%r2)
269
+ ; Z10-NEXT: std %f2, 8(%r2)
270
+ ; Z10-NEXT: br %r14
271
+ ;
272
+ ; Z16-LABEL: f15:
273
+ ; Z16: # %bb.0:
274
+ ; Z16-NEXT: larl %r1, .LCPI15_0
275
+ ; Z16-NEXT: vl %v1, 0(%r4), 3
276
+ ; Z16-NEXT: vl %v2, 0(%r1), 3
277
+ ; Z16-NEXT: vl %v0, 0(%r3), 3
278
+ ; Z16-NEXT: vtm %v1, %v2
279
+ ; Z16-NEXT: je .LBB15_2
280
+ ; Z16-NEXT: # %bb.1:
281
+ ; Z16-NEXT: wflnxb %v0, %v0
282
+ ; Z16-NEXT: vst %v0, 0(%r2), 3
283
+ ; Z16-NEXT: br %r14
284
+ ; Z16-NEXT: .LBB15_2:
285
+ ; Z16-NEXT: wflpxb %v0, %v0
286
+ ; Z16-NEXT: vst %v0, 0(%r2), 3
287
+ ; Z16-NEXT: br %r14
288
+ %res = call fp128 @llvm.copysign.f128 (fp128 %a , fp128 %b )
144
289
ret fp128 %res
145
290
}
0 commit comments