@@ -3225,6 +3225,79 @@ define void @PR43024() {
3225
3225
ret void
3226
3226
}
3227
3227
3228
+ declare <4 x float > @llvm.experimental.constrained.fadd.v4f32 (<4 x float >, <4 x float >, metadata , metadata ) ; constrained v4f32 add; callers in this file pass rounding-mode and exception-behavior strings as the two metadata operands
3229
+ declare <4 x float > @llvm.experimental.constrained.fmul.v4f32 (<4 x float >, <4 x float >, metadata , metadata ) ; constrained v4f32 multiply; same operand layout as the fadd declaration
3230
+
3231
+ define void @PR43024_strictfp () strictfp { ; strictfp test: chain of constrained fmul/fadd calls on a constant vector; stores one float and returns nothing
3232
+ ; SSE2-LABEL: PR43024_strictfp:
3233
+ ; SSE2: # %bb.0:
3234
+ ; SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
3235
+ ; SSE2-NEXT: movaps %xmm0, (%rax)
3236
+ ; SSE2-NEXT: xorps %xmm1, %xmm1
3237
+ ; SSE2-NEXT: mulps %xmm1, %xmm0
3238
+ ; SSE2-NEXT: movaps %xmm0, %xmm2
3239
+ ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
3240
+ ; SSE2-NEXT: addps %xmm0, %xmm2
3241
+ ; SSE2-NEXT: addps %xmm1, %xmm2
3242
+ ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
3243
+ ; SSE2-NEXT: addps %xmm2, %xmm0
3244
+ ; SSE2-NEXT: movss %xmm0, (%rax)
3245
+ ; SSE2-NEXT: retq
3246
+ ;
3247
+ ; SSSE3-LABEL: PR43024_strictfp:
3248
+ ; SSSE3: # %bb.0:
3249
+ ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
3250
+ ; SSSE3-NEXT: movaps %xmm0, (%rax)
3251
+ ; SSSE3-NEXT: xorps %xmm1, %xmm1
3252
+ ; SSSE3-NEXT: mulps %xmm1, %xmm0
3253
+ ; SSSE3-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
3254
+ ; SSSE3-NEXT: addps %xmm0, %xmm2
3255
+ ; SSSE3-NEXT: addps %xmm1, %xmm2
3256
+ ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
3257
+ ; SSSE3-NEXT: addps %xmm2, %xmm0
3258
+ ; SSSE3-NEXT: movss %xmm0, (%rax)
3259
+ ; SSSE3-NEXT: retq
3260
+ ;
3261
+ ; SSE41-LABEL: PR43024_strictfp:
3262
+ ; SSE41: # %bb.0:
3263
+ ; SSE41-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
3264
+ ; SSE41-NEXT: movaps %xmm0, (%rax)
3265
+ ; SSE41-NEXT: xorps %xmm1, %xmm1
3266
+ ; SSE41-NEXT: mulps %xmm1, %xmm0
3267
+ ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
3268
+ ; SSE41-NEXT: addps %xmm0, %xmm2
3269
+ ; SSE41-NEXT: addps %xmm1, %xmm2
3270
+ ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
3271
+ ; SSE41-NEXT: addps %xmm2, %xmm0
3272
+ ; SSE41-NEXT: movss %xmm0, (%rax)
3273
+ ; SSE41-NEXT: retq
3274
+ ;
3275
+ ; AVX-LABEL: PR43024_strictfp:
3276
+ ; AVX: # %bb.0:
3277
+ ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
3278
+ ; AVX-NEXT: vmovaps %xmm0, (%rax)
3279
+ ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
3280
+ ; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
3281
+ ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
3282
+ ; AVX-NEXT: vaddps %xmm2, %xmm0, %xmm2
3283
+ ; AVX-NEXT: vaddps %xmm2, %xmm1, %xmm1
3284
+ ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
3285
+ ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
3286
+ ; AVX-NEXT: vmovss %xmm0, (%rax)
3287
+ ; AVX-NEXT: retq
3288
+ store <4 x float > <float 0x7FF8000000000000 , float 0x7FF8000000000000 , float 0x0 , float 0x0 >, ptr undef , align 16 ; constant vector written through an undef pointer; the expected asm above prints its lanes as NaN, NaN, 0.0, 0.0
3289
+ %1 = load <4 x float >, ptr undef , align 16 ; reload a vector through an undef pointer (same operand as the store)
3290
+ %2 = call <4 x float > @llvm.experimental.constrained.fmul.v4f32 (<4 x float > %1 , <4 x float > zeroinitializer , metadata !"round.dynamic" , metadata !"fpexcept.strict" ) ; %2 = %1 * zero vector with dynamic rounding and strict exceptions; the expected asm above keeps a real mulps/vmulps for it
3291
+ %3 = shufflevector <4 x float > %2 , <4 x float > undef , <4 x i32 > <i32 1 , i32 undef , i32 undef , i32 undef > ; lane 0 = %2[1]; the other lanes are undef
3292
+ %4 = call <4 x float > @llvm.experimental.constrained.fadd.v4f32 (<4 x float > %2 , <4 x float > %3 , metadata !"round.dynamic" , metadata !"fpexcept.strict" ) ; lane 0 = %2[0] + %2[1]
3293
+ %5 = call <4 x float > @llvm.experimental.constrained.fadd.v4f32 (<4 x float > zeroinitializer , <4 x float > %4 , metadata !"round.dynamic" , metadata !"fpexcept.strict" ) ; lane 0 = 0.0 + %4[0]
3294
+ %6 = shufflevector <4 x float > %2 , <4 x float > undef , <4 x i32 > <i32 3 , i32 undef , i32 undef , i32 undef > ; lane 0 = %2[3]; the other lanes are undef
3295
+ %7 = call <4 x float > @llvm.experimental.constrained.fadd.v4f32 (<4 x float > %6 , <4 x float > %5 , metadata !"round.dynamic" , metadata !"fpexcept.strict" ) ; lane 0 = %2[3] + %5[0]
3296
+ %8 = extractelement <4 x float > %7 , i32 0 ; only lane 0 of the final sum is consumed
3297
+ store float %8 , ptr undef , align 8 ; scalar result written through an undef pointer
3298
+ ret void
3299
+ }
3300
+
3228
3301
define void @PR45604 (ptr %dst , ptr %src ) {
3229
3302
; SSE2-LABEL: PR45604:
3230
3303
; SSE2: # %bb.0:
@@ -3568,9 +3641,9 @@ define void @autogen_SD25931() {
3568
3641
; CHECK-LABEL: autogen_SD25931:
3569
3642
; CHECK: # %bb.0: # %BB
3570
3643
; CHECK-NEXT: .p2align 4, 0x90
3571
- ; CHECK-NEXT: .LBB140_1 : # %CF242
3644
+ ; CHECK-NEXT: .LBB141_1 : # %CF242
3572
3645
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3573
- ; CHECK-NEXT: jmp .LBB140_1
3646
+ ; CHECK-NEXT: jmp .LBB141_1
3574
3647
BB:
3575
3648
%Cmp16 = icmp uge <2 x i1 > zeroinitializer , zeroinitializer
3576
3649
%Shuff19 = shufflevector <2 x i1 > zeroinitializer , <2 x i1 > %Cmp16 , <2 x i32 > <i32 3 , i32 1 >
0 commit comments