Commit 9776dc0

[X86] Add strictfp version of PR43024 test.
In the current version of the PR43024 test the operations could legally be optimized away, but we currently fail to do so. This commit adds a strictfp version of the test in which the operations must not be optimized away, so that future changes improving the non-strict case can be verified to have no adverse effect on the strict one.
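
As an illustrative sketch (not part of the commit; %x and the %prod names are hypothetical), the distinction the new test pins down looks like this in LLVM IR: an ordinary fmul has no side effects, so with known-constant operands it is a legitimate target for constant folding, whereas the constrained intrinsic used under strictfp carries metadata that forbids such folding.

; Non-strict: side-effect free; with constant operands this multiply (and
; any adds built on top of it) may be folded away entirely.
%prod = fmul <4 x float> %x, zeroinitializer

; strictfp: "round.dynamic" marks the rounding mode as unknown at compile
; time, and "fpexcept.strict" requires floating-point exception side effects
; to be preserved, so the optimizer may not simply remove the operation.
%prod.strict = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
    <4 x float> %x, <4 x float> zeroinitializer,
    metadata !"round.dynamic", metadata !"fpexcept.strict")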
1 parent 6ad692b · commit 9776dc0

File tree

1 file changed (+75, -2 lines)

llvm/test/CodeGen/X86/vector-shuffle-combining.ll

Lines changed: 75 additions & 2 deletions
@@ -3225,6 +3225,79 @@ define void @PR43024() {
   ret void
 }
 
+declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+define void @PR43024_strictfp() strictfp {
+; SSE2-LABEL: PR43024_strictfp:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSE2-NEXT:    movaps %xmm0, (%rax)
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    mulps %xmm1, %xmm0
+; SSE2-NEXT:    movaps %xmm0, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
+; SSE2-NEXT:    addps %xmm0, %xmm2
+; SSE2-NEXT:    addps %xmm1, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSE2-NEXT:    addps %xmm2, %xmm0
+; SSE2-NEXT:    movss %xmm0, (%rax)
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: PR43024_strictfp:
+; SSSE3:       # %bb.0:
+; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSSE3-NEXT:    movaps %xmm0, (%rax)
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    mulps %xmm1, %xmm0
+; SSSE3-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSSE3-NEXT:    addps %xmm0, %xmm2
+; SSSE3-NEXT:    addps %xmm1, %xmm2
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSSE3-NEXT:    addps %xmm2, %xmm0
+; SSSE3-NEXT:    movss %xmm0, (%rax)
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: PR43024_strictfp:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSE41-NEXT:    movaps %xmm0, (%rax)
+; SSE41-NEXT:    xorps %xmm1, %xmm1
+; SSE41-NEXT:    mulps %xmm1, %xmm0
+; SSE41-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE41-NEXT:    addps %xmm0, %xmm2
+; SSE41-NEXT:    addps %xmm1, %xmm2
+; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSE41-NEXT:    addps %xmm2, %xmm0
+; SSE41-NEXT:    movss %xmm0, (%rax)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: PR43024_strictfp:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; AVX-NEXT:    vmovaps %xmm0, (%rax)
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX-NEXT:    vaddps %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vaddps %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovss %xmm0, (%rax)
+; AVX-NEXT:    retq
+  store <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x0, float 0x0>, ptr undef, align 16
+  %1 = load <4 x float>, ptr undef, align 16
+  %2 = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %1, <4 x float> zeroinitializer, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %2, <4 x float> %3, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %5 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> zeroinitializer, <4 x float> %4, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %6 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
+  %7 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %6, <4 x float> %5, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %8 = extractelement <4 x float> %7, i32 0
+  store float %8, ptr undef, align 8
+  ret void
+}
+
 define void @PR45604(ptr %dst, ptr %src) {
 ; SSE2-LABEL: PR45604:
 ; SSE2:       # %bb.0:
@@ -3568,9 +3641,9 @@ define void @autogen_SD25931() {
 ; CHECK-LABEL: autogen_SD25931:
 ; CHECK:       # %bb.0: # %BB
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB140_1: # %CF242
+; CHECK-NEXT:  .LBB141_1: # %CF242
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    jmp .LBB140_1
+; CHECK-NEXT:    jmp .LBB141_1
 BB:
   %Cmp16 = icmp uge <2 x i1> zeroinitializer, zeroinitializer
   %Shuff19 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Cmp16, <2 x i32> <i32 3, i32 1>
