
Commit 61ff9f8

[X86] Add strictfp version of PR43024 test. (#80573)
In the current version of the PR43024 test, the operations should be optimized away, but we currently fail to do so. This commit adds a strictfp version of the test, in which the operations must not be optimized away, to verify that changes improving the non-strictfp case have no adverse effect on the strict case.
1 parent 6ad692b commit 61ff9f8
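
Background, not part of the commit itself: LLVM's constrained floating-point intrinsics (llvm.experimental.constrained.*) model FP operations whose rounding mode is dynamic and whose exception behaviour is observable, and any function that uses them carries the strictfp attribute. In the default FP environment the optimizer may rewrite or remove such operations when the value is unchanged; under the constrained form it must preserve them. The scalar sketch below is illustrative only; the function names plain_add and strict_add are hypothetical and do not appear in the commit.

; Illustrative sketch, not taken from the commit.

declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)

; Ordinary IR: the optimizer may assume the default FP environment
; (round-to-nearest, FP exception flags unobservable), which is what
; would permit folding the operations in the existing PR43024 test.
define float @plain_add(float %a, float %b) {
  %r = fadd float %a, %b
  ret float %r
}

; Constrained IR: rounding is dynamic and exceptions are strict, so the
; operation has observable side effects and must be kept as written.
; The enclosing function carries the strictfp attribute.
define float @strict_add(float %a, float %b) strictfp {
  %r = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret float %r
}

The new PR43024_strictfp test in the diff below uses the v4f32 variants of these intrinsics, and its CHECK lines verify that the multiplies, adds, and shuffles all survive into the generated SSE/AVX code.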

File tree

1 file changed (+75, -2 lines)

llvm/test/CodeGen/X86/vector-shuffle-combining.ll

Lines changed: 75 additions & 2 deletions
@@ -3225,6 +3225,79 @@ define void @PR43024() {
   ret void
 }
 
+declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+define void @PR43024_strictfp() strictfp {
+; SSE2-LABEL: PR43024_strictfp:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSE2-NEXT: movaps %xmm0, (%rax)
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: mulps %xmm1, %xmm0
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
+; SSE2-NEXT: addps %xmm0, %xmm2
+; SSE2-NEXT: addps %xmm1, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSE2-NEXT: addps %xmm2, %xmm0
+; SSE2-NEXT: movss %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: PR43024_strictfp:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSSE3-NEXT: movaps %xmm0, (%rax)
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: mulps %xmm1, %xmm0
+; SSSE3-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSSE3-NEXT: addps %xmm0, %xmm2
+; SSSE3-NEXT: addps %xmm1, %xmm2
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSSE3-NEXT: addps %xmm2, %xmm0
+; SSSE3-NEXT: movss %xmm0, (%rax)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: PR43024_strictfp:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSE41-NEXT: movaps %xmm0, (%rax)
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: mulps %xmm1, %xmm0
+; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE41-NEXT: addps %xmm0, %xmm2
+; SSE41-NEXT: addps %xmm1, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSE41-NEXT: addps %xmm2, %xmm0
+; SSE41-NEXT: movss %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: PR43024_strictfp:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovaps %xmm0, (%rax)
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX-NEXT: vaddps %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovss %xmm0, (%rax)
+; AVX-NEXT: retq
+  store <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x0, float 0x0>, ptr undef, align 16
+  %1 = load <4 x float>, ptr undef, align 16
+  %2 = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %1, <4 x float> zeroinitializer, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %2, <4 x float> %3, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %5 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> zeroinitializer, <4 x float> %4, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %6 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
+  %7 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %6, <4 x float> %5, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %8 = extractelement <4 x float> %7, i32 0
+  store float %8, ptr undef, align 8
+  ret void
+}
+
 define void @PR45604(ptr %dst, ptr %src) {
 ; SSE2-LABEL: PR45604:
 ; SSE2: # %bb.0:
@@ -3568,9 +3641,9 @@ define void @autogen_SD25931() {
 ; CHECK-LABEL: autogen_SD25931:
 ; CHECK: # %bb.0: # %BB
 ; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB140_1: # %CF242
+; CHECK-NEXT: .LBB141_1: # %CF242
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: jmp .LBB140_1
+; CHECK-NEXT: jmp .LBB141_1
 BB:
   %Cmp16 = icmp uge <2 x i1> zeroinitializer, zeroinitializer
   %Shuff19 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Cmp16, <2 x i32> <i32 3, i32 1>
