-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Add strictfp version of PR43024 test. #80573
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
For the current version of the PR43024 test, we should be able to optimize away the operations but currently fail to do so. This commit adds a strictfp version of the test where we should not be able to optimize away the operations, as a verification that changes which improve the non-strictfp case have no adverse effect on the strictfp case.
@llvm/pr-subscribers-backend-x86 Author: Harald van Dijk (hvdijk) ChangesFor the current version of the PR43024 test, we should be able to optimize away the operations but fail to do so. This commit adds a strictfp version of the test where we should not be able to optimize away the operations, as a verification that changes to improve the other effect have no adverse effect. Full diff: https://github.com/llvm/llvm-project/pull/80573.diff 1 File Affected:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index d02a9a64b0302..b5adfb3733357 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3225,6 +3225,79 @@ define void @PR43024() {
ret void
}
+declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+define void @PR43024_strictfp() strictfp {
+; SSE2-LABEL: PR43024_strictfp:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSE2-NEXT: movaps %xmm0, (%rax)
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: mulps %xmm1, %xmm0
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
+; SSE2-NEXT: addps %xmm0, %xmm2
+; SSE2-NEXT: addps %xmm1, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSE2-NEXT: addps %xmm2, %xmm0
+; SSE2-NEXT: movss %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: PR43024_strictfp:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSSE3-NEXT: movaps %xmm0, (%rax)
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: mulps %xmm1, %xmm0
+; SSSE3-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSSE3-NEXT: addps %xmm0, %xmm2
+; SSSE3-NEXT: addps %xmm1, %xmm2
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSSE3-NEXT: addps %xmm2, %xmm0
+; SSSE3-NEXT: movss %xmm0, (%rax)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: PR43024_strictfp:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSE41-NEXT: movaps %xmm0, (%rax)
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: mulps %xmm1, %xmm0
+; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE41-NEXT: addps %xmm0, %xmm2
+; SSE41-NEXT: addps %xmm1, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSE41-NEXT: addps %xmm2, %xmm0
+; SSE41-NEXT: movss %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: PR43024_strictfp:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovaps %xmm0, (%rax)
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX-NEXT: vaddps %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovss %xmm0, (%rax)
+; AVX-NEXT: retq
+ store <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x0, float 0x0>, ptr undef, align 16
+ %1 = load <4 x float>, ptr undef, align 16
+ %2 = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %1, <4 x float> zeroinitializer, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %4 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %2, <4 x float> %3, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ %5 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> zeroinitializer, <4 x float> %4, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ %6 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
+ %7 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %6, <4 x float> %5, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ %8 = extractelement <4 x float> %7, i32 0
+ store float %8, ptr undef, align 8
+ ret void
+}
+
define void @PR45604(ptr %dst, ptr %src) {
; SSE2-LABEL: PR45604:
; SSE2: # %bb.0:
@@ -3568,9 +3641,9 @@ define void @autogen_SD25931() {
; CHECK-LABEL: autogen_SD25931:
; CHECK: # %bb.0: # %BB
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB140_1: # %CF242
+; CHECK-NEXT: .LBB141_1: # %CF242
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: jmp .LBB140_1
+; CHECK-NEXT: jmp .LBB141_1
BB:
%Cmp16 = icmp uge <2 x i1> zeroinitializer, zeroinitializer
%Shuff19 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Cmp16, <2 x i32> <i32 3, i32 1>
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
For the current version of the PR43024 test, we should be able to optimize away the operations but fail to do so. This commit adds a strictfp version of the test where we should not be able to optimize away the operations, as a verification that changes to improve the other effect have no adverse effect.
For the current version of the PR43024 test, we should be able to optimize away the operations but fail to do so. This commit adds a strictfp version of the test where we should not be able to optimize away the operations, as a verification that changes to improve the other effect have no adverse effect.