-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Modify tests for constrained rounding functions #116951
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The existing tests for constrained functions often use constant arguments. If constant evaluation is enhanced, such tests will not check code generation of the tested functions. To avoid it, the tests are modified to use loaded value instead of constants. Now only the tests for rounding functions are changed.
@llvm/pr-subscribers-backend-x86 Author: Serge Pavlov (spavloff) ChangesThe existing tests for constrained functions often use constant arguments. If constant evaluation is enhanced, such tests will not check code generation of the tested functions. To avoid it, the tests are modified to use loaded value instead of constants. Now only the tests for rounding functions are changed. Patch is 59.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116951.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 21dfdc3c2abe49..579b15bbbb0ff8 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -3011,12 +3011,12 @@ entry:
ret <4 x double> %log2
}
-define <1 x float> @constrained_vector_rint_v1f32() #0 {
+define <1 x float> @constrained_vector_rint_v1f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq rintf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -3024,98 +3024,111 @@ define <1 x float> @constrained_vector_rint_v1f32() #0 {
;
; AVX-LABEL: constrained_vector_rint_v1f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
- <1 x float> <float 42.0>,
+ <1 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <1 x float> %rint
}
-define <2 x double> @constrained_vector_rint_v2f64() #0 {
+define <2 x double> @constrained_vector_rint_v2f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
-; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq rint@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_rint_v2f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $4, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
- <2 x double> <double 42.1, double 42.0>,
+ <2 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %rint
}
-define <3 x float> @constrained_vector_rint_v3f32() #0 {
+define <3 x float> @constrained_vector_rint_v3f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq rintf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq rintf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq rintf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_rint_v3f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
- <3 x float> <float 42.0, float 43.0, float 44.0>,
+ <3 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <3 x float> %rint
}
-define <3 x double> @constrained_vector_rint_v3f64() #0 {
+define <3 x double> @constrained_vector_rint_v3f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq rint@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq rint@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq rint@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -3124,70 +3137,76 @@ define <3 x double> @constrained_vector_rint_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_rint_v3f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $4, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, ptr %a
%rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
- <3 x double> <double 42.0, double 42.1, double 42.2>,
+ <3 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <3 x double> %rint
}
-define <4 x double> @constrained_vector_rint_v4f64() #0 {
+define <4 x double> @constrained_vector_rint_v4f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_rint_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm1
+; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps 16(%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq rint@PLT
-; CHECK-NEXT: movaps %xmm0, %xmm1
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_rint_v4f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
+; AVX-NEXT: vroundpd $4, (%rdi), %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <4 x double>, ptr %a
%rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
- <4 x double> <double 42.1, double 42.2,
- double 42.3, double 42.4>,
+ <4 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x double> %rint
}
-define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
+define <1 x float> @constrained_vector_nearbyint_v1f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq nearbyintf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -3195,98 +3214,111 @@ define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
;
; AVX-LABEL: constrained_vector_nearbyint_v1f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <1 x float>, ptr %a
%nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
- <1 x float> <float 42.0>,
+ <1 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <1 x float> %nearby
}
-define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
+define <2 x double> @constrained_vector_nearbyint_v2f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
-; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: callq nearbyint@PLT
-; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq nearbyint@PLT
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearbyint_v2f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX-NEXT: vroundpd $12, (%rdi), %xmm0
; AVX-NEXT: retq
entry:
+ %b = load <2 x double>, ptr %a
%nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
- <2 x double> <double 42.1, double 42.0>,
+ <2 x double> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %nearby
}
-define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
+define <3 x float> @constrained_vector_nearbyint_v3f32(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq nearbyintf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq nearbyintf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq nearbyintf@PLT
-; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearbyint_v3f32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vroundss $12, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
+ %b = load <3 x float>, ptr %a
%nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
- <3 x float> <float 42.0, float 43.0, float 44.0>,
+ <3 x float> %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <3 x float> %nearby
}
-define <3 x double> @constrained_vector_nearby_v3f64() #0 {
+define <3 x double> @constrained_vector_nearby_v3f64(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_nearby_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq nearbyint@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq nearbyint@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
@@ -3295,59 +3327,65 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearby_v3f64:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX-NEXT: vroundpd $12, (%rdi), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
+ %b = load <3 x double>, p...
[truncated]
|
Are we losing all test coverage of the constant cases? |
Of all the changed functions only some cannot be evaluated at compile time:
They are put back to the test file. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one minor
@@ -3011,25 +3011,26 @@ entry: | |||
ret <4 x double> %log2 | |||
} | |||
|
|||
define <1 x float> @constrained_vector_rint_v1f32() #0 { | |||
define <1 x float> @constrained_vector_rint_v1f32(ptr %a) #0 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
change the name to constrained_vector_rint_v1f32_var (etc.) to match the naming convention for constant / non-constant tests
The existing tests for constrained functions often use constant arguments. If constant evaluation is enhanced, such tests will not check code generation of the tested functions. To avoid it, the tests are modified to use loaded value instead of constants. Now only the tests for rounding functions are changed.