Skip to content

Commit d56e0d0

Browse files
authored
clang/OpenCL: set sqrt fp accuracy on call to Z4sqrt (#66651)
This reverts the previous implementation to avoid adding inline functions to the OpenCL headers. That approach was breaking the clspv flow (google/clspv#1231), while https://reviews.llvm.org/D156743 mentioned that just decorating the call node with `!fpmath` was enough. This PR implements that idea. The test has been updated to match this implementation.
1 parent e017169 commit d56e0d0

File tree

5 files changed

+82
-135
lines changed

5 files changed

+82
-135
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5608,6 +5608,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
56085608
BundleList);
56095609
EmitBlock(Cont);
56105610
}
5611+
if (CI->getCalledFunction() && CI->getCalledFunction()->hasName() &&
5612+
CI->getCalledFunction()->getName().startswith("_Z4sqrt")) {
5613+
SetSqrtFPAccuracy(CI);
5614+
}
56115615
if (callOrInvoke)
56125616
*callOrInvoke = CI;
56135617

clang/lib/Headers/opencl-c-base.h

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -819,64 +819,6 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)))
819819

820820
#endif // cl_intel_device_side_avc_motion_estimation
821821

822-
/**
823-
* Compute square root.
824-
*
825-
* Provide inline implementations using the builtin so that we get appropriate
826-
* !fpmath based on -cl-fp32-correctly-rounded-divide-sqrt, attached to
827-
* llvm.sqrt. The implementation should still provide an external definition.
828-
*/
829-
#define __ovld __attribute__((overloadable))
830-
#define __cnfn __attribute__((const))
831-
832-
inline float __ovld __cnfn sqrt(float __x) {
833-
return __builtin_elementwise_sqrt(__x);
834-
}
835-
836-
inline float2 __ovld __cnfn sqrt(float2 __x) {
837-
return __builtin_elementwise_sqrt(__x);
838-
}
839-
840-
inline float3 __ovld __cnfn sqrt(float3 __x) {
841-
return __builtin_elementwise_sqrt(__x);
842-
}
843-
844-
inline float4 __ovld __cnfn sqrt(float4 __x) {
845-
return __builtin_elementwise_sqrt(__x);
846-
}
847-
848-
inline float8 __ovld __cnfn sqrt(float8 __x) {
849-
return __builtin_elementwise_sqrt(__x);
850-
}
851-
852-
inline float16 __ovld __cnfn sqrt(float16 __x) {
853-
return __builtin_elementwise_sqrt(__x);
854-
}
855-
856-
// We only really want to define the float variants here. However
857-
// -fdeclare-opencl-builtins will not work if some overloads are already
858-
// provided in the base header, so provide all overloads here.
859-
860-
#ifdef cl_khr_fp64
861-
double __ovld __cnfn sqrt(double);
862-
double2 __ovld __cnfn sqrt(double2);
863-
double3 __ovld __cnfn sqrt(double3);
864-
double4 __ovld __cnfn sqrt(double4);
865-
double8 __ovld __cnfn sqrt(double8);
866-
double16 __ovld __cnfn sqrt(double16);
867-
#endif //cl_khr_fp64
868-
#ifdef cl_khr_fp16
869-
half __ovld __cnfn sqrt(half);
870-
half2 __ovld __cnfn sqrt(half2);
871-
half3 __ovld __cnfn sqrt(half3);
872-
half4 __ovld __cnfn sqrt(half4);
873-
half8 __ovld __cnfn sqrt(half8);
874-
half16 __ovld __cnfn sqrt(half16);
875-
#endif //cl_khr_fp16
876-
877-
#undef __cnfn
878-
#undef __ovld
879-
880822
// Disable any extensions we may have enabled previously.
881823
#pragma OPENCL EXTENSION all : disable
882824

clang/lib/Headers/opencl-c.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8496,6 +8496,32 @@ half8 __ovld __cnfn sinpi(half8);
84968496
half16 __ovld __cnfn sinpi(half16);
84978497
#endif //cl_khr_fp16
84988498

8499+
/**
8500+
* Compute square root.
8501+
*/
8502+
float __ovld __cnfn sqrt(float);
8503+
float2 __ovld __cnfn sqrt(float2);
8504+
float3 __ovld __cnfn sqrt(float3);
8505+
float4 __ovld __cnfn sqrt(float4);
8506+
float8 __ovld __cnfn sqrt(float8);
8507+
float16 __ovld __cnfn sqrt(float16);
8508+
#ifdef cl_khr_fp64
8509+
double __ovld __cnfn sqrt(double);
8510+
double2 __ovld __cnfn sqrt(double2);
8511+
double3 __ovld __cnfn sqrt(double3);
8512+
double4 __ovld __cnfn sqrt(double4);
8513+
double8 __ovld __cnfn sqrt(double8);
8514+
double16 __ovld __cnfn sqrt(double16);
8515+
#endif //cl_khr_fp64
8516+
#ifdef cl_khr_fp16
8517+
half __ovld __cnfn sqrt(half);
8518+
half2 __ovld __cnfn sqrt(half2);
8519+
half3 __ovld __cnfn sqrt(half3);
8520+
half4 __ovld __cnfn sqrt(half4);
8521+
half8 __ovld __cnfn sqrt(half8);
8522+
half16 __ovld __cnfn sqrt(half16);
8523+
#endif //cl_khr_fp16
8524+
84998525
/**
85008526
* Compute tangent.
85018527
*/

clang/lib/Sema/OpenCLBuiltins.td

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -563,15 +563,12 @@ foreach name = ["acos", "acosh", "acospi",
563563
"log", "log2", "log10", "log1p", "logb",
564564
"rint", "round", "rsqrt",
565565
"sin", "sinh", "sinpi",
566+
"sqrt",
566567
"tan", "tanh", "tanpi",
567568
"tgamma", "trunc",
568569
"lgamma"] in {
569570
def : Builtin<name, [FGenTypeN, FGenTypeN], Attr.Const>;
570571
}
571-
572-
// sqrt is handled in opencl-c-base.h to handle
573-
// -cl-fp32-correctly-rounded-divide-sqrt.
574-
575572
foreach name = ["nan"] in {
576573
def : Builtin<name, [GenTypeFloatVecAndScalar, GenTypeUIntVecAndScalar], Attr.Const>;
577574
def : Builtin<name, [GenTypeDoubleVecAndScalar, GenTypeULongVecAndScalar], Attr.Const>;

clang/test/CodeGenOpenCL/sqrt-fpmath.cl

Lines changed: 51 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@
33
// depending on -cl-fp32-correctly-rounded-divide-sqrt
44

55
// Test with -fdeclare-opencl-builtins
6-
// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
7-
// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
6+
// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o %t.ll %s
7+
// RUN: FileCheck -check-prefixes=CHECK,DEFAULT %s < %t.ll
8+
// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o %t.ll %s
9+
// RUN: FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s < %t.ll
810

9-
// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s
10-
// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s
11+
// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o %t.ll %s
12+
// RUN: FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s < %t.ll
13+
// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o %t.ll %s
14+
// RUN: FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s < %t.ll
1115

1216
// Test without -fdeclare-opencl-builtins
1317
// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
@@ -19,183 +23,157 @@
1923
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2024

2125
// CHECK-LABEL: define {{.*}} float @call_sqrt_f32(
22-
// CHECK: call {{.*}} float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+$}}
26+
// DEFAULT: call float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH:\![0-9]+]]{{$}}
27+
// CORRECTLYROUNDED: call float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}{{$}}
28+
29+
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH:\![0-9]+]]{{$}}
30+
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}{{$}}
2331
float call_sqrt_f32(float x) {
2432
return sqrt(x);
2533
}
2634

27-
// CHECK-LABEL: define available_externally float @_Z4sqrtf(float noundef %__x)
28-
// DEFAULT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
29-
// CORRECTLYROUNDED: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
30-
31-
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
32-
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}){{$}}
33-
3435
// CHECK-LABEL: define {{.*}} <2 x float> @call_sqrt_v2f32(
35-
// CHECK: call {{.*}} <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.*}}) #{{[0-9]+$}}
36+
// DEFAULT: call <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
37+
// CORRECTLYROUNDED: call <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
38+
39+
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
40+
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
3641
float2 call_sqrt_v2f32(float2 x) {
3742
return sqrt(x);
3843
}
3944

40-
// CHECK-LABEL: define available_externally <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %__x)
41-
// DEFAULT: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
42-
// CORRECTLYROUNDED: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}}
43-
44-
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
45-
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}}
4645

4746
// CHECK-LABEL: define {{.*}} <3 x float> @call_sqrt_v3f32(
48-
// CHECK: call {{.*}} <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.*}}) #{{[0-9]+$}}
47+
// DEFAULT: call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
48+
// CORRECTLYROUNDED: call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
49+
50+
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
51+
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
4952
float3 call_sqrt_v3f32(float3 x) {
5053
return sqrt(x);
5154
}
5255

53-
// CHECK-LABEL: define available_externally <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %__x)
54-
// DEFAULT: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
55-
// CORRECTLYROUNDED: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}}
56-
57-
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
58-
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}}
5956

6057

6158
// CHECK-LABEL: define {{.*}} <4 x float> @call_sqrt_v4f32(
62-
// CHECK: call {{.*}} <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.*}}) #{{[0-9]+$}}
59+
// DEFAULT: call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
60+
// CORRECTLYROUNDED: call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
61+
62+
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
63+
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
6364
float4 call_sqrt_v4f32(float4 x) {
6465
return sqrt(x);
6566
}
6667

67-
// CHECK-LABEL: define available_externally <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %__x)
68-
// DEFAULT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
69-
// CORRECTLYROUNDED: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
70-
71-
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
72-
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
7368

7469
// CHECK-LABEL: define {{.*}} <8 x float> @call_sqrt_v8f32(
75-
// CHECK: call {{.*}} <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.*}}) #{{[0-9]+$}}
70+
// DEFAULT: call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
71+
// CORRECTLYROUNDED: call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
72+
73+
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
74+
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
7675
float8 call_sqrt_v8f32(float8 x) {
7776
return sqrt(x);
7877
}
7978

80-
// CHECK-LABEL: define available_externally <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %__x)
81-
// DEFAULT: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
82-
// CORRECTLYROUNDED: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}}
83-
84-
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
85-
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}}
8679

8780

8881
// CHECK-LABEL: define {{.*}} <16 x float> @call_sqrt_v16f32(
89-
// CHECK: call {{.*}} <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.*}}) #{{[0-9]+$}}
82+
// DEFAULT: call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
83+
// CORRECTLYROUNDED: call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
84+
85+
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}}
86+
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}}
9087
float16 call_sqrt_v16f32(float16 x) {
9188
return sqrt(x);
9289
}
9390

94-
// CHECK-LABEL: define available_externally <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %__x)
95-
// DEFAULT: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
96-
// CORRECTLYROUNDED: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}}
97-
98-
// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
99-
// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}}
10091

10192

10293
// Not for f64
10394
// CHECK-LABEL: define {{.*}} double @call_sqrt_f64(
104-
// CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}}
95+
// CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}}{{$}}
10596
double call_sqrt_f64(double x) {
10697
return sqrt(x);
10798
}
10899

109-
// CHECK-NOT: define
110100

111101
// Not for f64
112102
// CHECK-LABEL: define {{.*}} <2 x double> @call_sqrt_v2f64(
113-
// CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}}
103+
// CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
114104
double2 call_sqrt_v2f64(double2 x) {
115105
return sqrt(x);
116106
}
117107

118-
// CHECK-NOT: define
119108

120109
// CHECK-LABEL: define {{.*}} <3 x double> @call_sqrt_v3f64(
121-
// CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}}
110+
// CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
122111
double3 call_sqrt_v3f64(double3 x) {
123112
return sqrt(x);
124113
}
125114

126-
// CHECK-NOT: define
127115

128116
// CHECK-LABEL: define {{.*}} <4 x double> @call_sqrt_v4f64(
129-
// CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}}
117+
// CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
130118
double4 call_sqrt_v4f64(double4 x) {
131119
return sqrt(x);
132120
}
133121

134-
// CHECK-NOT: define
135122

136123
// CHECK-LABEL: define {{.*}} <8 x double> @call_sqrt_v8f64(
137-
// CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}}
124+
// CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
138125
double8 call_sqrt_v8f64(double8 x) {
139126
return sqrt(x);
140127
}
141128

142-
// CHECK-NOT: define
143129

144130
// CHECK-LABEL: define {{.*}} <16 x double> @call_sqrt_v16f64(
145-
// CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}}
131+
// CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
146132
double16 call_sqrt_v16f64(double16 x) {
147133
return sqrt(x);
148134
}
149135

150-
// CHECK-NOT: define
151136

152137
// Not for f16
153138
// CHECK-LABEL: define {{.*}} half @call_sqrt_f16(
154-
// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}
139+
// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}}
155140
half call_sqrt_f16(half x) {
156141
return sqrt(x);
157142
}
158143

159-
// CHECK-NOT: define
160144

161145
// CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16(
162-
// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}
146+
// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
163147
half2 call_sqrt_v2f16(half2 x) {
164148
return sqrt(x);
165149
}
166150

167-
// CHECK-NOT: define
168151

169152
// CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16(
170-
// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}
153+
// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
171154
half3 call_sqrt_v3f16(half3 x) {
172155
return sqrt(x);
173156
}
174157

175-
// CHECK-NOT: define
176158

177159
// CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16(
178-
// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}
160+
// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
179161
half4 call_sqrt_v4f16(half4 x) {
180162
return sqrt(x);
181163
}
182164

183-
// CHECK-NOT: define
184165

185166
// CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16(
186-
// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}
167+
// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
187168
half8 call_sqrt_v8f16(half8 x) {
188169
return sqrt(x);
189170
}
190171

191-
// CHECK-NOT: define
192172

193173
// CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16(
194-
// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}
174+
// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
195175
half16 call_sqrt_v16f16(half16 x) {
196176
return sqrt(x);
197177
}
198178

199-
// CHECK-NOT: define
200-
201-
// DEFAULT: [[$FPMATH]] = !{float 3.000000e+00}
179+
// DEFAULT: [[FPMATH]] = !{float 3.000000e+00}

0 commit comments

Comments
 (0)