Skip to content

Commit 4b62159

Browse files
committed
wild card out ambiguous registers, simplify fmod algorithm
1 parent 93d2c74 commit 4b62159

File tree

2 files changed

+50
-58
lines changed

2 files changed

+50
-58
lines changed

clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ fmod_impl(T X, T Y) {
7373
return __builtin_elementwise_fmod(X, Y);
7474
#else
7575
T div = X / Y;
76-
bool ge = div >= -div;
76+
bool ge = div >= 0;
7777
T frc = frac(abs(div));
7878
return select<T>(ge, frc, -frc) * Y;
7979
#endif
@@ -85,7 +85,7 @@ constexpr vector<T, N> fmod_vec_impl(vector<T, N> X, vector<T, N> Y) {
8585
return __builtin_elementwise_fmod(X, Y);
8686
#else
8787
vector<T, N> div = X / Y;
88-
vector<bool, N> ge = div >= -div;
88+
vector<bool, N> ge = div >= 0;
8989
vector<T, N> frc = frac(abs(div));
9090
return select<T>(ge, frc, -frc) * Y;
9191
#endif

clang/test/CodeGenHLSL/builtins/fmod.hlsl

Lines changed: 48 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -36,119 +36,111 @@
3636

3737

3838
// DXCHECK: define [[FNATTRS]] [[TYPE]] @
39-
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn [[TYPE]] %4, %5
40-
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn [[TYPE]] %7
41-
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge [[TYPE]] %6, %fneg.i
42-
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.fabs.[[INT_TYPE]]([[TYPE]] %8)
39+
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn [[TYPE]] %{{.*}}, %{{.*}}
40+
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge [[TYPE]] %{{.*}}, 0
41+
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.fabs.[[INT_TYPE]]([[TYPE]] %{{.*}})
4342
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.dx.frac.[[INT_TYPE]]([[TYPE]] %elt.abs.i)
44-
// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn [[TYPE]] %11
45-
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %loadedv.i, [[TYPE]] %10, [[TYPE]] %fneg2.i
46-
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn [[TYPE]] %hlsl.select.i, %12
43+
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn [[TYPE]] %{{.*}}
44+
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, [[TYPE]] %{{.*}}, [[TYPE]] %fneg.i
45+
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn [[TYPE]] %hlsl.select.i, %{{.*}}
4746
// DXCHECK: ret [[TYPE]] %mul.i
4847
// CHECK: define [[FNATTRS]] [[TYPE]] @
4948
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn [[TYPE]]
5049
// CHECK: ret [[TYPE]] %fmod.i
5150
half test_fmod_half(half p0, half p1) { return fmod(p0, p1); }
5251

5352
// DXCHECK: define [[FNATTRS]] <2 x [[TYPE]]> @
54-
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %4, %5
55-
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %7
56-
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x [[TYPE]]> %6, %fneg.i
57-
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.fabs.v2[[INT_TYPE]](<2 x [[TYPE]]> %9)
53+
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %{{.*}}, %{{.*}}
54+
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x [[TYPE]]> %{{.*}}, zeroinitializer
55+
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.fabs.v2[[INT_TYPE]](<2 x [[TYPE]]> %{{.*}})
5856
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.dx.frac.v2[[INT_TYPE]](<2 x [[TYPE]]> %elt.abs.i)
59-
// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %12
60-
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> %extractvec.i, <2 x [[TYPE]]> %11, <2 x [[TYPE]]> %fneg2.i
61-
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %hlsl.select.i, %13
57+
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %{{.*}}
58+
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> %{{.*}}, <2 x [[TYPE]]> %{{.*}}, <2 x [[TYPE]]> %fneg.i
59+
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %hlsl.select.i, %{{.*}}
6260
// DXCHECK: ret <2 x [[TYPE]]> %mul.i
6361
// CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @
6462
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]>
6563
// CHECK: ret <2 x [[TYPE]]> %fmod.i
6664
half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); }
6765

6866
// DXCHECK: define [[FNATTRS]] <3 x [[TYPE]]> @
69-
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %4, %5
70-
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %7
71-
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <3 x [[TYPE]]> %6, %fneg.i
72-
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> @llvm.fabs.v3[[INT_TYPE]](<3 x [[TYPE]]> %9)
67+
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %{{.*}}, %{{.*}}
68+
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <3 x [[TYPE]]> %{{.*}}, zeroinitializer
69+
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> @llvm.fabs.v3[[INT_TYPE]](<3 x [[TYPE]]> %{{.*}})
7370
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> @llvm.dx.frac.v3[[INT_TYPE]](<3 x [[TYPE]]> %elt.abs.i)
74-
// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %12
75-
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <3 x i1> %extractvec.i, <3 x [[TYPE]]> %11, <3 x [[TYPE]]> %fneg2.i
76-
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %hlsl.select.i, %13
71+
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %{{.*}}
72+
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <3 x i1> %{{.*}}, <3 x [[TYPE]]> %{{.*}}, <3 x [[TYPE]]> %fneg.i
73+
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %hlsl.select.i, %{{.*}}
7774
// DXCHECK: ret <3 x [[TYPE]]> %mul.i
7875
// CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @
7976
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]>
8077
// CHECK: ret <3 x [[TYPE]]> %fmod.i
8178
half3 test_fmod_half3(half3 p0, half3 p1) { return fmod(p0, p1); }
8279

8380
// DXCHECK: define [[FNATTRS]] <4 x [[TYPE]]> @
84-
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %4, %5
85-
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %7
86-
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <4 x [[TYPE]]> %6, %fneg.i
87-
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> @llvm.fabs.v4[[INT_TYPE]](<4 x [[TYPE]]> %9)
81+
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %{{.*}}, %{{.*}}
82+
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <4 x [[TYPE]]> %{{.*}}, zeroinitializer
83+
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> @llvm.fabs.v4[[INT_TYPE]](<4 x [[TYPE]]> %{{.*}})
8884
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> @llvm.dx.frac.v4[[INT_TYPE]](<4 x [[TYPE]]> %elt.abs.i)
89-
// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %12
90-
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <4 x i1> %extractvec.i, <4 x [[TYPE]]> %11, <4 x [[TYPE]]> %fneg2.i
91-
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %hlsl.select.i, %13
85+
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %{{.*}}
86+
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <4 x i1> %{{.*}}, <4 x [[TYPE]]> %{{.*}}, <4 x [[TYPE]]> %fneg.i
87+
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %hlsl.select.i, %{{.*}}
9288
// DXCHECK: ret <4 x [[TYPE]]> %mul.i
9389
// CHECK: define [[FNATTRS]] <4 x [[TYPE]]> @
9490
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]>
9591
// CHECK: ret <4 x [[TYPE]]> %fmod.i
9692
half4 test_fmod_half4(half4 p0, half4 p1) { return fmod(p0, p1); }
9793

9894
// DXCHECK: define [[FNATTRS]] float @
99-
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn float %4, %5
100-
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn float %7
101-
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge float %6, %fneg.i
102-
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float %8)
95+
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn float %{{.*}}, %{{.*}}
96+
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge float %{{.*}}, 0.000000e+00
97+
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float %{{.*}})
10398
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn float @llvm.dx.frac.f32(float %elt.abs.i)
104-
// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn float %11
105-
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %loadedv.i, float %10, float %fneg2.i
106-
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float %hlsl.select.i, %12
99+
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn float %{{.*}}
100+
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, float %{{.*}}, float %fneg.i
101+
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float %hlsl.select.i, %{{.*}}
107102
// DXCHECK: ret float %mul.i
108103
// CHECK: define [[FNATTRS]] float @
109104
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn float
110105
// CHECK: ret float %fmod.i
111106
float test_fmod_float(float p0, float p1) { return fmod(p0, p1); }
112107

113108
// DXCHECK: define [[FNATTRS]] <2 x float> @
114-
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x float> %4, %5
115-
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x float> %7
116-
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x float> %6, %fneg.i
117-
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fabs.v2f32(<2 x float> %9)
109+
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}}, %{{.*}}
110+
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x float> %{{.*}}, zeroinitializer
111+
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fabs.v2f32(<2 x float> %{{.*}})
118112
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.frac.v2f32(<2 x float> %elt.abs.i)
119-
// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn <2 x float> %12
120-
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> %extractvec.i, <2 x float> %11, <2 x float> %fneg2.i
121-
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x float> %hlsl.select.i, %13
113+
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}}
114+
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %fneg.i
115+
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x float> %hlsl.select.i, %{{.*}}
122116
// DXCHECK: ret <2 x float> %mul.i
123117
// CHECK: define [[FNATTRS]] <2 x float> @
124118
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x float>
125119
// CHECK: ret <2 x float> %fmod.i
126120
float2 test_fmod_float2(float2 p0, float2 p1) { return fmod(p0, p1); }
127121

128122
// DXCHECK: define [[FNATTRS]] <3 x float> @
129-
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x float> %4, %5
130-
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x float> %7
131-
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <3 x float> %6, %fneg.i
132-
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fabs.v3f32(<3 x float> %9)
123+
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x float> %{{.*}}, %{{.*}}
124+
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <3 x float> %{{.*}}, zeroinitializer
125+
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fabs.v3f32(<3 x float> %{{.*}})
133126
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.frac.v3f32(<3 x float> %elt.abs.i)
134-
// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn <3 x float> %12
135-
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <3 x i1> %extractvec.i, <3 x float> %11, <3 x float> %fneg2.i
136-
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <3 x float> %hlsl.select.i, %13
127+
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x float> %{{.*}}
128+
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <3 x i1> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %fneg.i
129+
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <3 x float> %hlsl.select.i, %{{.*}}
137130
// DXCHECK: ret <3 x float> %mul.i
138131
// CHECK: define [[FNATTRS]] <3 x float> @
139132
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <3 x float>
140133
// CHECK: ret <3 x float> %fmod.i
141134
float3 test_fmod_float3(float3 p0, float3 p1) { return fmod(p0, p1); }
142135

143136
// DXCHECK: define [[FNATTRS]] <4 x float> @
144-
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <4 x float> %4, %5
145-
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <4 x float> %7
146-
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <4 x float> %6, %fneg.i
147-
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fabs.v4f32(<4 x float> %9)
137+
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <4 x float> %{{.*}}, %{{.*}}
138+
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <4 x float> %{{.*}}, zeroinitializer
139+
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fabs.v4f32(<4 x float> %{{.*}})
148140
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.frac.v4f32(<4 x float> %elt.abs.i)
149-
// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn <4 x float> %12
150-
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <4 x i1> %extractvec.i, <4 x float> %11, <4 x float> %fneg2.i
151-
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %hlsl.select.i, %13
141+
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <4 x float> %{{.*}}
142+
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %fneg.i
143+
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %hlsl.select.i, %{{.*}}
152144
// DXCHECK: ret <4 x float> %mul.i
153145
// CHECK: define [[FNATTRS]] <4 x float> @
154146
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <4 x float>

0 commit comments

Comments
 (0)