Skip to content

Commit d0423ed

Browse files
committed
remove use of vec_reduce_add
1 parent 2de0c49 commit d0423ed

File tree

4 files changed

+64
-284
lines changed

4 files changed

+64
-284
lines changed

clang/lib/Headers/hlsl/hlsl_detail.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,11 @@ length_impl(T X) {
4848
}
4949

5050
template <typename T, int N>
51-
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
51+
enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
5252
length_vec_impl(vector<T, N> X) {
5353
vector<T, N> XSquared = X * X;
54-
T XSquaredSum = __builtin_reduce_add(XSquared);
54+
T XSquaredSum = XSquared[0];
55+
[unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i];
5556
return __builtin_elementwise_sqrt(XSquaredSum);
5657
}
5758

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,13 +1300,11 @@ float4 lerp(float4, float4, float4);
13001300
const inline half length(half X) { return __detail::length_impl(X); }
13011301
const inline float length(float X) { return __detail::length_impl(X); }
13021302

1303-
template <int N>
1304-
const inline half length(vector<half, N> X) {
1303+
template <int N> const inline half length(vector<half, N> X) {
13051304
return __detail::length_vec_impl(X);
13061305
}
13071306

1308-
template <int N>
1309-
const inline float length(vector<float, N> X) {
1307+
template <int N> const inline float length(vector<float, N> X) {
13101308
return __detail::length_vec_impl(X);
13111309
}
13121310

Lines changed: 59 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,14 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
22
// RUN: %clang_cc1 -finclude-default-header -triple \
33
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
4-
// RUN: -emit-llvm -O1 -o - | FileCheck %s
5-
// RUN: %clang_cc1 -finclude-default-header -triple \
6-
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
7-
// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefix=SPVCHECK
8-
4+
// RUN: -emit-llvm -O2 -o - | FileCheck %s
95

106
// CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
117
// CHECK-SAME: half noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
128
// CHECK-NEXT: [[ENTRY:.*:]]
139
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
1410
// CHECK-NEXT: ret half [[ELT_ABS_I]]
1511
//
16-
// SPVCHECK-LABEL: define spir_func noundef half @_Z16test_length_halfDh(
17-
// SPVCHECK-SAME: half noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
18-
// SPVCHECK-NEXT: [[ENTRY:.*:]]
19-
// SPVCHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
20-
// SPVCHECK-NEXT: ret half [[ELT_ABS_I]]
21-
//
2212
half test_length_half(half p0)
2313
{
2414
return length(p0);
@@ -28,16 +18,12 @@ half test_length_half(half p0)
2818
// CHECK-SAME: <2 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
2919
// CHECK-NEXT: [[ENTRY:.*:]]
3020
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
31-
// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[MUL_I]])
32-
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
21+
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
22+
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
23+
// CHECK-NEXT: [[ADD_I:%.*]] = fadd half [[VECEXT1_I]], [[VECEXT_I]]
24+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I]])
3325
// CHECK-NEXT: ret half [[TMP0]]
3426
//
35-
// SPVCHECK-LABEL: define spir_func noundef half @_Z17test_length_half2Dv2_Dh(
36-
// SPVCHECK-SAME: <2 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
37-
// SPVCHECK-NEXT: [[ENTRY:.*:]]
38-
// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.spv.length.v2f16(<2 x half> [[P0]])
39-
// SPVCHECK-NEXT: ret half [[HLSL_LENGTH_I]]
40-
//
4127
half test_length_half2(half2 p0)
4228
{
4329
return length(p0);
@@ -47,16 +33,14 @@ half test_length_half2(half2 p0)
4733
// CHECK-SAME: <3 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
4834
// CHECK-NEXT: [[ENTRY:.*:]]
4935
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
50-
// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> [[MUL_I]])
51-
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
36+
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
37+
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
38+
// CHECK-NEXT: [[ADD_I:%.*]] = fadd half [[VECEXT_I]], [[VECEXT1_I]]
39+
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
40+
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd half [[ADD_I]], [[VECEXT1_I_1]]
41+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I_1]])
5242
// CHECK-NEXT: ret half [[TMP0]]
5343
//
54-
// SPVCHECK-LABEL: define spir_func noundef half @_Z17test_length_half3Dv3_Dh(
55-
// SPVCHECK-SAME: <3 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
56-
// SPVCHECK-NEXT: [[ENTRY:.*:]]
57-
// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.spv.length.v3f16(<3 x half> [[P0]])
58-
// SPVCHECK-NEXT: ret half [[HLSL_LENGTH_I]]
59-
//
6044
half test_length_half3(half3 p0)
6145
{
6246
return length(p0);
@@ -66,16 +50,16 @@ half test_length_half3(half3 p0)
6650
// CHECK-SAME: <4 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
6751
// CHECK-NEXT: [[ENTRY:.*:]]
6852
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
69-
// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[MUL_I]])
70-
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
53+
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
54+
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
55+
// CHECK-NEXT: [[ADD_I:%.*]] = fadd half [[VECEXT_I]], [[VECEXT1_I]]
56+
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
57+
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd half [[ADD_I]], [[VECEXT1_I_1]]
58+
// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
59+
// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd half [[ADD_I_1]], [[VECEXT1_I_2]]
60+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I_2]])
7161
// CHECK-NEXT: ret half [[TMP0]]
7262
//
73-
// SPVCHECK-LABEL: define spir_func noundef half @_Z17test_length_half4Dv4_Dh(
74-
// SPVCHECK-SAME: <4 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
75-
// SPVCHECK-NEXT: [[ENTRY:.*:]]
76-
// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.spv.length.v4f16(<4 x half> [[P0]])
77-
// SPVCHECK-NEXT: ret half [[HLSL_LENGTH_I]]
78-
//
7963
half test_length_half4(half4 p0)
8064
{
8165
return length(p0);
@@ -88,12 +72,6 @@ half test_length_half4(half4 p0)
8872
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
8973
// CHECK-NEXT: ret float [[ELT_ABS_I]]
9074
//
91-
// SPVCHECK-LABEL: define spir_func noundef float @_Z17test_length_floatf(
92-
// SPVCHECK-SAME: float noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
93-
// SPVCHECK-NEXT: [[ENTRY:.*:]]
94-
// SPVCHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
95-
// SPVCHECK-NEXT: ret float [[ELT_ABS_I]]
96-
//
9775
float test_length_float(float p0)
9876
{
9977
return length(p0);
@@ -103,16 +81,12 @@ float test_length_float(float p0)
10381
// CHECK-SAME: <2 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
10482
// CHECK-NEXT: [[ENTRY:.*:]]
10583
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
106-
// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[MUL_I]])
107-
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
84+
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
85+
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
86+
// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[VECEXT1_I]], [[VECEXT_I]]
87+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I]])
10888
// CHECK-NEXT: ret float [[TMP0]]
10989
//
110-
// SPVCHECK-LABEL: define spir_func noundef float @_Z18test_length_float2Dv2_f(
111-
// SPVCHECK-SAME: <2 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
112-
// SPVCHECK-NEXT: [[ENTRY:.*:]]
113-
// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.spv.length.v2f32(<2 x float> [[P0]])
114-
// SPVCHECK-NEXT: ret float [[HLSL_LENGTH_I]]
115-
//
11690
float test_length_float2(float2 p0)
11791
{
11892
return length(p0);
@@ -122,38 +96,56 @@ float test_length_float2(float2 p0)
12296
// CHECK-SAME: <3 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
12397
// CHECK-NEXT: [[ENTRY:.*:]]
12498
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
125-
// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[MUL_I]])
126-
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
99+
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
100+
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
101+
// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]]
102+
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
103+
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd float [[ADD_I]], [[VECEXT1_I_1]]
104+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I_1]])
127105
// CHECK-NEXT: ret float [[TMP0]]
128106
//
129-
// SPVCHECK-LABEL: define spir_func noundef float @_Z18test_length_float3Dv3_f(
130-
// SPVCHECK-SAME: <3 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
131-
// SPVCHECK-NEXT: [[ENTRY:.*:]]
132-
// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.spv.length.v3f32(<3 x float> [[P0]])
133-
// SPVCHECK-NEXT: ret float [[HLSL_LENGTH_I]]
134-
//
135107
float test_length_float3(float3 p0)
136108
{
137109
return length(p0);
138110
}
139111

112+
140113
// CHECK-LABEL: define noundef float @_Z18test_length_float4Dv4_f(
141114
// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
142115
// CHECK-NEXT: [[ENTRY:.*:]]
143116
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
144-
// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[MUL_I]])
145-
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
117+
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
118+
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
119+
// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]]
120+
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
121+
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd float [[ADD_I]], [[VECEXT1_I_1]]
122+
// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
123+
// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd float [[ADD_I_1]], [[VECEXT1_I_2]]
124+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I_2]])
125+
// CHECK-NEXT: ret float [[TMP0]]
126+
//
127+
float test_length_float4(float4 p0)
128+
{
129+
return length(p0);
130+
}
131+
132+
133+
// CHECK-LABEL: define noundef float @_Z26test_length_float4_extractDv4_f(
134+
// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
135+
// CHECK-NEXT: [[ENTRY:.*:]]
136+
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
137+
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
138+
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
139+
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
140+
// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
141+
// CHECK-NEXT: [[ADD_I12:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]]
142+
// CHECK-NEXT: [[ADD_I12_1:%.*]] = fadd float [[ADD_I12]], [[VECEXT1_I_1]]
143+
// CHECK-NEXT: [[ADD_I12_2:%.*]] = fadd float [[ADD_I12_1]], [[VECEXT1_I_2]]
144+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I12_2]])
146145
// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP0]]
147146
// CHECK-NEXT: ret float [[ADD]]
148147
//
149-
// SPVCHECK-LABEL: define spir_func noundef float @_Z18test_length_float4Dv4_f(
150-
// SPVCHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
151-
// SPVCHECK-NEXT: [[ENTRY:.*:]]
152-
// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.spv.length.v4f32(<4 x float> [[P0]])
153-
// SPVCHECK-NEXT: [[ADD:%.*]] = fadd float [[HLSL_LENGTH_I]], [[HLSL_LENGTH_I]]
154-
// SPVCHECK-NEXT: ret float [[ADD]]
155-
//
156-
float test_length_float4(float4 p0)
148+
float test_length_float4_extract(float4 p0)
157149
{
158150
return length(p0) + length(p0);
159151
}

0 commit comments

Comments
 (0)