Skip to content

Commit 319c7a4

Browse files
authored
[HLSL][SPIRV] Add SPIRV generation for HLSL dot (llvm#104656)
This adds the SPIRV fdot, sdot, and udot intrinsics and allows them to be created at codegen depending on the target architecture. This required moving some of the DXIL-specific choices out of codegen and into DXIL instruction expansion, and providing it with a more generic fdot intrinsic as well. Removed some stale comments that gave the obsolete impression that type conversions should be expected to match overloads. The SPIRV intrinsic handling involves generating multiply and add operations for integers and the existing OpDot operation for floating point. New tests for generating SPIRV float and integer dot intrinsics are added, as well as expanding HLSL tests to include SPIRV generation. Used new dot product intrinsic generation to implement normalize() in SPIRV. Incidentally changed existing dot intrinsic definitions to use DefaultAttrsIntrinsic to match the newly added intrinsics. Fixes llvm#88056
1 parent 518b1f0 commit 319c7a4

File tree

13 files changed

+579
-252
lines changed

13 files changed

+579
-252
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18497,22 +18497,14 @@ llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
1849718497
return Arg;
1849818498
}
1849918499

18500-
Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
18501-
if (QT->hasFloatingRepresentation()) {
18502-
switch (elementCount) {
18503-
case 2:
18504-
return Intrinsic::dx_dot2;
18505-
case 3:
18506-
return Intrinsic::dx_dot3;
18507-
case 4:
18508-
return Intrinsic::dx_dot4;
18509-
}
18510-
}
18511-
if (QT->hasSignedIntegerRepresentation())
18512-
return Intrinsic::dx_sdot;
18513-
18514-
assert(QT->hasUnsignedIntegerRepresentation());
18515-
return Intrinsic::dx_udot;
18500+
// Return dot product intrinsic that corresponds to the QT scalar type
18501+
Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
18502+
if (QT->isFloatingType())
18503+
return RT.getFDotIntrinsic();
18504+
if (QT->isSignedIntegerType())
18505+
return RT.getSDotIntrinsic();
18506+
assert(QT->isUnsignedIntegerType());
18507+
return RT.getUDotIntrinsic();
1851618508
}
1851718509

1851818510
Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
@@ -18555,37 +18547,38 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1855518547
Value *Op1 = EmitScalarExpr(E->getArg(1));
1855618548
llvm::Type *T0 = Op0->getType();
1855718549
llvm::Type *T1 = Op1->getType();
18550+
18551+
// If the arguments are scalars, just emit a multiply
1855818552
if (!T0->isVectorTy() && !T1->isVectorTy()) {
1855918553
if (T0->isFloatingPointTy())
18560-
return Builder.CreateFMul(Op0, Op1, "dx.dot");
18554+
return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
1856118555

1856218556
if (T0->isIntegerTy())
18563-
return Builder.CreateMul(Op0, Op1, "dx.dot");
18557+
return Builder.CreateMul(Op0, Op1, "hlsl.dot");
1856418558

18565-
// Bools should have been promoted
1856618559
llvm_unreachable(
1856718560
"Scalar dot product is only supported on ints and floats.");
1856818561
}
18562+
// For vectors, validate types and emit the appropriate intrinsic
18563+
1856918564
// A VectorSplat should have happened
1857018565
assert(T0->isVectorTy() && T1->isVectorTy() &&
1857118566
"Dot product of vector and scalar is not supported.");
1857218567

18573-
// A vector sext or sitofp should have happened
18574-
assert(T0->getScalarType() == T1->getScalarType() &&
18575-
"Dot product of vectors need the same element types.");
18576-
1857718568
auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
1857818569
[[maybe_unused]] auto *VecTy1 =
1857918570
E->getArg(1)->getType()->getAs<VectorType>();
18580-
// A HLSLVectorTruncation should have happend
18571+
18572+
assert(VecTy0->getElementType() == VecTy1->getElementType() &&
18573+
"Dot product of vectors need the same element types.");
18574+
1858118575
assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
1858218576
"Dot product requires vectors to be of the same size.");
1858318577

1858418578
return Builder.CreateIntrinsic(
1858518579
/*ReturnType=*/T0->getScalarType(),
18586-
getDotProductIntrinsic(E->getArg(0)->getType(),
18587-
VecTy0->getNumElements()),
18588-
ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18580+
getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
18581+
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
1858918582
} break;
1859018583
case Builtin::BI__builtin_hlsl_lerp: {
1859118584
Value *X = EmitScalarExpr(E->getArg(0));

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ class CGHLSLRuntime {
8181
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
8282
GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
8383
GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
84+
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
85+
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
86+
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
8487

8588
//===----------------------------------------------------------------------===//
8689
// End of reserved area for HLSL intrinsic getters.

clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
// CHECK-LABEL: builtin_bool_to_float_type_promotion
44
// CHECK: %conv1 = uitofp i1 %loadedv to double
5-
// CHECK: %dx.dot = fmul double %conv, %conv1
6-
// CHECK: %conv2 = fptrunc double %dx.dot to float
5+
// CHECK: %hlsl.dot = fmul double %conv, %conv1
6+
// CHECK: %conv2 = fptrunc double %hlsl.dot to float
77
// CHECK: ret float %conv2
88
float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
99
return __builtin_hlsl_dot ( p0, p1 );
@@ -12,8 +12,8 @@ float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
1212
// CHECK-LABEL: builtin_bool_to_float_arg1_type_promotion
1313
// CHECK: %conv = uitofp i1 %loadedv to double
1414
// CHECK: %conv1 = fpext float %1 to double
15-
// CHECK: %dx.dot = fmul double %conv, %conv1
16-
// CHECK: %conv2 = fptrunc double %dx.dot to float
15+
// CHECK: %hlsl.dot = fmul double %conv, %conv1
16+
// CHECK: %conv2 = fptrunc double %hlsl.dot to float
1717
// CHECK: ret float %conv2
1818
float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) {
1919
return __builtin_hlsl_dot ( p0, p1 );
@@ -22,8 +22,8 @@ float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) {
2222
// CHECK-LABEL: builtin_dot_int_to_float_promotion
2323
// CHECK: %conv = fpext float %0 to double
2424
// CHECK: %conv1 = sitofp i32 %1 to double
25-
// CHECK: dx.dot = fmul double %conv, %conv1
26-
// CHECK: %conv2 = fptrunc double %dx.dot to float
25+
// CHECK: dot = fmul double %conv, %conv1
26+
// CHECK: %conv2 = fptrunc double %hlsl.dot to float
2727
// CHECK: ret float %conv2
2828
float builtin_dot_int_to_float_promotion ( float p0, int p1 ) {
2929
return __builtin_hlsl_dot ( p0, p1 );
Lines changed: 109 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,161 +1,172 @@
11
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
22
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
33
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
4-
// RUN: --check-prefixes=CHECK,NATIVE_HALF
4+
// RUN: --check-prefixes=CHECK,DXCHECK,NATIVE_HALF
55
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
66
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
7-
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
7+
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXCHECK,NO_HALF
88

9-
#ifdef __HLSL_ENABLE_16_BIT
10-
// NATIVE_HALF: %dx.dot = mul i16 %0, %1
11-
// NATIVE_HALF: ret i16 %dx.dot
12-
int16_t test_dot_short(int16_t p0, int16_t p1) { return dot(p0, p1); }
13-
14-
// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.sdot.v2i16(<2 x i16> %0, <2 x i16> %1)
15-
// NATIVE_HALF: ret i16 %dx.dot
16-
int16_t test_dot_short2(int16_t2 p0, int16_t2 p1) { return dot(p0, p1); }
17-
18-
// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.sdot.v3i16(<3 x i16> %0, <3 x i16> %1)
19-
// NATIVE_HALF: ret i16 %dx.dot
20-
int16_t test_dot_short3(int16_t3 p0, int16_t3 p1) { return dot(p0, p1); }
21-
22-
// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.sdot.v4i16(<4 x i16> %0, <4 x i16> %1)
23-
// NATIVE_HALF: ret i16 %dx.dot
24-
int16_t test_dot_short4(int16_t4 p0, int16_t4 p1) { return dot(p0, p1); }
25-
26-
// NATIVE_HALF: %dx.dot = mul i16 %0, %1
27-
// NATIVE_HALF: ret i16 %dx.dot
28-
uint16_t test_dot_ushort(uint16_t p0, uint16_t p1) { return dot(p0, p1); }
29-
30-
// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.udot.v2i16(<2 x i16> %0, <2 x i16> %1)
31-
// NATIVE_HALF: ret i16 %dx.dot
32-
uint16_t test_dot_ushort2(uint16_t2 p0, uint16_t2 p1) { return dot(p0, p1); }
33-
34-
// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.udot.v3i16(<3 x i16> %0, <3 x i16> %1)
35-
// NATIVE_HALF: ret i16 %dx.dot
36-
uint16_t test_dot_ushort3(uint16_t3 p0, uint16_t3 p1) { return dot(p0, p1); }
9+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
10+
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
11+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
12+
// RUN: --check-prefixes=CHECK,SPVCHECK,NATIVE_HALF
13+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
14+
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
15+
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,SPVCHECK,NO_HALF
3716

38-
// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.udot.v4i16(<4 x i16> %0, <4 x i16> %1)
39-
// NATIVE_HALF: ret i16 %dx.dot
40-
uint16_t test_dot_ushort4(uint16_t4 p0, uint16_t4 p1) { return dot(p0, p1); }
41-
#endif
4217

43-
// CHECK: %dx.dot = mul i32 %0, %1
44-
// CHECK: ret i32 %dx.dot
18+
// CHECK: %hlsl.dot = mul i32
19+
// CHECK: ret i32 %hlsl.dot
4520
int test_dot_int(int p0, int p1) { return dot(p0, p1); }
4621

47-
// CHECK: %dx.dot = call i32 @llvm.dx.sdot.v2i32(<2 x i32> %0, <2 x i32> %1)
48-
// CHECK: ret i32 %dx.dot
22+
// Capture the expected interchange format so not every check needs to be duplicated
23+
// DXCHECK: %hlsl.dot = call i32 @llvm.[[ICF:dx]].sdot.v2i32(<2 x i32>
24+
// SPVCHECK: %hlsl.dot = call i32 @llvm.[[ICF:spv]].sdot.v2i32(<2 x i32>
25+
// CHECK: ret i32 %hlsl.dot
4926
int test_dot_int2(int2 p0, int2 p1) { return dot(p0, p1); }
5027

51-
// CHECK: %dx.dot = call i32 @llvm.dx.sdot.v3i32(<3 x i32> %0, <3 x i32> %1)
52-
// CHECK: ret i32 %dx.dot
28+
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].sdot.v3i32(<3 x i32>
29+
// CHECK: ret i32 %hlsl.dot
5330
int test_dot_int3(int3 p0, int3 p1) { return dot(p0, p1); }
5431

55-
// CHECK: %dx.dot = call i32 @llvm.dx.sdot.v4i32(<4 x i32> %0, <4 x i32> %1)
56-
// CHECK: ret i32 %dx.dot
32+
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].sdot.v4i32(<4 x i32>
33+
// CHECK: ret i32 %hlsl.dot
5734
int test_dot_int4(int4 p0, int4 p1) { return dot(p0, p1); }
5835

59-
// CHECK: %dx.dot = mul i32 %0, %1
60-
// CHECK: ret i32 %dx.dot
36+
// CHECK: %hlsl.dot = mul i32
37+
// CHECK: ret i32 %hlsl.dot
6138
uint test_dot_uint(uint p0, uint p1) { return dot(p0, p1); }
6239

63-
// CHECK: %dx.dot = call i32 @llvm.dx.udot.v2i32(<2 x i32> %0, <2 x i32> %1)
64-
// CHECK: ret i32 %dx.dot
40+
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].udot.v2i32(<2 x i32>
41+
// CHECK: ret i32 %hlsl.dot
6542
uint test_dot_uint2(uint2 p0, uint2 p1) { return dot(p0, p1); }
6643

67-
// CHECK: %dx.dot = call i32 @llvm.dx.udot.v3i32(<3 x i32> %0, <3 x i32> %1)
68-
// CHECK: ret i32 %dx.dot
44+
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].udot.v3i32(<3 x i32>
45+
// CHECK: ret i32 %hlsl.dot
6946
uint test_dot_uint3(uint3 p0, uint3 p1) { return dot(p0, p1); }
7047

71-
// CHECK: %dx.dot = call i32 @llvm.dx.udot.v4i32(<4 x i32> %0, <4 x i32> %1)
72-
// CHECK: ret i32 %dx.dot
48+
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].udot.v4i32(<4 x i32>
49+
// CHECK: ret i32 %hlsl.dot
7350
uint test_dot_uint4(uint4 p0, uint4 p1) { return dot(p0, p1); }
7451

75-
// CHECK: %dx.dot = mul i64 %0, %1
76-
// CHECK: ret i64 %dx.dot
52+
// CHECK: %hlsl.dot = mul i64
53+
// CHECK: ret i64 %hlsl.dot
7754
int64_t test_dot_long(int64_t p0, int64_t p1) { return dot(p0, p1); }
7855

79-
// CHECK: %dx.dot = call i64 @llvm.dx.sdot.v2i64(<2 x i64> %0, <2 x i64> %1)
80-
// CHECK: ret i64 %dx.dot
56+
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].sdot.v2i64(<2 x i64>
57+
// CHECK: ret i64 %hlsl.dot
8158
int64_t test_dot_long2(int64_t2 p0, int64_t2 p1) { return dot(p0, p1); }
8259

83-
// CHECK: %dx.dot = call i64 @llvm.dx.sdot.v3i64(<3 x i64> %0, <3 x i64> %1)
84-
// CHECK: ret i64 %dx.dot
60+
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].sdot.v3i64(<3 x i64>
61+
// CHECK: ret i64 %hlsl.dot
8562
int64_t test_dot_long3(int64_t3 p0, int64_t3 p1) { return dot(p0, p1); }
8663

87-
// CHECK: %dx.dot = call i64 @llvm.dx.sdot.v4i64(<4 x i64> %0, <4 x i64> %1)
88-
// CHECK: ret i64 %dx.dot
64+
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].sdot.v4i64(<4 x i64>
65+
// CHECK: ret i64 %hlsl.dot
8966
int64_t test_dot_long4(int64_t4 p0, int64_t4 p1) { return dot(p0, p1); }
9067

91-
// CHECK: %dx.dot = mul i64 %0, %1
92-
// CHECK: ret i64 %dx.dot
68+
// CHECK: %hlsl.dot = mul i64
69+
// CHECK: ret i64 %hlsl.dot
9370
uint64_t test_dot_ulong(uint64_t p0, uint64_t p1) { return dot(p0, p1); }
9471

95-
// CHECK: %dx.dot = call i64 @llvm.dx.udot.v2i64(<2 x i64> %0, <2 x i64> %1)
96-
// CHECK: ret i64 %dx.dot
72+
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].udot.v2i64(<2 x i64>
73+
// CHECK: ret i64 %hlsl.dot
9774
uint64_t test_dot_ulong2(uint64_t2 p0, uint64_t2 p1) { return dot(p0, p1); }
9875

99-
// CHECK: %dx.dot = call i64 @llvm.dx.udot.v3i64(<3 x i64> %0, <3 x i64> %1)
100-
// CHECK: ret i64 %dx.dot
76+
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].udot.v3i64(<3 x i64>
77+
// CHECK: ret i64 %hlsl.dot
10178
uint64_t test_dot_ulong3(uint64_t3 p0, uint64_t3 p1) { return dot(p0, p1); }
10279

103-
// CHECK: %dx.dot = call i64 @llvm.dx.udot.v4i64(<4 x i64> %0, <4 x i64> %1)
104-
// CHECK: ret i64 %dx.dot
80+
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].udot.v4i64(<4 x i64>
81+
// CHECK: ret i64 %hlsl.dot
10582
uint64_t test_dot_ulong4(uint64_t4 p0, uint64_t4 p1) { return dot(p0, p1); }
10683

107-
// NATIVE_HALF: %dx.dot = fmul half %0, %1
108-
// NATIVE_HALF: ret half %dx.dot
109-
// NO_HALF: %dx.dot = fmul float %0, %1
110-
// NO_HALF: ret float %dx.dot
84+
#ifdef __HLSL_ENABLE_16_BIT
85+
// NATIVE_HALF: %hlsl.dot = mul i16
86+
// NATIVE_HALF: ret i16 %hlsl.dot
87+
int16_t test_dot_short(int16_t p0, int16_t p1) { return dot(p0, p1); }
88+
89+
// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].sdot.v2i16(<2 x i16>
90+
// NATIVE_HALF: ret i16 %hlsl.dot
91+
int16_t test_dot_short2(int16_t2 p0, int16_t2 p1) { return dot(p0, p1); }
92+
93+
// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].sdot.v3i16(<3 x i16>
94+
// NATIVE_HALF: ret i16 %hlsl.dot
95+
int16_t test_dot_short3(int16_t3 p0, int16_t3 p1) { return dot(p0, p1); }
96+
97+
// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].sdot.v4i16(<4 x i16>
98+
// NATIVE_HALF: ret i16 %hlsl.dot
99+
int16_t test_dot_short4(int16_t4 p0, int16_t4 p1) { return dot(p0, p1); }
100+
101+
// NATIVE_HALF: %hlsl.dot = mul i16
102+
// NATIVE_HALF: ret i16 %hlsl.dot
103+
uint16_t test_dot_ushort(uint16_t p0, uint16_t p1) { return dot(p0, p1); }
104+
105+
// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].udot.v2i16(<2 x i16>
106+
// NATIVE_HALF: ret i16 %hlsl.dot
107+
uint16_t test_dot_ushort2(uint16_t2 p0, uint16_t2 p1) { return dot(p0, p1); }
108+
109+
// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].udot.v3i16(<3 x i16>
110+
// NATIVE_HALF: ret i16 %hlsl.dot
111+
uint16_t test_dot_ushort3(uint16_t3 p0, uint16_t3 p1) { return dot(p0, p1); }
112+
113+
// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].udot.v4i16(<4 x i16>
114+
// NATIVE_HALF: ret i16 %hlsl.dot
115+
uint16_t test_dot_ushort4(uint16_t4 p0, uint16_t4 p1) { return dot(p0, p1); }
116+
#endif
117+
118+
// NATIVE_HALF: %hlsl.dot = fmul half
119+
// NATIVE_HALF: ret half %hlsl.dot
120+
// NO_HALF: %hlsl.dot = fmul float
121+
// NO_HALF: ret float %hlsl.dot
111122
half test_dot_half(half p0, half p1) { return dot(p0, p1); }
112123

113-
// NATIVE_HALF: %dx.dot = call half @llvm.dx.dot2.v2f16(<2 x half> %0, <2 x half> %1)
114-
// NATIVE_HALF: ret half %dx.dot
115-
// NO_HALF: %dx.dot = call float @llvm.dx.dot2.v2f32(<2 x float> %0, <2 x float> %1)
116-
// NO_HALF: ret float %dx.dot
124+
// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v2f16(<2 x half>
125+
// NATIVE_HALF: ret half %hlsl.dot
126+
// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float>
127+
// NO_HALF: ret float %hlsl.dot
117128
half test_dot_half2(half2 p0, half2 p1) { return dot(p0, p1); }
118129

119-
// NATIVE_HALF: %dx.dot = call half @llvm.dx.dot3.v3f16(<3 x half> %0, <3 x half> %1)
120-
// NATIVE_HALF: ret half %dx.dot
121-
// NO_HALF: %dx.dot = call float @llvm.dx.dot3.v3f32(<3 x float> %0, <3 x float> %1)
122-
// NO_HALF: ret float %dx.dot
130+
// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v3f16(<3 x half>
131+
// NATIVE_HALF: ret half %hlsl.dot
132+
// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float>
133+
// NO_HALF: ret float %hlsl.dot
123134
half test_dot_half3(half3 p0, half3 p1) { return dot(p0, p1); }
124135

125-
// NATIVE_HALF: %dx.dot = call half @llvm.dx.dot4.v4f16(<4 x half> %0, <4 x half> %1)
126-
// NATIVE_HALF: ret half %dx.dot
127-
// NO_HALF: %dx.dot = call float @llvm.dx.dot4.v4f32(<4 x float> %0, <4 x float> %1)
128-
// NO_HALF: ret float %dx.dot
136+
// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v4f16(<4 x half>
137+
// NATIVE_HALF: ret half %hlsl.dot
138+
// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float>
139+
// NO_HALF: ret float %hlsl.dot
129140
half test_dot_half4(half4 p0, half4 p1) { return dot(p0, p1); }
130141

131-
// CHECK: %dx.dot = fmul float %0, %1
132-
// CHECK: ret float %dx.dot
142+
// CHECK: %hlsl.dot = fmul float
143+
// CHECK: ret float %hlsl.dot
133144
float test_dot_float(float p0, float p1) { return dot(p0, p1); }
134145

135-
// CHECK: %dx.dot = call float @llvm.dx.dot2.v2f32(<2 x float> %0, <2 x float> %1)
136-
// CHECK: ret float %dx.dot
146+
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float>
147+
// CHECK: ret float %hlsl.dot
137148
float test_dot_float2(float2 p0, float2 p1) { return dot(p0, p1); }
138149

139-
// CHECK: %dx.dot = call float @llvm.dx.dot3.v3f32(<3 x float> %0, <3 x float> %1)
140-
// CHECK: ret float %dx.dot
150+
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float>
151+
// CHECK: ret float %hlsl.dot
141152
float test_dot_float3(float3 p0, float3 p1) { return dot(p0, p1); }
142153

143-
// CHECK: %dx.dot = call float @llvm.dx.dot4.v4f32(<4 x float> %0, <4 x float> %1)
144-
// CHECK: ret float %dx.dot
154+
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float>
155+
// CHECK: ret float %hlsl.dot
145156
float test_dot_float4(float4 p0, float4 p1) { return dot(p0, p1); }
146157

147-
// CHECK: %dx.dot = call float @llvm.dx.dot2.v2f32(<2 x float> %splat.splat, <2 x float> %1)
148-
// CHECK: ret float %dx.dot
158+
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float> %splat.splat, <2 x float>
159+
// CHECK: ret float %hlsl.dot
149160
float test_dot_float2_splat(float p0, float2 p1) { return dot(p0, p1); }
150161

151-
// CHECK: %dx.dot = call float @llvm.dx.dot3.v3f32(<3 x float> %splat.splat, <3 x float> %1)
152-
// CHECK: ret float %dx.dot
162+
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float> %splat.splat, <3 x float>
163+
// CHECK: ret float %hlsl.dot
153164
float test_dot_float3_splat(float p0, float3 p1) { return dot(p0, p1); }
154165

155-
// CHECK: %dx.dot = call float @llvm.dx.dot4.v4f32(<4 x float> %splat.splat, <4 x float> %1)
156-
// CHECK: ret float %dx.dot
166+
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float> %splat.splat, <4 x float>
167+
// CHECK: ret float %hlsl.dot
157168
float test_dot_float4_splat(float p0, float4 p1) { return dot(p0, p1); }
158169

159-
// CHECK: %dx.dot = fmul double %0, %1
160-
// CHECK: ret double %dx.dot
170+
// CHECK: %hlsl.dot = fmul double
171+
// CHECK: ret double %hlsl.dot
161172
double test_dot_double(double p0, double p1) { return dot(p0, p1); }

0 commit comments

Comments
 (0)