[HLSL] Don't use CreateRuntimeFunction for intrinsics (#145334)

nikic · web-flow · commit 1128a4fd2c3a · 2025-06-23T17:37:33.000+02:00
HLSL uses CreateRuntimeFunction for three intrinsics. This is pretty
unusual thing to do, and doesn't match what the rest of the file does.

I suspect this might be because these are convergent calls, but the
intrinsics themselves are already marked convergent, so it's not
necessary for clang to manually add the attribute.

This does lose the spir_func CC on the intrinsic declaration, but again,
CC should not be relevant to intrinsics at all.
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -676,35 +676,23 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
   case Builtin::BI__builtin_hlsl_wave_active_sum: {
     // Due to the use of variadic arguments, explicitly retreive argument
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
-    llvm::FunctionType *FT = llvm::FunctionType::get(
-        OpExpr->getType(), ArrayRef{OpExpr->getType()}, false);
     Intrinsic::ID IID = getWaveActiveSumIntrinsic(
         getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
         E->getArg(0)->getType());
 
-    // Get overloaded name
-    std::string Name =
-        Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule());
-    return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
-                                                     /*Local=*/false,
-                                                     /*AssumeConvergent=*/true),
+    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
+                               &CGM.getModule(), IID, {OpExpr->getType()}),
                            ArrayRef{OpExpr}, "hlsl.wave.active.sum");
   }
   case Builtin::BI__builtin_hlsl_wave_active_max: {
     // Due to the use of variadic arguments, explicitly retreive argument
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
-    llvm::FunctionType *FT = llvm::FunctionType::get(
-        OpExpr->getType(), ArrayRef{OpExpr->getType()}, false);
     Intrinsic::ID IID = getWaveActiveMaxIntrinsic(
         getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
         E->getArg(0)->getType());
 
-    // Get overloaded name
-    std::string Name =
-        Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule());
-    return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
-                                                     /*Local=*/false,
-                                                     /*AssumeConvergent=*/true),
+    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
+                               &CGM.getModule(), IID, {OpExpr->getType()}),
                            ArrayRef{OpExpr}, "hlsl.wave.active.max");
   }
   case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
@@ -739,18 +727,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     // create our function type.
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Value *OpIndex = EmitScalarExpr(E->getArg(1));
-    llvm::FunctionType *FT = llvm::FunctionType::get(
-        OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
-        false);
-
-    // Get overloaded name
-    std::string Name =
-        Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
-                           ArrayRef{OpExpr->getType()}, &CGM.getModule());
-    return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
-                                                     /*Local=*/false,
-                                                     /*AssumeConvergent=*/true),
-                           ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
+    return EmitRuntimeCall(
+        Intrinsic::getOrInsertDeclaration(
+            &CGM.getModule(), CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
+            {OpExpr->getType()}),
+        ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
   }
   case Builtin::BI__builtin_hlsl_elementwise_sign: {
     auto *Arg0 = E->getArg(0);
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl
@@ -16,7 +16,7 @@ int test_int(int expr) {
 }
 
 // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.max.i32([[TY]]) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.max.i32([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.max.i32([[TY]]) #[[#attr:]]
 
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr) {
@@ -27,7 +27,7 @@ uint64_t test_uint64_t(uint64_t expr) {
 }
 
 // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.umax.i64([[TY]]) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.umax.i64([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.umax.i64([[TY]]) #[[#attr:]]
 
 // Test basic lowering to runtime function call with array and float value.
 
@@ -40,7 +40,7 @@ float4 test_floatv4(float4 expr) {
 }
 
 // CHECK-DXIL: declare [[TY1]] @llvm.dx.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]
-// CHECK-SPIRV: declare spir_func [[TY1]] @llvm.spv.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]
+// CHECK-SPIRV: declare [[TY1]] @llvm.spv.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]
 
 // CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
 
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl
@@ -16,7 +16,7 @@ int test_int(int expr) {
 }
 
 // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.sum.i32([[TY]]) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.sum.i32([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.sum.i32([[TY]]) #[[#attr:]]
 
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr) {
@@ -27,7 +27,7 @@ uint64_t test_uint64_t(uint64_t expr) {
 }
 
 // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.usum.i64([[TY]]) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.sum.i64([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.sum.i64([[TY]]) #[[#attr:]]
 
 // Test basic lowering to runtime function call with array and float value.
 
@@ -40,6 +40,6 @@ float4 test_floatv4(float4 expr) {
 }
 
 // CHECK-DXIL: declare [[TY1]] @llvm.dx.wave.reduce.sum.v4f32([[TY1]]) #[[#attr]]
-// CHECK-SPIRV: declare spir_func [[TY1]] @llvm.spv.wave.reduce.sum.v4f32([[TY1]]) #[[#attr]]
+// CHECK-SPIRV: declare [[TY1]] @llvm.spv.wave.reduce.sum.v4f32([[TY1]]) #[[#attr]]
 
 // CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
diff --git a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
@@ -17,7 +17,7 @@ int test_int(int expr, uint idx) {
 }
 
 // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.i32([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.i32([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.i32([[TY]], i32) #[[#attr:]]
 
 // CHECK-LABEL: test_uint
 uint test_uint(uint expr, uint idx) {
@@ -38,7 +38,7 @@ int64_t test_int64_t(int64_t expr, uint idx) {
 }
 
 // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.i64([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.i64([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.i64([[TY]], i32) #[[#attr:]]
 
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr, uint idx) {
@@ -60,7 +60,7 @@ int16_t test_int16(int16_t expr, uint idx) {
 }
 
 // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.i16([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.i16([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.i16([[TY]], i32) #[[#attr:]]
 
 // CHECK-LABEL: test_uint16
 uint16_t test_uint16(uint16_t expr, uint idx) {
@@ -84,7 +84,7 @@ half test_half(half expr, uint idx) {
 }
 
 // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.f16([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.f16([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.f16([[TY]], i32) #[[#attr:]]
 
 // CHECK-LABEL: test_double
 double test_double(double expr, uint idx) {
@@ -96,7 +96,7 @@ double test_double(double expr, uint idx) {
 }
 
 // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.readlane.f64([[TY]], i32) #[[#attr:]]
-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.readlane.f64([[TY]], i32) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.readlane.f64([[TY]], i32) #[[#attr:]]
 
 // CHECK-LABEL: test_floatv4
 float4 test_floatv4(float4 expr, uint idx) {
@@ -108,6 +108,6 @@ float4 test_floatv4(float4 expr, uint idx) {
 }
 
 // CHECK-DXIL: declare [[TY1]] @llvm.dx.wave.readlane.v4f32([[TY1]], i32) #[[#attr]]
-// CHECK-SPIRV: declare spir_func [[TY1]] @llvm.spv.wave.readlane.v4f32([[TY1]], i32) #[[#attr]]
+// CHECK-SPIRV: declare [[TY1]] @llvm.spv.wave.readlane.v4f32([[TY1]], i32) #[[#attr]]
 
 // CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}

Original file line number	Diff line number	Diff line change
`@@ -16,7 +16,7 @@ int test_int(int expr) {`
`16`	`16`	`}`
`17`	`17`
`18`	`18`	`// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.max.i32([[TY]]) #[[#attr:]]`
`19`		`-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.max.i32([[TY]]) #[[#attr:]]`
	`19`	`+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.max.i32([[TY]]) #[[#attr:]]`
`20`	`20`
`21`	`21`	`// CHECK-LABEL: test_uint64_t`
`22`	`22`	`uint64_t test_uint64_t(uint64_t expr) {`
`@@ -27,7 +27,7 @@ uint64_t test_uint64_t(uint64_t expr) {`
`27`	`27`	`}`
`28`	`28`
`29`	`29`	`// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.umax.i64([[TY]]) #[[#attr:]]`
`30`		`-// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.umax.i64([[TY]]) #[[#attr:]]`
	`30`	`+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.umax.i64([[TY]]) #[[#attr:]]`
`31`	`31`
`32`	`32`	`// Test basic lowering to runtime function call with array and float value.`
`33`	`33`
`@@ -40,7 +40,7 @@ float4 test_floatv4(float4 expr) {`
`40`	`40`	`}`
`41`	`41`
`42`	`42`	`// CHECK-DXIL: declare [[TY1]] @llvm.dx.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]`
`43`		`-// CHECK-SPIRV: declare spir_func [[TY1]] @llvm.spv.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]`
	`43`	`+// CHECK-SPIRV: declare [[TY1]] @llvm.spv.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]`
`44`	`44`
`45`	`45`	`// CHECK: attributes #[[#attr]] = {{{.}} convergent {{.}}}`
`46`	`46`