Skip to content

Commit f04a746

Browse files
committed
[HLSL] Set function optnone attribute appropriately
When optimization is disabled, set the optnone attribute: (1) for all module functions when targeting Library shaders; (2) only for the entry function when targeting non-Library shaders. Update tests in accordance with the change.
1 parent a57bbff commit f04a746

10 files changed

+164
-79
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2545,7 +2545,16 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
25452545
// Non-entry HLSL functions must always be inlined.
25462546
if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline) &&
25472547
!D->hasAttr<NoInlineAttr>()) {
2548-
B.addAttribute(llvm::Attribute::AlwaysInline);
2548+
// Set OptimizeNone for HLSL entry functions if ShouldAddOptNone
2549+
// or for all HLSL functions compiled for Library target.
2550+
llvm::Triple T(F->getParent()->getTargetTriple());
2551+
if (ShouldAddOptNone &&
2552+
(D->hasAttr<HLSLShaderAttr>() ||
2553+
T.getEnvironment() == llvm::Triple::EnvironmentType::Library)) {
2554+
B.addAttribute(llvm::Attribute::OptimizeNone);
2555+
B.addAttribute(llvm::Attribute::NoInline);
2556+
} else
2557+
B.addAttribute(llvm::Attribute::AlwaysInline);
25492558
} else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
25502559
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
25512560
// Add optnone, but do so only if the function isn't always_inline.

clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,10 @@ void main(unsigned GI : SV_GroupIndex) {}
3232
// NOINLINE-NEXT: call void @_Z12call_me_lastv(
3333
// NOINLINE-NEXT: ret void
3434

35-
// Verify constructor calls are inlined when AlwaysInline is run
36-
// INLINE-NEXT: alloca
35+
// Verify constructor calls are inlined
3736
// INLINE-NEXT: store i32 12
3837
// INLINE-NEXT: store i32 13
3938
// INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
40-
// INLINE-NEXT: store i32 %
39+
// INLINE-NEXT: call void @_Z4mainj(i32 %0)
4140
// INLINE-NEXT: store i32 0
4241
// INLINE: ret void

clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
2-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
2+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
33

44
// Make sure global variable for ctors exist for lib profile.
55
// CHECK:@llvm.global_ctors
@@ -31,12 +31,12 @@ void SecondEntry() {}
3131
// CHECK: ret void
3232

3333

34-
// Verify the constructor is alwaysinline
35-
// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
34+
// Verify the constructor is optnone
35+
// NOINLINE: ; Function Attrs: {{.*}} optnone
3636
// NOINLINE-NEXT: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ev({{.*}} [[CtorAttr:\#[0-9]+]]
3737

3838
// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
3939
// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[InitAttr:\#[0-9]+]]
4040

4141
// NOINLINE-DAG: attributes [[InitAttr]] = {{.*}} alwaysinline
42-
// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} alwaysinline
42+
// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} optnone

clang/test/CodeGenHLSL/GlobalDestructors.hlsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
22
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
3-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
4-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
3+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
4+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
55

66
// Tests that constructors and destructors are appropriately generated for globals
77
// and that their calls are inlined when AlwaysInline is run

clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ RWBuffer<int> Out;
66

77
[numthreads(1,1,1)]
88
void main(unsigned GI : SV_GroupIndex) {
9-
// CHECK: define void @main()
9+
// DXC: define internal void @_Z4mainj(i32 noundef %GI)
10+
// SPIRV: define internal spir_func void @_Z4mainj(i32 noundef %GI)
1011

1112
// DXC: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
1213
// SPIRV: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_i32_5_2_0_0_2_0t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})

clang/test/CodeGenHLSL/builtins/StructuredBuffers-subscripts.hlsl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ RasterizerOrderedStructuredBuffer<int> Out2;
66

77
[numthreads(1,1,1)]
88
void main(unsigned GI : SV_GroupIndex) {
9-
// CHECK: define void @main()
10-
9+
// CHECK: define internal void @_Z4mainj(i32 noundef %GI)
1110
// CHECK: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_0_0t(target("dx.RawBuffer", i32, 0, 0) %{{.*}}, i32 %{{.*}})
1211
// CHECK: %[[LOAD:.*]] = load i32, ptr %[[INPTR]]
1312
// CHECK: %[[OUT1PTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %{{.*}}, i32 %{{.*}})

clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.3-library -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
2-
// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.0-compute -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
1+
// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.3-library -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,LIB
2+
// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.0-compute -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,COMPUTE
33

44
// Verify that a few different function types all get the NoRecurse attribute
55

@@ -11,7 +11,8 @@ struct Node {
1111
uint left, right;
1212
};
1313

14-
// CHECK: Function Attrs:{{.*}}norecurse
14+
// LIB: Function Attrs:{{.*}}norecurse{{.*}}optnone
15+
// COMPUTE: Function Attrs: alwaysinline{{.*}}norecurse
1516
// CHECK: define noundef i32 @_Z4FindA100_4Nodej(ptr noundef byval([100 x %struct.Node]) align 4 %SortedTree, i32 noundef %key) [[IntAttr:\#[0-9]+]]
1617
// CHECK: ret i32
1718
// Find and return value corresponding to key in the SortedTree
@@ -30,7 +31,8 @@ uint Find(Node SortedTree[MAX], uint key) {
3031
}
3132
}
3233

33-
// CHECK: Function Attrs:{{.*}}norecurse
34+
// LIB: Function Attrs:{{.*}}norecurse{{.*}}optnone
35+
// COMPUTE: Function Attrs: alwaysinline{{.*}}norecurse
3436
// CHECK: define noundef i1 @_Z8InitTreeA100_4NodeN4hlsl8RWBufferIDv4_jEEj(ptr noundef byval([100 x %struct.Node]) align 4 %tree, ptr noundef byval(%"class.hlsl::RWBuffer") align 4 %encodedTree, i32 noundef %maxDepth) [[ExtAttr:\#[0-9]+]]
3537
// CHECK: ret i1
3638
// Initialize tree with given buffer
@@ -51,12 +53,13 @@ bool InitTree(/*inout*/ Node tree[MAX], RWBuffer<uint4> encodedTree, uint maxDep
5153
RWBuffer<uint4> gTree;
5254

5355
// Mangled entry points are internal
54-
// CHECK: Function Attrs:{{.*}}norecurse
55-
// CHECK: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr]]
56+
// CHECK: Function Attrs:{{.*}}norecurse{{.*}}optnone
57+
// LIB: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr]]
58+
// COMPUTE: define internal void @_Z4mainj(i32 noundef %GI) [[IntComputeAttr:\#[0-9]]]
5659
// CHECK: ret void
5760

5861
// Canonical entry points are external and shader attributed
59-
// CHECK: Function Attrs:{{.*}}norecurse
62+
// CHECK: Function Attrs: convergent noinline norecurse
6063
// CHECK: define void @main() [[EntryAttr:\#[0-9]+]]
6164
// CHECK: ret void
6265

@@ -70,12 +73,13 @@ void main(uint GI : SV_GroupIndex) {
7073
}
7174

7275
// Mangled entry points are internal
73-
// CHECK: Function Attrs:{{.*}}norecurse
74-
// CHECK: define internal void @_Z11defaultMainv() [[IntAttr]]
76+
// CHECK: Function Attrs:{{.*}}norecurse{{.*}}optnone
77+
// LIB: define internal void @_Z11defaultMainv() [[IntAttr]]
78+
// COMPUTE: define internal void @_Z11defaultMainv() [[IntComputeAttr]]
7579
// CHECK: ret void
7680

7781
// Canonical entry points are external and shader attributed
78-
// CHECK: Function Attrs:{{.*}}norecurse
82+
// CHECK: Function Attrs: convergent noinline norecurse
7983
// CHECK: define void @defaultMain() [[EntryAttr]]
8084
// CHECK: ret void
8185

@@ -88,6 +92,7 @@ void defaultMain() {
8892
needle = Find(haystack, needle);
8993
}
9094

91-
// CHECK: attributes [[IntAttr]] = {{.*}} norecurse
92-
// CHECK: attributes [[ExtAttr]] = {{.*}} norecurse
93-
// CHECK: attributes [[EntryAttr]] = {{.*}} norecurse
95+
// CHECK: attributes [[IntAttr]] = {{.*}}norecurse
96+
// CHECK: attributes [[ExtAttr]] = {{.*}}norecurse
97+
// COMPUTE: attributes [[IntComputeAttr]] = {{.*}}norecurse
98+
// CHECK: attributes [[EntryAttr]] = {{.*}}norecurse

clang/test/CodeGenHLSL/inline-constructors.hlsl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
2-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
3-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
4-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
5-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
6-
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
1+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
2+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
3+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,NOINLINE_INTERNAL
4+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,NOINLINE_INTERNAL
5+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
6+
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
77

88
// Tests that implicit constructor calls for user classes will always be inlined.
99

@@ -67,6 +67,8 @@ void main(unsigned GI : SV_GroupIndex) {
6767
// NOINLINE-NEXT: call void @_Z9rainyMainv()
6868
// Verify inlining leaves only calls to "llvm." intrinsics
6969
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
70+
// Verify internal function is not inlined
71+
// NOINLINE_INTERNAL: call void @_Z9rainyMainv()
7072
// CHECK: ret void
7173
[shader("compute")]
7274
[numthreads(1,1,1)]

0 commit comments

Comments
 (0)