Skip to content

Commit 7c53fc4

Browse files
committed
[clang] Emit target_clones resolver functions as COMDAT.
Previously, resolver functions synthesized for target_clones multiversion functions were not emitted as COMDAT. They are now placed in a COMDAT named after the resolver function whenever the target supports COMDATs.
1 parent 3531a4f commit 7c53fc4

File tree

2 files changed

+26
-10
lines changed

2 files changed

+26
-10
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3454,6 +3454,10 @@ void CodeGenModule::EmitTargetClonesResolver(GlobalDecl GD) {
34543454
Options.emplace_back(cast<llvm::Function>(Func), Architecture, Feature);
34553455
}
34563456

3457+
if (supportsCOMDAT())
3458+
ResolverFunc->setComdat(
3459+
getModule().getOrInsertComdat(ResolverFunc->getName()));
3460+
34573461
const TargetInfo &TI = getTarget();
34583462
std::stable_sort(
34593463
Options.begin(), Options.end(),

clang/test/CodeGen/attr-target-clones.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
11
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=LINUX,CHECK
22
// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefixes=WINDOWS,CHECK
33

4+
// LINUX: $foo.resolver = comdat any
5+
// LINUX: $foo_dupes.resolver = comdat any
6+
// LINUX: $unused.resolver = comdat any
7+
// LINUX: $foo_inline.resolver = comdat any
8+
// LINUX: $foo_inline2.resolver = comdat any
9+
10+
// WINDOWS: $foo = comdat any
11+
// WINDOWS: $foo_dupes = comdat any
12+
// WINDOWS: $unused = comdat any
13+
// WINDOWS: $foo_inline = comdat any
14+
// WINDOWS: $foo_inline2 = comdat any
15+
416
// LINUX: @foo.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo.resolver
517
// LINUX: @foo_dupes.ifunc = weak_odr ifunc void (), void ()* ()* @foo_dupes.resolver
618
// LINUX: @unused.ifunc = weak_odr ifunc void (), void ()* ()* @unused.resolver
@@ -10,26 +22,26 @@
1022
int __attribute__((target_clones("sse4.2, default"))) foo(void) { return 0; }
1123
// LINUX: define {{.*}}i32 @foo.sse4.2.0()
1224
// LINUX: define {{.*}}i32 @foo.default.1()
13-
// LINUX: define i32 ()* @foo.resolver()
25+
// LINUX: define i32 ()* @foo.resolver() comdat
1426
// LINUX: ret i32 ()* @foo.sse4.2.0
1527
// LINUX: ret i32 ()* @foo.default.1
1628

1729
// WINDOWS: define dso_local i32 @foo.sse4.2.0()
1830
// WINDOWS: define dso_local i32 @foo.default.1()
19-
// WINDOWS: define dso_local i32 @foo()
31+
// WINDOWS: define dso_local i32 @foo() comdat
2032
// WINDOWS: musttail call i32 @foo.sse4.2.0
2133
// WINDOWS: musttail call i32 @foo.default.1
2234

2335
__attribute__((target_clones("default,default ,sse4.2"))) void foo_dupes(void) {}
2436
// LINUX: define {{.*}}void @foo_dupes.default.1()
2537
// LINUX: define {{.*}}void @foo_dupes.sse4.2.0()
26-
// LINUX: define void ()* @foo_dupes.resolver()
38+
// LINUX: define void ()* @foo_dupes.resolver() comdat
2739
// LINUX: ret void ()* @foo_dupes.sse4.2.0
2840
// LINUX: ret void ()* @foo_dupes.default.1
2941

3042
// WINDOWS: define dso_local void @foo_dupes.default.1()
3143
// WINDOWS: define dso_local void @foo_dupes.sse4.2.0()
32-
// WINDOWS: define dso_local void @foo_dupes()
44+
// WINDOWS: define dso_local void @foo_dupes() comdat
3345
// WINDOWS: musttail call void @foo_dupes.sse4.2.0
3446
// WINDOWS: musttail call void @foo_dupes.default.1
3547

@@ -52,13 +64,13 @@ int bar(void) {
5264
void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {}
5365
// LINUX: define {{.*}}void @unused.default.1()
5466
// LINUX: define {{.*}}void @unused.arch_ivybridge.0()
55-
// LINUX: define void ()* @unused.resolver()
67+
// LINUX: define void ()* @unused.resolver() comdat
5668
// LINUX: ret void ()* @unused.arch_ivybridge.0
5769
// LINUX: ret void ()* @unused.default.1
5870

5971
// WINDOWS: define dso_local void @unused.default.1()
6072
// WINDOWS: define dso_local void @unused.arch_ivybridge.0()
61-
// WINDOWS: define dso_local void @unused()
73+
// WINDOWS: define dso_local void @unused() comdat
6274
// WINDOWS: musttail call void @unused.arch_ivybridge.0
6375
// WINDOWS: musttail call void @unused.default.1
6476

@@ -79,26 +91,26 @@ int bar3(void) {
7991
}
8092

8193
// Deferred emission of foo_inline, which got delayed because it is inline.
82-
// LINUX: define i32 ()* @foo_inline.resolver()
94+
// LINUX: define i32 ()* @foo_inline.resolver() comdat
8395
// LINUX: ret i32 ()* @foo_inline.arch_sandybridge.0
8496
// LINUX: ret i32 ()* @foo_inline.sse4.2.1
8597
// LINUX: ret i32 ()* @foo_inline.default.2
8698

87-
// WINDOWS: define dso_local i32 @foo_inline()
99+
// WINDOWS: define dso_local i32 @foo_inline() comdat
88100
// WINDOWS: musttail call i32 @foo_inline.arch_sandybridge.0
89101
// WINDOWS: musttail call i32 @foo_inline.sse4.2.1
90102
// WINDOWS: musttail call i32 @foo_inline.default.2
91103

92104
inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
93105
foo_inline2(void){ return 0; }
94106
// LINUX: define linkonce i32 @foo_inline2.arch_sandybridge.0() #[[SB:[0-9]+]]
95-
// LINUX: define i32 ()* @foo_inline2.resolver()
107+
// LINUX: define i32 ()* @foo_inline2.resolver() comdat
96108
// LINUX: ret i32 ()* @foo_inline2.arch_sandybridge.0
97109
// LINUX: ret i32 ()* @foo_inline2.sse4.2.1
98110
// LINUX: ret i32 ()* @foo_inline2.default.2
99111

100112
// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.arch_sandybridge.0() #[[SB:[0-9]+]]
101-
// WINDOWS: define dso_local i32 @foo_inline2()
113+
// WINDOWS: define dso_local i32 @foo_inline2() comdat
102114
// WINDOWS: musttail call i32 @foo_inline2.arch_sandybridge.0
103115
// WINDOWS: musttail call i32 @foo_inline2.sse4.2.1
104116
// WINDOWS: musttail call i32 @foo_inline2.default.2

0 commit comments

Comments
 (0)