Skip to content

Commit f0c397e

Browse files
committed
[llvm][fatlto] Add FatLTOCleanup pass
When using FatLTO, it is common to want to enable certain types of whole program optimizations (WPD) or security transforms (CFI), so that they can be made available when performing LTO. However, these transforms should not be used when compiling the non-LTO object code. Since the frontend must emit different IR, we cannot simply clone the module and optimize the LTO section and non-LTO section differently to work around this. Instead, we need to remove any problematic instruction sequences. This patch adds a new pass whose responsibility is to clean up the IR in the FatLTO pipeline after creating the bitcode section, which is after running the pre-link pipeline but before running module optimization. This allows us to safely drop any conflicting instructions or IR constructs that are inappropriate for non-LTO compilation.
1 parent ea6827c commit f0c397e

File tree

8 files changed

+297
-9
lines changed

8 files changed

+297
-9
lines changed

clang/test/CodeGen/fat-lto-objects-cfi.cpp

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,24 @@
11
// REQUIRES: x86-registered-target
22

3+
// RUN: rm -rf %t && split-file %s %t
34
// RUN: %clang_cc1 -triple x86_64-unknown-fuchsia -O2 -flto -ffat-lto-objects \
4-
// RUN: -fsanitize=cfi-icall -fsanitize-trap=cfi-icall -fvisibility=hidden -emit-llvm -o - %s \
5+
// RUN: -fsanitize=cfi-icall -fsanitize-trap=cfi-icall -fvisibility=hidden \
6+
// RUN: -emit-llvm -o - %t/a.cpp \
57
// RUN: | FileCheck %s --check-prefix=TYPE_TEST
68

9+
//--- a.cpp
710
// TYPE_TEST: llvm.embedded.object
811
// TYPE_TEST-SAME: section ".llvm.lto"
912

13+
// COM: The FatLTO pipeline should remove all llvm.type.test instructions.
1014
// TYPE_TEST-LABEL: define hidden void @foo
11-
// TYPE_TEST: entry:
12-
// TYPE_TEST-NEXT: %cmp14.not = icmp eq i64 %len, 0
13-
// TYPE_TEST-NEXT: br i1 %cmp14.not, label %for.end7, label %for.cond1.preheader.preheader
14-
// TYPE_TEST: for.cond1.preheader.preheader: ; preds = %entry
15-
// TYPE_TEST-NEXT: %arrayidx.1 = getelementptr inbounds nuw i8, ptr %ptr, i64 4
16-
// TYPE_TEST-NEXT: br label %for.cond1.preheader
17-
18-
// TYPE_TEST-NOT: @llvm.type.test
15+
// TYPE_TEST-NOT: @llvm.type.test
16+
// TYPE_TEST-NEXT: entry:
17+
// TYPE_TEST-NEXT: %cmp14.not = icmp eq i64 %len, 0
18+
// TYPE_TEST-NEXT: br i1 %cmp14.not, label %for.end7, label %for.cond1.preheader.preheader
19+
// TYPE_TEST-LABEL: for.cond1.preheader.preheader: ; preds = %entry
20+
// TYPE_TEST-NEXT: %arrayidx.1 = getelementptr inbounds nuw i8, ptr %ptr, i64 4
21+
// TYPE_TEST-NEXT: br label %for.cond1.preheader
1922

2023
// The code below is a reduced case from https://github.com/llvm/llvm-project/issues/112053
2124
#define __PRINTFLIKE(__fmt, __varargs) __attribute__((__format__(__printf__, __fmt, __varargs)))
@@ -44,3 +47,44 @@ void foo(const void* ptr, size_t len, long disp_addr,
4447
}
4548
}
4649

50+
//--- b.cpp
51+
// COM: Prior to the introduction of the FatLTO cleanup pass, this used to cause
52+
// COM: the backend to crash, either due to an assertion failure, or because
53+
// COM: the CFI instructions couldn't be correctly generated. So, check to make
54+
// COM: sure that the FatLTO pipeline used by clang does not regress.
55+
56+
// COM: Check that the final IR doesn't contain llvm.type.checked.load.
57+
// RUN: %clang_cc1 -triple=x86_64-unknown-fuchsia -O1 -emit-llvm -o - \
58+
// RUN: -ffat-lto-objects -fvisibility=hidden \
59+
// RUN: -fno-rtti -fsanitize=cfi-icall,cfi-mfcall,cfi-nvcall,cfi-vcall \
60+
// RUN: -fsanitize-trap=cfi-icall,cfi-mfcall,cfi-nvcall,cfi-vcall \
61+
// RUN: -fwhole-program-vtables %t/b.cpp 2>&1 | FileCheck %s --check-prefix=NO_CHECKED_LOAD
62+
63+
// RUN: %clang_cc1 -triple=x86_64-unknown-fuchsia -O1 -emit-llvm -o - \
64+
// RUN: -ffat-lto-objects -fvisibility=hidden -fexperimental-relative-c++-abi-vtables \
65+
// RUN: -fno-rtti -fsanitize=cfi-icall,cfi-mfcall,cfi-nvcall,cfi-vcall \
66+
// RUN: -fsanitize-trap=cfi-icall,cfi-mfcall,cfi-nvcall,cfi-vcall \
67+
// RUN: -fwhole-program-vtables %t/b.cpp 2>&1 | FileCheck %s --check-prefix=NO_CHECKED_LOAD
68+
69+
// COM: Note that the embedded bitcode section will contain references to
70+
// COM: llvm.type.checked.load, so we need to match the function body first.
71+
// NO_CHECKED_LOAD-LABEL: entry:
72+
// NO_CHECKED_LOAD-NEXT: %vtable = load ptr, ptr %p1
73+
// NO_CHECKED_LOAD-NOT: llvm.type.checked.load
74+
// NO_CHECKED_LOAD-NEXT: %vfunc = load ptr, ptr %vtable
75+
// NO_CHECKED_LOAD-NEXT: %call = tail call {{.*}} %vfunc(ptr {{.*}} %p1)
76+
// NO_CHECKED_LOAD-NEXT: ret void
77+
78+
// COM: Ensure that we don't crash in the backend anymore when clang uses
79+
// COM: CFI checks with -ffat-lto-objects.
80+
// RUN: %clang_cc1 -triple=x86_64-unknown-fuchsia -O1 -emit-codegen-only \
81+
// RUN: -ffat-lto-objects -fvisibility=hidden \
82+
// RUN: -fno-rtti -fsanitize=cfi-icall,cfi-mfcall,cfi-nvcall,cfi-vcall \
83+
// RUN: -fsanitize-trap=cfi-icall,cfi-mfcall,cfi-nvcall,cfi-vcall \
84+
// RUN: -fwhole-program-vtables %t/b.cpp
85+
86+
class a {
87+
public:
88+
virtual long b();
89+
};
90+
void c(a &p1) { p1.b(); }
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//===- FatLTOCleanup.h - clean up IR for the FatLTO pipeline ----*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines operations used to clean up IR for the FatLTO pipeline.
10+
// Instrumentation that is beneficial for bitcode sections used in LTO may
11+
// need to be cleaned up to finish non-LTO compilation. llvm.type.checked.load
12+
// is an example of an instruction that we want to preserve for LTO, but is
13+
// incorrect to leave unchanged during the per-TU compilation in FatLTO.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#ifndef LLVM_TRANSFORMS_IPO_FATLTOCLEANUP_H
18+
#define LLVM_TRANSFORMS_IPO_FATLTOCLEANUP_H
19+
20+
#include "llvm/IR/PassManager.h"
21+
22+
namespace llvm {
23+
24+
class Module;
25+
class ModuleSummaryIndex;
26+
27+
/// Module pass for the FatLTO pipeline that cleans up IR which is useful in
/// the embedded bitcode section but must not reach non-LTO code generation
/// (for example calls to llvm.type.checked.load).
class FatLtoCleanup : public PassInfoMixin<FatLtoCleanup> {
public:
  FatLtoCleanup() = default;

  /// Run the cleanup over \p M.
  ///
  /// \returns the set of analyses that are still valid after the pass ran.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

  /// The cleanup is needed for correctness rather than optimization, so the
  /// pass reports itself as required and must not be skipped.
  static bool isRequired() { return true; }
};
33+
34+
} // end namespace llvm
35+
36+
#endif // LLVM_TRANSFORMS_IPO_FATLTOCLEANUP_H

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@
183183
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
184184
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
185185
#include "llvm/Transforms/IPO/ExpandVariadics.h"
186+
#include "llvm/Transforms/IPO/FatLTOCleanup.h"
186187
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
187188
#include "llvm/Transforms/IPO/FunctionAttrs.h"
188189
#include "llvm/Transforms/IPO/FunctionImport.h"

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
5454
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
5555
#include "llvm/Transforms/IPO/ExpandVariadics.h"
56+
#include "llvm/Transforms/IPO/FatLTOCleanup.h"
5657
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
5758
#include "llvm/Transforms/IPO/FunctionAttrs.h"
5859
#include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -1657,6 +1658,12 @@ PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
16571658
MPM.addPass(buildLTOPreLinkDefaultPipeline(Level));
16581659
MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
16591660

1661+
// Perform any cleanups to the IR that aren't suitable for per TU compilation,
1662+
// like removing CFI/WPD related instructions. Note, we reuse
1663+
// LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1664+
// in FatLtoCleanup.
1665+
MPM.addPass(FatLtoCleanup());
1666+
16601667
// If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
16611668
// object code, only in the bitcode section, so drop it before we run
16621669
// module optimization and generate machine code. If llvm.type.test() isn't in

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ MODULE_PASS("lower-emutls", LowerEmuTLSPass())
9898
MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass())
9999
MODULE_PASS("lower-ifunc", LowerIFuncPass())
100100
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
101+
MODULE_PASS("fatlto-cleanup", FatLtoCleanup())
101102
MODULE_PASS("pgo-force-function-attrs", PGOForceFunctionAttrsPass(PGOOpt ? PGOOpt->ColdOptType : PGOOptions::ColdFuncOpt::Default))
102103
MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
103104
MODULE_PASS("memprof-module", ModuleMemProfilerPass())

llvm/lib/Transforms/IPO/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ add_llvm_component_library(LLVMipo
1414
EmbedBitcodePass.cpp
1515
ExpandVariadics.cpp
1616
ExtractGV.cpp
17+
FatLTOCleanup.cpp
1718
ForceFunctionAttrs.cpp
1819
FunctionAttrs.cpp
1920
FunctionImport.cpp
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
//===- FatLTOCleanup.cpp - clean up IR for the FatLTO pipeline --*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines operations used to clean up IR for the FatLTO pipeline.
10+
// Instrumentation that is beneficial for bitcode sections used in LTO may
11+
// need to be cleaned up to finish non-LTO compilation. llvm.type.checked.load
12+
// is an example of an instruction that we want to preserve for LTO, but is
13+
// incorrect to leave unchanged during the per-TU compilation in FatLTO.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#include "llvm/Transforms/IPO/FatLTOCleanup.h"
18+
#include "llvm/ADT/SetVector.h"
19+
#include "llvm/IR/Function.h"
20+
#include "llvm/IR/IRBuilder.h"
21+
#include "llvm/IR/Intrinsics.h"
22+
#include "llvm/IR/Module.h"
23+
#include "llvm/IR/PassManager.h"
24+
#include "llvm/IR/Use.h"
25+
#include "llvm/Support/Debug.h"
26+
27+
using namespace llvm;
28+
29+
#define DEBUG_TYPE "fatlto-cleanup"
30+
31+
namespace {
32+
// Replaces uses of llvm.type.checked.load instructions with unchecked loads.
33+
// In essence, we're undoing the frontend's instrumentation, since it isn't
34+
// correct for the non-LTO part of a FatLTO object.
35+
//
36+
// llvm.type.checked.load instruction sequences always have a particular form:
37+
//
38+
// clang-format off
39+
//
40+
// %0 = tail call { ptr, i1 } @llvm.type.checked.load(ptr %vtable, i32 0, metadata !"foo"), !nosanitize !0
41+
// %1 = extractvalue { ptr, i1 } %0, 1, !nosanitize !0
42+
// br i1 %1, label %cont2, label %trap1, !nosanitize !0
43+
//
44+
// trap1: ; preds = %entry
45+
// tail call void @llvm.ubsantrap(i8 2) #3, !nosanitize !0
46+
// unreachable, !nosanitize !0
47+
//
48+
// cont2: ; preds = %entry
49+
// %2 = extractvalue { ptr, i1 } %0, 0, !nosanitize !0
50+
// %call = tail call noundef i64 %2(ptr noundef nonnull align 8 dereferenceable(8) %p1) #4
51+
//
52+
// clang-format on
53+
//
54+
// In this sequence, the vtable pointer is first loaded and checked against some
55+
// metadata. If the result indicates failure, the program traps. On the
56+
// success path, the pointer is used to make an indirect call to the function
57+
// pointer loaded from the vtable.
58+
//
59+
// Since we won't be able to lower this correctly later in non-LTO builds, we
60+
// need to drop the special load and trap, and emit a normal load of the
61+
// function pointer from the vtable.
62+
//
63+
// This is straightforward, since the checked load can be replaced w/ a load
64+
// of the vtable pointer and a GEP instruction to index into the vtable and get
65+
// the correct method/function pointer. We replace the "check" with a constant
66+
// indicating success, which allows later passes to simplify control flow and
67+
// remove any now dead instructions.
68+
//
69+
// This logic holds for both llvm.type.checked.load and
70+
// llvm.type.checked.load.relative instructions.
71+
// Rewrites every call to the checked-load intrinsic declared by
// \p CheckedLoadFn into an unchecked load whose "check" result is the constant
// true.
//
// \param M              Module that owns \p CheckedLoadFn; also receives the
//                       llvm.load.relative declaration when \p IsRelative.
// \param CheckedLoadFn  Declaration of llvm.type.checked.load or
//                       llvm.type.checked.load.relative.
// \param IsRelative     True when \p CheckedLoadFn is the ".relative" variant,
//                       in which case the replacement is llvm.load.relative
//                       instead of a ptradd + load.
// \returns true if at least one call was rewritten.
static bool cleanUpTypeCheckedLoad(Module &M, Function &CheckedLoadFn,
                                   bool IsRelative) {
  // The "check succeeded" flag is identical for every rewritten call.
  ConstantInt *True = ConstantInt::getTrue(M.getContext());

  bool Changed = false;
  // Calls are erased while walking the use list, hence the early-inc range.
  for (User *U : llvm::make_early_inc_range(CheckedLoadFn.users())) {
    // Only direct calls of the intrinsic have the (ptr, offset, metadata)
    // argument layout relied on below; leave anything else untouched.
    auto *Call = dyn_cast<CallBase>(U);
    if (!Call || Call->getCalledOperand() != &CheckedLoadFn)
      continue;

    IRBuilder<> IRB(Call);
    Value *Ptr = Call->getArgOperand(0);
    Value *Offset = Call->getArgOperand(1);
    // The intrinsic returns { ptr, i1 }; element 0 is the loaded pointer.
    Type *PtrTy = Call->getType()->getStructElementType(0);

    Value *Load;
    if (IsRelative) {
      Function *LoadRelIntrinsic = llvm::Intrinsic::getOrInsertDeclaration(
          &M, Intrinsic::load_relative, {Offset->getType()});
      Load = IRB.CreateCall(LoadRelIntrinsic, {Ptr, Offset}, "rel_load");
    } else {
      Value *PtrAdd = IRB.CreatePtrAdd(Ptr, Offset);
      Load = IRB.CreateLoad(PtrTy, PtrAdd, "vfunc");
    }

    // Rebuild the { ptr, i1 } aggregate so existing extractvalue users keep
    // working; later passes fold the constant-true check and drop the trap.
    Value *Replacement = PoisonValue::get(Call->getType());
    Replacement = IRB.CreateInsertValue(Replacement, True, {1});
    Replacement = IRB.CreateInsertValue(Replacement, Load, {0});
    Call->replaceAllUsesWith(Replacement);

    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": erase " << *Call << "\n");
    Call->eraseFromParent();
    Changed = true;
  }

  // Drop the now-unused declaration, but never while something still refers
  // to it (erasing a value with remaining uses is invalid).
  if (Changed && CheckedLoadFn.use_empty())
    CheckedLoadFn.eraseFromParent();
  return Changed;
}
106+
} // namespace
107+
108+
// Pass entry point: rewrites calls to llvm.type.checked.load and
// llvm.type.checked.load.relative in M, when those intrinsics are declared.
// Reports that no analyses are preserved if anything was rewritten, and that
// all are preserved otherwise.
PreservedAnalyses FatLtoCleanup::run(Module &M, ModuleAnalysisManager &AM) {
  // Look up both declarations first; either may be absent from the module.
  Function *CheckedLoad =
      Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
  Function *CheckedLoadRelative = Intrinsic::getDeclarationIfExists(
      &M, Intrinsic::type_checked_load_relative);

  bool Modified = false;
  if (CheckedLoad != nullptr)
    Modified |= cleanUpTypeCheckedLoad(M, *CheckedLoad, /*IsRelative=*/false);
  if (CheckedLoadRelative != nullptr)
    Modified |=
        cleanUpTypeCheckedLoad(M, *CheckedLoadRelative, /*IsRelative=*/true);

  return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: opt -passes="fatlto-cleanup" < %s -S | FileCheck %s
4+
5+
declare void @llvm.ubsantrap(i8 immarg)
6+
declare { ptr, i1 } @llvm.type.checked.load(ptr, i32, metadata)
7+
declare { ptr, i1 } @llvm.type.checked.load.relative(ptr, i32, metadata)
8+
9+
define hidden void @foo(ptr %p1) {
10+
; CHECK-LABEL: define hidden void @foo(
11+
; CHECK-SAME: ptr [[P1:%.*]]) {
12+
; CHECK-NEXT: [[ENTRY:.*:]]
13+
; CHECK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[P1]], align 8
14+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VTABLE]], i32 0
15+
; CHECK-NEXT: [[VFUNC:%.*]] = load ptr, ptr [[TMP0]], align 8
16+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr, i1 } { ptr poison, i1 true }, ptr [[VFUNC]], 0
17+
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i1 } [[TMP2]], 1
18+
; CHECK-NEXT: br i1 [[TMP3]], label %[[CONT2:.*]], label %[[TRAP1:.*]]
19+
; CHECK: [[TRAP1]]:
20+
; CHECK-NEXT: tail call void @llvm.ubsantrap(i8 2)
21+
; CHECK-NEXT: unreachable
22+
; CHECK: [[CONT2]]:
23+
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i1 } [[TMP2]], 0
24+
; CHECK-NEXT: [[CALL:%.*]] = tail call noundef i64 [[TMP4]](ptr noundef nonnull align 8 dereferenceable(8) [[P1]])
25+
; CHECK-NEXT: ret void
26+
;
27+
entry:
28+
%vtable = load ptr, ptr %p1, align 8
29+
%0 = tail call { ptr, i1 } @llvm.type.checked.load(ptr %vtable, i32 0, metadata !"_ZTS1a")
30+
%1 = extractvalue { ptr, i1 } %0, 1
31+
br i1 %1, label %cont2, label %trap1
32+
33+
trap1:
34+
tail call void @llvm.ubsantrap(i8 2)
35+
unreachable
36+
37+
cont2:
38+
%2 = extractvalue { ptr, i1 } %0, 0
39+
%call = tail call noundef i64 %2(ptr noundef nonnull align 8 dereferenceable(8) %p1)
40+
ret void
41+
}
42+
43+
define hidden void @relative.vtable(ptr %p1) {
44+
; CHECK-LABEL: define hidden void @relative.vtable(
45+
; CHECK-SAME: ptr [[P1:%.*]]) {
46+
; CHECK-NEXT: [[ENTRY:.*:]]
47+
; CHECK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[P1]], align 8
48+
; CHECK-NEXT: [[REL_LOAD:%.*]] = call ptr @llvm.load.relative.i32(ptr [[VTABLE]], i32 0)
49+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr, i1 } { ptr poison, i1 true }, ptr [[REL_LOAD]], 0
50+
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i1 } [[TMP2]], 1
51+
; CHECK-NEXT: br i1 [[TMP3]], label %[[CONT2:.*]], label %[[TRAP1:.*]]
52+
; CHECK: [[TRAP1]]:
53+
; CHECK-NEXT: tail call void @llvm.ubsantrap(i8 2)
54+
; CHECK-NEXT: unreachable
55+
; CHECK: [[CONT2]]:
56+
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i1 } [[TMP2]], 0
57+
; CHECK-NEXT: [[CALL:%.*]] = tail call noundef i64 [[TMP4]](ptr noundef nonnull align 8 dereferenceable(8) [[P1]])
58+
; CHECK-NEXT: ret void
59+
;
60+
entry:
61+
%vtable = load ptr, ptr %p1, align 8
62+
%0 = tail call { ptr, i1 } @llvm.type.checked.load.relative(ptr %vtable, i32 0, metadata !"rel.vtable.type")
63+
%1 = extractvalue { ptr, i1 } %0, 1
64+
br i1 %1, label %cont2, label %trap1
65+
66+
trap1:
67+
tail call void @llvm.ubsantrap(i8 2)
68+
unreachable
69+
70+
cont2:
71+
%2 = extractvalue { ptr, i1 } %0, 0
72+
%call = tail call noundef i64 %2(ptr noundef nonnull align 8 dereferenceable(8) %p1)
73+
ret void
74+
}
75+

0 commit comments

Comments
 (0)