Skip to content

Commit e3c72e1

Browse files
authored
LowerTypeTests: Shrink check size by 1 instruction on x86.
We currently generate code like this on x86 for a jump table with 5 elements, assuming the call target is in rbx:

    lea global_addr(%rip), %rax   # initialize temporary rax with base address
    mov %rbx, %rcx                # initialize another temporary rcx for index (rbx will be used for the call, so it is still live)
    sub %rax, %rcx                # compute `address - base`
    ror $0x3, %rcx                # compute `(address - base) ror 3` i.e. index
    cmp $0x4, %rcx                # check index <= 4
    ja .Ltrap
    [...]
  .Ltrap:
    ud1

A more efficient instruction sequence, which needs only one temporary register and one fewer instruction, is possible by subtracting the address we are testing from the fixed address instead of vice versa:

    lea (global_addr + 4*8)(%rip), %rax   # initialize temporary rax with address of last element
    sub %rbx, %rax                        # compute `last element - address`
    ror $0x3, %rax                        # compute `(last element - address) ror 3` i.e. 4 - index
    cmp $0x4, %rax                        # check 4 - index <= 4 (same as above)
    ja .Ltrap
    [...]
  .Ltrap:
    ud1

Change LowerTypeTests to generate that sequence. As a consequence, the order of bits in the bitsets is reversed. Because it doesn't matter how we do the subtraction on other architectures (to the best of my knowledge), do so unconditionally.

Reviewers: fmayer, vitalybuka
Reviewed By: fmayer
Pull Request: #142887
1 parent faaae66 commit e3c72e1

File tree

14 files changed

+73
-64
lines changed

14 files changed

+73
-64
lines changed

llvm/lib/Transforms/IPO/LowerTypeTests.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
143143
if (BitOffset >= BitSize)
144144
return false;
145145

146-
return Bits.count(BitOffset);
146+
return Bits.count(BitSize - 1 - BitOffset);
147147
}
148148

149149
void BitSetInfo::print(raw_ostream &OS) const {
@@ -188,7 +188,11 @@ BitSetInfo BitSetBuilder::build() {
188188
BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
189189
for (uint64_t Offset : Offsets) {
190190
Offset >>= BSI.AlignLog2;
191-
BSI.Bits.insert(Offset);
191+
// We invert the order of bits when adding them to the bitset. This is
192+
// because the offset that we test against is computed by subtracting the
193+
// address that we are testing from the global's address, which means that
194+
// the offset increases as the tested address decreases.
195+
BSI.Bits.insert(BSI.BitSize - 1 - Offset);
192196
}
193197

194198
return BSI;
@@ -465,7 +469,8 @@ class LowerTypeTestsModule {
465469
struct TypeIdLowering {
466470
TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;
467471

468-
/// All except Unsat: the start address within the combined global.
472+
/// All except Unsat: the address of the last element within the combined
473+
/// global.
469474
Constant *OffsetedGlobal;
470475

471476
/// ByteArray, Inline, AllOnes: log2 of the required global alignment
@@ -772,7 +777,11 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
772777
if (TIL.TheKind == TypeTestResolution::Single)
773778
return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
774779

775-
Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
780+
// Here we compute `last element - address`. The reason why we do this instead
781+
// of computing `address - first element` is that it leads to a slightly
782+
// shorter instruction sequence on x86. Because it doesn't matter how we do
783+
// the subtraction on other architectures, we do so unconditionally.
784+
Value *PtrOffset = B.CreateSub(OffsetedGlobalAsInt, PtrAsInt);
776785

777786
// We need to check that the offset both falls within our range and is
778787
// suitably aligned. We can check both properties at the same time by
@@ -1154,8 +1163,11 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
11541163

11551164
ByteArrayInfo *BAI = nullptr;
11561165
TypeIdLowering TIL;
1166+
1167+
uint64_t GlobalOffset =
1168+
BSI.ByteOffset + ((BSI.BitSize - 1) << BSI.AlignLog2);
11571169
TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
1158-
Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
1170+
Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, GlobalOffset)),
11591171
TIL.AlignLog2 = ConstantInt::get(IntPtrTy, BSI.AlignLog2);
11601172
TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1);
11611173
if (BSI.isAllOnes()) {
@@ -2533,9 +2545,9 @@ PreservedAnalyses SimplifyTypeTestsPass::run(Module &M,
25332545
continue;
25342546
for (Use &U : make_early_inc_range(CE->uses())) {
25352547
auto *CE = dyn_cast<ConstantExpr>(U.getUser());
2536-
if (U.getOperandNo() == 1 && CE &&
2548+
if (U.getOperandNo() == 0 && CE &&
25372549
CE->getOpcode() == Instruction::Sub &&
2538-
MaySimplifyInt(CE->getOperand(0))) {
2550+
MaySimplifyInt(CE->getOperand(1))) {
25392551
// This is a computation of PtrOffset as generated by
25402552
// LowerTypeTestsModule::lowerTypeTestCall above. If
25412553
// isKnownTypeIdMember passes we just pretend it evaluated to 0. This

llvm/test/Transforms/LowerTypeTests/aarch64-jumptable.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ define i1 @foo(ptr %p) {
4141
; AARCH64-LABEL: define i1 @foo
4242
; AARCH64-SAME: (ptr [[P:%.*]]) {
4343
; AARCH64-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
44-
; AARCH64-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], ptrtoint (ptr @.cfi.jumptable to i64)
44+
; AARCH64-NEXT: [[TMP2:%.*]] = sub i64 ptrtoint (ptr getelementptr (i8, ptr @.cfi.jumptable, i64 8) to i64), [[TMP1]]
4545
; AARCH64-NEXT: [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 [[TMP2]], i64 3)
4646
; AARCH64-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP3]], 1
4747
; AARCH64-NEXT: ret i1 [[TMP4]]

llvm/test/Transforms/LowerTypeTests/asm.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33

44
define void @call(ptr %p) {
55
; CHECK: movl $__typeid_allones7_global_addr, %eax
6-
; CHECK-NEXT: movq %rdi, %rcx
7-
; CHECK-NEXT: subq %rax, %rcx
8-
; CHECK-NEXT: rorq $__typeid_allones7_align, %rcx
9-
; CHECK-NEXT: cmpq $__typeid_allones7_size_m1@ABS8, %rcx
6+
; CHECK-NEXT: subq %rdi, %rax
7+
; CHECK-NEXT: rorq $__typeid_allones7_align, %rax
8+
; CHECK-NEXT: cmpq $__typeid_allones7_size_m1@ABS8, %rax
109
%x = call i1 @llvm.type.test(ptr %p, metadata !"allones7")
1110
br i1 %x, label %t, label %f
1211

llvm/test/Transforms/LowerTypeTests/export-allones.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,11 +141,11 @@
141141

142142
; CHECK: [[G:@[0-9]+]] = private constant { [2048 x i8] } zeroinitializer
143143

144-
; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, ptr [[G]]
144+
; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, getelementptr (i8, ptr [[G]], i64 2)
145145
; X86: @__typeid_typeid1_align = hidden alias i8, inttoptr (i64 1 to ptr)
146146
; X86: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 1 to ptr)
147147

148-
; CHECK: @__typeid_typeid2_global_addr = hidden alias i8, getelementptr (i8, ptr [[G]], i64 4)
148+
; CHECK: @__typeid_typeid2_global_addr = hidden alias i8, getelementptr (i8, ptr [[G]], i64 516)
149149
; X86: @__typeid_typeid2_align = hidden alias i8, inttoptr (i64 2 to ptr)
150150
; X86: @__typeid_typeid2_size_m1 = hidden alias i8, inttoptr (i64 128 to ptr)
151151

llvm/test/Transforms/LowerTypeTests/export-bytearray.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414
; CHECK: [[G:@[0-9]+]] = private constant { [2048 x i8] } zeroinitializer
1515
; CHECK: [[B:@[0-9]+]] = private constant [258 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01"
1616

17-
; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, ptr [[G]]
17+
; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, getelementptr (i8, ptr [[G]], i64 130)
1818
; X86: @__typeid_typeid1_align = hidden alias i8, inttoptr (i64 1 to ptr)
1919
; X86: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 65 to ptr)
2020
; CHECK: @__typeid_typeid1_byte_array = hidden alias i8, ptr @bits.1
2121
; X86: @__typeid_typeid1_bit_mask = hidden alias i8, inttoptr (i8 2 to ptr)
2222

23-
; CHECK: @__typeid_typeid2_global_addr = hidden alias i8, getelementptr (i8, ptr [[G]], i64 4)
23+
; CHECK: @__typeid_typeid2_global_addr = hidden alias i8, getelementptr (i8, ptr [[G]], i64 1032)
2424
; X86: @__typeid_typeid2_align = hidden alias i8, inttoptr (i64 2 to ptr)
2525
; X86: @__typeid_typeid2_size_m1 = hidden alias i8, inttoptr (i64 257 to ptr)
2626
; CHECK: @__typeid_typeid2_byte_array = hidden alias i8, ptr @bits

llvm/test/Transforms/LowerTypeTests/export-icall.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define void @f3(i32 %x) !type !8 {
3636
!8 = !{i64 0, !"typeid3"}
3737

3838

39-
; CHECK-DAG: @__typeid_typeid1_global_addr = hidden alias i8, ptr [[JT1:.*]]
39+
; CHECK-DAG: @__typeid_typeid1_global_addr = hidden alias i8, getelementptr (i8, ptr [[JT1:.*]], i64 32)
4040
; CHECK-DAG: @__typeid_typeid1_align = hidden alias i8, inttoptr (i64 3 to ptr)
4141
; CHECK-DAG: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 4 to ptr)
4242

llvm/test/Transforms/LowerTypeTests/export-inline.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313

1414
; CHECK: [[G:@[0-9]+]] = private constant { [2048 x i8] } zeroinitializer
1515

16-
; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, ptr [[G]]
16+
; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, getelementptr (i8, ptr [[G]], i64 6)
1717
; CHECK-X86: @__typeid_typeid1_align = hidden alias i8, inttoptr (i8 1 to ptr)
1818
; CHECK-X86: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 3 to ptr)
1919
; CHECK-X86: @__typeid_typeid1_inline_bits = hidden alias i8, inttoptr (i32 9 to ptr)
2020

21-
; CHECK: @__typeid_typeid2_global_addr = hidden alias i8, getelementptr (i8, ptr [[G]], i64 4)
21+
; CHECK: @__typeid_typeid2_global_addr = hidden alias i8, getelementptr (i8, ptr [[G]], i64 136)
2222
; CHECK-X86: @__typeid_typeid2_align = hidden alias i8, inttoptr (i8 2 to ptr)
2323
; CHECK-X86: @__typeid_typeid2_size_m1 = hidden alias i8, inttoptr (i64 33 to ptr)
2424
; CHECK-X86: @__typeid_typeid2_inline_bits = hidden alias i8, inttoptr (i64 8589934593 to ptr)

llvm/test/Transforms/LowerTypeTests/function.ll

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
1-
; RUN: opt -S -passes=lowertypetests -mtriple=i686-unknown-linux-gnu %s | FileCheck --check-prefixes=X86,X86-LINUX,NATIVE %s
2-
; RUN: opt -S -passes=lowertypetests -mtriple=x86_64-unknown-linux-gnu %s | FileCheck --check-prefixes=X86,X86-LINUX,NATIVE %s
3-
; RUN: opt -S -passes=lowertypetests -mtriple=i686-pc-win32 %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE %s
4-
; RUN: opt -S -passes=lowertypetests -mtriple=x86_64-pc-win32 %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE %s
5-
; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
6-
; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
1+
; RUN: opt -S -passes=lowertypetests -mtriple=i686-unknown-linux-gnu %s | FileCheck --check-prefixes=X86,X86-LINUX,NATIVE,JT8 %s
2+
; RUN: opt -S -passes=lowertypetests -mtriple=x86_64-unknown-linux-gnu %s | FileCheck --check-prefixes=X86,X86-LINUX,NATIVE,JT8 %s
3+
; RUN: opt -S -passes=lowertypetests -mtriple=i686-pc-win32 %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE,JT8 %s
4+
; RUN: opt -S -passes=lowertypetests -mtriple=x86_64-pc-win32 %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE,JT8 %s
5+
; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE,JT8 %s
6+
; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE,JT8 %s
77
; RUN: opt -S -passes=lowertypetests -mtriple=wasm32-unknown-unknown %s | FileCheck --check-prefix=WASM32 %s
8-
; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=LOONGARCH64,NATIVE %s
8+
; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=LOONGARCH64,NATIVE,JT8 %s
99

1010
; The right format for Arm jump tables depends on the selected
1111
; subtarget, so we can't get these tests right without the Arm target
1212
; compiled in.
13-
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=arm-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s %}
14-
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv7m-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE %s %}
15-
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv8m.base-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE %s %}
16-
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv6m-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMBV6M,NATIVE %s %}
17-
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv5-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s %}
18-
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s %}
13+
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=arm-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE,JT4 %s %}
14+
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv7m-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE,JT4 %s %}
15+
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv8m.base-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE,JT4 %s %}
16+
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv6m-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMBV6M,NATIVE,JT16 %s %}
17+
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv5-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE,JT4 %s %}
18+
; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE,JT4 %s %}
1919

2020
; Tests that we correctly handle bitsets containing 2 or more functions.
2121

@@ -54,20 +54,18 @@ define internal void @g() !type !0 {
5454
declare i1 @llvm.type.test(ptr %ptr, metadata %bitset) noinline readnone
5555

5656
define i1 @foo(ptr %p) {
57-
; NATIVE: sub i64 {{.*}}, ptrtoint (ptr @[[JT]] to i64)
58-
; WASM32: sub i64 {{.*}}, ptrtoint (ptr getelementptr (i8, ptr null, i64 1) to i64)
57+
; JT4: sub i64 ptrtoint (ptr getelementptr (i8, ptr @[[JT]], i64 4) to i64), {{.*}}
58+
; JT8: sub i64 ptrtoint (ptr getelementptr (i8, ptr @[[JT]], i64 8) to i64), {{.*}}
59+
; JT16: sub i64 ptrtoint (ptr getelementptr (i8, ptr @[[JT]], i64 16) to i64), {{.*}}
60+
; WASM32: sub i64 ptrtoint (ptr getelementptr (i8, ptr null, i64 2) to i64), {{.*}}
5961
; WASM32: icmp ule i64 {{.*}}, 1
6062
%x = call i1 @llvm.type.test(ptr %p, metadata !"typeid1")
6163
ret i1 %x
6264
}
6365

64-
; X86-LINUX: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
65-
; X86-WIN32: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
66-
; ARM: define private void @[[JT]]() #[[ATTR:.*]] align 4 {
67-
; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 {
68-
; THUMBV6M: define private void @[[JT]]() #[[ATTR:.*]] align 16 {
69-
; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
70-
; LOONGARCH64: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
66+
; JT4: define private void @[[JT]]() #[[ATTR:.*]] align 4 {
67+
; JT8: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
68+
; JT16: define private void @[[JT]]() #[[ATTR:.*]] align 16 {
7169

7270
; X86: jmp ${0:c}@plt
7371
; X86-SAME: int3

0 commit comments

Comments
 (0)