Skip to content

Commit 40d952b

Browse files
authored
[CGP] Avoid replacing a free ext with multiple other exts. (#77094)
Replacing a free extension with two or more extensions unnecessarily increases the number of IR instructions without providing any benefit. It also causes operations to be performed on wider types than necessary. In some cases, the extra extensions also pessimize codegen (see bfis-in-loop.ll). The changes in arm64-codegen-prepare-extload.ll also show that we now avoid promotions that should only be performed in stress mode. PR: #77094
1 parent ba52f06 commit 40d952b

File tree

5 files changed

+81
-73
lines changed

5 files changed

+81
-73
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6013,15 +6013,18 @@ bool CodeGenPrepare::tryToPromoteExts(
60136013
// cut this search path, because it means we degrade the code quality.
60146014
// With exactly 2, the transformation is neutral, because we will merge
60156015
// one extension but leave one. However, we optimistically keep going,
6016-
// because the new extension may be removed too.
6016+
// because the new extension may be removed too. Also avoid replacing a
6017+
// single free extension with multiple extensions, as this increases the
6018+
// number of IR instructions while not providing any savings.
60176019
long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
60186020
// FIXME: It would be possible to propagate a negative value instead of
60196021
// conservatively ceiling it to 0.
60206022
TotalCreatedInstsCost =
60216023
std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
60226024
if (!StressExtLdPromotion &&
60236025
(TotalCreatedInstsCost > 1 ||
6024-
!isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
6026+
!isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6027+
(ExtCost == 0 && NewExts.size() > 1))) {
60256028
// This promotion is not profitable, rollback to the previous state, and
60266029
// save the current extension in ProfitablyMovedExts as the latest
60276030
// speculative promotion turned out to be unprofitable.

llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -528,10 +528,14 @@ entry:
528528
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, ptr %p
529529
;
530530
; This transformation should really happen only for stress mode.
531-
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
532-
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
533-
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
534-
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
531+
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
532+
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
533+
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
534+
; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
535+
;
536+
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
537+
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
538+
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
535539
;
536540
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
537541
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
@@ -583,9 +587,13 @@ entry:
583587
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, ptr %p
584588
;
585589
; This transformation should really happen only for stress mode.
586-
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
587-
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
588-
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
590+
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
591+
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
592+
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
593+
;
594+
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
595+
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
596+
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
589597
;
590598
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
591599
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b

llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ define void @avoid_promotion_1_and(ptr nocapture noundef %arg, ptr %p) {
2424
; CHECK-NEXT: ldr w11, [x1, #76]
2525
; CHECK-NEXT: ldr w12, [x1]
2626
; CHECK-NEXT: eor w10, w10, w11
27-
; CHECK-NEXT: and x10, x10, x12
27+
; CHECK-NEXT: and w10, w10, w12
2828
; CHECK-NEXT: str w10, [x0, #32]
29-
; CHECK-NEXT: strh w9, [x1, x10, lsl #1]
29+
; CHECK-NEXT: strh w9, [x1, w10, uxtw #1]
3030
; CHECK-NEXT: b LBB0_1
3131
bb:
3232
%gep = getelementptr inbounds %struct.zot, ptr %arg, i64 0, i32 9
@@ -81,13 +81,13 @@ define void @avoid_promotion_2_and(ptr nocapture noundef %arg) {
8181
; CHECK-NEXT: ldrb w11, [x11, x12]
8282
; CHECK-NEXT: eor w10, w10, w11
8383
; CHECK-NEXT: ldur w11, [x8, #-24]
84-
; CHECK-NEXT: and x10, x10, x14
84+
; CHECK-NEXT: and w10, w10, w14
8585
; CHECK-NEXT: ldp x15, x14, [x8, #-16]
86-
; CHECK-NEXT: lsl x13, x10, #1
86+
; CHECK-NEXT: ubfiz x13, x10, #1, #32
8787
; CHECK-NEXT: str w10, [x8]
88-
; CHECK-NEXT: and x10, x11, x12
88+
; CHECK-NEXT: and w10, w11, w12
8989
; CHECK-NEXT: ldrh w11, [x14, x13]
90-
; CHECK-NEXT: strh w11, [x15, x10, lsl #1]
90+
; CHECK-NEXT: strh w11, [x15, w10, uxtw #1]
9191
; CHECK-NEXT: strh w12, [x14, x13]
9292
; CHECK-NEXT: b LBB1_1
9393
; CHECK-NEXT: LBB1_4: ; %exit

llvm/test/CodeGen/AArch64/bfis-in-loop.ll

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,18 @@ define i64 @bfis_in_loop_zero() {
2020
; CHECK-NEXT: ldr x8, [x8]
2121
; CHECK-NEXT: .LBB0_1: // %midblock
2222
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
23-
; CHECK-NEXT: ldrh w10, [x8, #72]
24-
; CHECK-NEXT: ldr x13, [x8, #8]
25-
; CHECK-NEXT: ubfx x11, x10, #8, #24
26-
; CHECK-NEXT: cmp w10, #0
27-
; CHECK-NEXT: and x10, x10, #0xff
28-
; CHECK-NEXT: cset w12, ne
29-
; CHECK-NEXT: ldr x8, [x13, #16]
30-
; CHECK-NEXT: csel w9, w9, w11, eq
31-
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
32-
; CHECK-NEXT: orr x10, x10, x9, lsl #8
33-
; CHECK-NEXT: orr x11, x11, x12, lsl #16
34-
; CHECK-NEXT: orr x0, x11, x10
35-
; CHECK-NEXT: cbnz x13, .LBB0_1
23+
; CHECK-NEXT: ldrh w10, [x8, #72]
24+
; CHECK-NEXT: ldr x13, [x8, #8]
25+
; CHECK-NEXT: lsr w11, w10, #8
26+
; CHECK-NEXT: cmp w10, #0
27+
; CHECK-NEXT: ldr x8, [x13, #16]
28+
; CHECK-NEXT: cset w12, ne
29+
; CHECK-NEXT: csel w9, w9, w11, eq
30+
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
31+
; CHECK-NEXT: bfi w10, w9, #8, #24
32+
; CHECK-NEXT: orr x11, x11, x12, lsl #16
33+
; CHECK-NEXT: orr x0, x11, x10
34+
; CHECK-NEXT: cbnz x13, .LBB0_1
3635
; CHECK-NEXT: // %bb.2: // %exit
3736
; CHECK-NEXT: ret
3837
entry:
@@ -88,19 +87,18 @@ define i64 @bfis_in_loop_undef() {
8887
; CHECK-NEXT: ldr x9, [x9]
8988
; CHECK-NEXT: .LBB1_1: // %midblock
9089
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
91-
; CHECK-NEXT: ldrh w10, [x9, #72]
92-
; CHECK-NEXT: ldr x13, [x9, #8]
93-
; CHECK-NEXT: ubfx x11, x10, #8, #24
94-
; CHECK-NEXT: cmp w10, #0
95-
; CHECK-NEXT: and x10, x10, #0xff
96-
; CHECK-NEXT: cset w12, ne
97-
; CHECK-NEXT: ldr x9, [x13, #16]
98-
; CHECK-NEXT: csel w8, w8, w11, eq
99-
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
100-
; CHECK-NEXT: orr x10, x10, x8, lsl #8
101-
; CHECK-NEXT: orr x11, x11, x12, lsl #16
102-
; CHECK-NEXT: orr x0, x11, x10
103-
; CHECK-NEXT: cbnz x13, .LBB1_1
90+
; CHECK-NEXT: ldrh w10, [x9, #72]
91+
; CHECK-NEXT: ldr x13, [x9, #8]
92+
; CHECK-NEXT: lsr w11, w10, #8
93+
; CHECK-NEXT: cmp w10, #0
94+
; CHECK-NEXT: ldr x9, [x13, #16]
95+
; CHECK-NEXT: cset w12, ne
96+
; CHECK-NEXT: csel w8, w8, w11, eq
97+
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
98+
; CHECK-NEXT: bfi w10, w8, #8, #24
99+
; CHECK-NEXT: orr x11, x11, x12, lsl #16
100+
; CHECK-NEXT: orr x0, x11, x10
101+
; CHECK-NEXT: cbnz x13, .LBB1_1
104102
; CHECK-NEXT: // %bb.2: // %exit
105103
; CHECK-NEXT: ret
106104
entry:

llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll

Lines changed: 31 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -20,57 +20,56 @@ define i32 @decode_sb(ptr %t, i32 %bl, i32 %_msprop1966, i32 %sub.i, i64 %idxpro
2020
; CHECK-NEXT: pushq %r13
2121
; CHECK-NEXT: pushq %r12
2222
; CHECK-NEXT: pushq %rbx
23-
; CHECK-NEXT: subq $24, %rsp
23+
; CHECK-NEXT: pushq %rax
2424
; CHECK-NEXT: .cfi_offset %rbx, -56
2525
; CHECK-NEXT: .cfi_offset %r12, -48
2626
; CHECK-NEXT: .cfi_offset %r13, -40
2727
; CHECK-NEXT: .cfi_offset %r14, -32
2828
; CHECK-NEXT: .cfi_offset %r15, -24
2929
; CHECK-NEXT: movl %r9d, %ebx
30+
; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
3031
; CHECK-NEXT: movabsq $87960930222080, %r15 # imm = 0x500000000000
31-
; CHECK-NEXT: movl 0, %r13d
32+
; CHECK-NEXT: movl 0, %r11d
3233
; CHECK-NEXT: movl %esi, %r12d
33-
; CHECK-NEXT: # implicit-def: $eax
34-
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
34+
; CHECK-NEXT: # implicit-def: $r13d
3535
; CHECK-NEXT: testb $1, %bl
3636
; CHECK-NEXT: jne .LBB0_7
3737
; CHECK-NEXT: # %bb.1: # %if.else
3838
; CHECK-NEXT: movq %r8, %r14
39-
; CHECK-NEXT: movl %ecx, %eax
40-
; CHECK-NEXT: andl $1, %eax
41-
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
42-
; CHECK-NEXT: movzbl 544(%rax), %eax
43-
; CHECK-NEXT: andl $1, %eax
39+
; CHECK-NEXT: movl %ecx, %r13d
40+
; CHECK-NEXT: andl $1, %r13d
41+
; CHECK-NEXT: movzbl 544(%r13), %r8d
42+
; CHECK-NEXT: andl $1, %r8d
4443
; CHECK-NEXT: movl %r15d, %r9d
4544
; CHECK-NEXT: andl $1, %r9d
4645
; CHECK-NEXT: movl %r14d, %r10d
4746
; CHECK-NEXT: andl $1, %r10d
48-
; CHECK-NEXT: movl %esi, %r11d
47+
; CHECK-NEXT: movabsq $17592186044416, %rax # imm = 0x100000000000
48+
; CHECK-NEXT: orq %r10, %rax
49+
; CHECK-NEXT: movl %esi, %r10d
4950
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
50-
; CHECK-NEXT: shrl %cl, %r11d
51-
; CHECK-NEXT: movabsq $17592186044416, %r8 # imm = 0x100000000000
52-
; CHECK-NEXT: orq %r10, %r8
53-
; CHECK-NEXT: andl $2, %r11d
51+
; CHECK-NEXT: shrl %cl, %r10d
52+
; CHECK-NEXT: andl $2, %r10d
5453
; CHECK-NEXT: testb $1, %bl
55-
; CHECK-NEXT: cmoveq %r9, %r8
56-
; CHECK-NEXT: movl %edx, %ecx
57-
; CHECK-NEXT: orq %rax, %rcx
58-
; CHECK-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
59-
; CHECK-NEXT: orq $1, %r13
60-
; CHECK-NEXT: orl %esi, %r11d
61-
; CHECK-NEXT: movl $1, %edx
54+
; CHECK-NEXT: cmoveq %r9, %rax
55+
; CHECK-NEXT: orl %r8d, %edx
56+
; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
57+
; CHECK-NEXT: movq %r11, %rcx
58+
; CHECK-NEXT: orq $1, %rcx
59+
; CHECK-NEXT: orl %esi, %r10d
60+
; CHECK-NEXT: movl $1, %r8d
6261
; CHECK-NEXT: je .LBB0_3
6362
; CHECK-NEXT: # %bb.2: # %if.else
64-
; CHECK-NEXT: movl (%r8), %edx
63+
; CHECK-NEXT: movl (%rax), %r8d
6564
; CHECK-NEXT: .LBB0_3: # %if.else
66-
; CHECK-NEXT: shlq $5, %rcx
67-
; CHECK-NEXT: movq %r12, %rsi
68-
; CHECK-NEXT: shlq $7, %rsi
69-
; CHECK-NEXT: addq %rcx, %rsi
65+
; CHECK-NEXT: shlq $5, %rdx
66+
; CHECK-NEXT: movq %r12, %rax
67+
; CHECK-NEXT: shlq $7, %rax
68+
; CHECK-NEXT: leaq (%rax,%rdx), %rsi
7069
; CHECK-NEXT: addq $1248, %rsi # imm = 0x4E0
71-
; CHECK-NEXT: movq %r13, 0
70+
; CHECK-NEXT: movq %rcx, 0
7271
; CHECK-NEXT: movq %rdi, %r15
73-
; CHECK-NEXT: movl %edx, (%rdi)
72+
; CHECK-NEXT: movl %r8d, (%rdi)
7473
; CHECK-NEXT: xorl %eax, %eax
7574
; CHECK-NEXT: xorl %edi, %edi
7675
; CHECK-NEXT: xorl %edx, %edx
@@ -86,23 +85,23 @@ define i32 @decode_sb(ptr %t, i32 %bl, i32 %_msprop1966, i32 %sub.i, i64 %idxpro
8685
; CHECK-NEXT: testb $1, %bl
8786
; CHECK-NEXT: movq %r15, %rdi
8887
; CHECK-NEXT: movabsq $87960930222080, %r15 # imm = 0x500000000000
89-
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
88+
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
9089
; CHECK-NEXT: jne .LBB0_8
9190
; CHECK-NEXT: .LBB0_7: # %if.end69
92-
; CHECK-NEXT: movl %r13d, 0
91+
; CHECK-NEXT: movl %r11d, 0
9392
; CHECK-NEXT: xorl %eax, %eax
9493
; CHECK-NEXT: xorl %esi, %esi
9594
; CHECK-NEXT: xorl %edx, %edx
9695
; CHECK-NEXT: xorl %ecx, %ecx
9796
; CHECK-NEXT: xorl %r8d, %r8d
9897
; CHECK-NEXT: callq *%rax
9998
; CHECK-NEXT: xorq %r15, %r12
100-
; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload
99+
; CHECK-NEXT: movslq %r13d, %rax
101100
; CHECK-NEXT: movzbl (%r12), %ecx
102101
; CHECK-NEXT: movb %cl, 544(%rax)
103102
; CHECK-NEXT: .LBB0_8: # %land.lhs.true56
104103
; CHECK-NEXT: xorl %eax, %eax
105-
; CHECK-NEXT: addq $24, %rsp
104+
; CHECK-NEXT: addq $8, %rsp
106105
; CHECK-NEXT: popq %rbx
107106
; CHECK-NEXT: popq %r12
108107
; CHECK-NEXT: popq %r13

0 commit comments

Comments
 (0)