Skip to content

Commit ea222be

Browse files
authored
[MC] Honour alignment directive fill value for non-intel (#100136)
As reported in https://llvm.org/PR30955, `.balign` with a fill-value of 0 did not actually align using zeroes, on non-x86 targets. This is because the check of whether to use the code alignment routines or whether to just use the fill value was checking whether the fill value was equal to `TextAlignFillValue`, which has not been changed from its default of 0 on most targets (it has been changed for x86). However, most targets do not set the fill value because it doesn't entirely make sense -- i.e. on AArch64 there's no reasonable byte value to use for alignment, as instructions are word-sized and have to be well-aligned. I think the check at the end `AsmParser::parseDirectiveAlign` is suspicious even on x86 - if you use `.balign <align>, 0x90` in a code section, you don't end up with a block of `0x90` repeated, you end up with a block of NOPs of various widths. This functionality is never tested. The fix here is to modify the check to ignore the default text align fill value when choosing to do code alignment or not. Fixes #30303
1 parent bb0300c commit ea222be

File tree

14 files changed

+149
-41
lines changed

14 files changed

+149
-41
lines changed

lld/test/COFF/lto-cpu-string.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ target triple = "x86_64-pc-windows-msvc19.14.26433"
1414

1515
define dllexport void @foo() #0 {
1616
entry:
17-
call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"()
17+
call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"()
1818
ret void
1919
}
2020

lld/test/ELF/lto/cpu-string.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ target triple = "x86_64-unknown-linux-gnu"
1818

1919
define void @foo() #0 {
2020
entry:
21-
call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"()
21+
call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"()
2222
ret void
2323
}
2424

lld/test/ELF/lto/mllvm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
1717

1818
define void @_start() #0 {
1919
entry:
20-
call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"()
20+
call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"()
2121
ret void
2222
}
2323

lld/test/MachO/lto-cpu-string.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
1616

1717
define void @foo() #0 {
1818
entry:
19-
call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"()
19+
call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"()
2020
ret void
2121
}
2222

llvm/docs/ReleaseNotes.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,20 @@ Changes to Interprocedural Optimizations
6565
Changes to the AArch64 Backend
6666
------------------------------
6767

68+
* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill
69+
the required alignment space with a sequence of `0x0` bytes (the requested
70+
fill value) rather than NOPs.
71+
6872
Changes to the AMDGPU Backend
6973
-----------------------------
7074

7175
Changes to the ARM Backend
7276
--------------------------
7377

78+
* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill
79+
the required alignment space with a sequence of `0x0` bytes (the requested
80+
fill value) rather than NOPs.
81+
7482
Changes to the AVR Backend
7583
--------------------------
7684

@@ -92,6 +100,10 @@ Changes to the PowerPC Backend
92100
Changes to the RISC-V Backend
93101
-----------------------------
94102

103+
* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill
104+
the required alignment space with a sequence of `0x0` bytes (the requested
105+
fill value) rather than NOPs.
106+
95107
Changes to the WebAssembly Backend
96108
----------------------------------
97109

@@ -101,6 +113,13 @@ Changes to the Windows Target
101113
Changes to the X86 Backend
102114
--------------------------
103115

116+
* `.balign N, 0x90`, `.p2align N, 0x90`, and `.align N, 0x90` in code sections
117+
now fill the required alignment space with repeating `0x90` bytes, rather than
118+
using optimised NOP filling. Optimised NOP filling fills the space with NOP
119+
instructions of various widths, not just those that use the `0x90` byte
120+
encoding. To use optimised NOP filling in a code section, leave off the
121+
"fillval" argument, i.e. `.balign N`, `.p2align N` or `.align N` respectively.
122+
104123
Changes to the OCaml bindings
105124
-----------------------------
106125

llvm/lib/MC/MCParser/AsmParser.cpp

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3462,17 +3462,6 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
34623462
}
34633463
}
34643464

3465-
if (HasFillExpr && FillExpr != 0) {
3466-
MCSection *Sec = getStreamer().getCurrentSectionOnly();
3467-
if (Sec && Sec->isVirtualSection()) {
3468-
ReturnVal |=
3469-
Warning(FillExprLoc, "ignoring non-zero fill value in " +
3470-
Sec->getVirtualSectionKind() + " section '" +
3471-
Sec->getName() + "'");
3472-
FillExpr = 0;
3473-
}
3474-
}
3475-
34763465
// Diagnose non-sensical max bytes to align.
34773466
if (MaxBytesLoc.isValid()) {
34783467
if (MaxBytesToFill < 1) {
@@ -3489,13 +3478,20 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
34893478
}
34903479
}
34913480

3492-
// Check whether we should use optimal code alignment for this .align
3493-
// directive.
34943481
const MCSection *Section = getStreamer().getCurrentSectionOnly();
34953482
assert(Section && "must have section to emit alignment");
3496-
bool useCodeAlign = Section->useCodeAlign();
3497-
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
3498-
ValueSize == 1 && useCodeAlign) {
3483+
3484+
if (HasFillExpr && FillExpr != 0 && Section->isVirtualSection()) {
3485+
ReturnVal |=
3486+
Warning(FillExprLoc, "ignoring non-zero fill value in " +
3487+
Section->getVirtualSectionKind() +
3488+
" section '" + Section->getName() + "'");
3489+
FillExpr = 0;
3490+
}
3491+
3492+
// Check whether we should use optimal code alignment for this .align
3493+
// directive.
3494+
if (Section->useCodeAlign() && !HasFillExpr) {
34993495
getStreamer().emitCodeAlignment(
35003496
Align(Alignment), &getTargetParser().getSTI(), MaxBytesToFill);
35013497
} else {
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// RUN: llvm-mc -triple aarch64 %s -o - | FileCheck %s --check-prefix=ASM
2+
// RUN: llvm-mc -triple aarch64 -filetype obj %s -o - | \
3+
// RUN: llvm-objdump -dz - | FileCheck %s --check-prefix=OBJ
4+
5+
// llvm.org/pr30955 - LLVM was handling `.balign <alignment>, 0` strangely on
6+
// non-x86 targets.
7+
8+
.text
9+
10+
// ASM: add x14, x14, #1
11+
// OBJ: 910005ce add x14, x14, #0x1
12+
add x14, x14, 0x1
13+
14+
// ASM: .p2align 4, 0x0
15+
// OBJ-NEXT: 00000000 udf #0x0
16+
// OBJ-NEXT: 00000000 udf #0x0
17+
// OBJ-NEXT: 00000000 udf #0x0
18+
.balign 0x10, 0
19+
20+
// ASM: add x14, x14, #1
21+
// OBJ-NEXT: 910005ce add x14, x14, #0x1
22+
add x14, x14, 0x1
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// RUN: llvm-mc -triple armv7a %s -o - | FileCheck %s --check-prefix=ASM-ARM
2+
// RUN: llvm-mc -triple armv7a -filetype obj %s -o - | \
3+
// RUN: llvm-objdump --triple=armv7a -dz - | FileCheck %s --check-prefix=OBJ-ARM
4+
5+
// RUN: llvm-mc -triple thumbv7a %s -o - | FileCheck %s --check-prefix=ASM-THUMB
6+
// RUN: llvm-mc -triple thumbv7a -filetype obj %s -o - | \
7+
// RUN: llvm-objdump --triple=thumbv7a -dz - | FileCheck %s --check-prefix=OBJ-THUMB
8+
9+
// llvm.org/pr30955 - LLVM was handling `.balign <alignment>, 0` strangely on
10+
// non-x86 targets.
11+
12+
.text
13+
14+
// ASM-ARM: add r0, r0, #1
15+
// OBJ-ARM: e2800001 add r0, r0, #1
16+
17+
// ASM-THUMB: add.w r0, r0, #1
18+
// OBJ-THUMB: f100 0001 add.w r0, r0, #0x1
19+
add r0, r0, 0x1
20+
21+
// ASM-ARM: .p2align 4, 0x0
22+
// OBJ-ARM-NEXT: 00000000 andeq r0, r0, r0
23+
// OBJ-ARM-NEXT: 00000000 andeq r0, r0, r0
24+
// OBJ-ARM-NEXT: 00000000 andeq r0, r0, r0
25+
26+
// ASM-THUMB: .p2align 4, 0x0
27+
// OBJ-THUMB-NEXT: 0000 movs r0, r0
28+
// OBJ-THUMB-NEXT: 0000 movs r0, r0
29+
// OBJ-THUMB-NEXT: 0000 movs r0, r0
30+
// OBJ-THUMB-NEXT: 0000 movs r0, r0
31+
// OBJ-THUMB-NEXT: 0000 movs r0, r0
32+
// OBJ-THUMB-NEXT: 0000 movs r0, r0
33+
.balign 0x10, 0
34+
35+
// ASM-ARM: add r0, r0, #1
36+
// OBJ-ARM-NEXT: e2800001 add r0, r0, #1
37+
38+
// ASM-THUMB: add.w r0, r0, #1
39+
// OBJ-THUMB-NEXT: f100 0001 add.w r0, r0, #0x1
40+
add r0, r0, 0x1

llvm/test/MC/AsmParser/directive_align.s

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# RUN: not llvm-mc -triple i386-apple-darwin9 %s 2> %t.err | FileCheck %s
22
# RUN: FileCheck < %t.err %s --check-prefix=CHECK-WARN
33

4+
.data
5+
46
# CHECK: TEST0:
57
# CHECK: .p2align 1
68
TEST0:

llvm/test/MC/COFF/align-nops.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
.text
55
f0:
66
.long 0
7-
.align 8, 0x90
7+
.align 8
88
.long 0
99
.align 8
1010

llvm/test/MC/ELF/align-nops.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
.text
55
f0:
66
.long 0
7-
.align 8, 0x00000090
7+
.align 8
88
.long 0
99
.align 8
1010

1111
// But not in another section
1212
.data
1313
.long 0
14-
.align 8, 0x00000090
14+
.align 8, 0x90
1515
.long 0
1616
.align 8
1717

llvm/test/MC/MachO/x86_32-optimal_nop.s

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,22 @@
55
ret
66
# nop
77
# 0x90
8-
.align 1, 0x90
8+
.align 1
99
ret
1010
# 2 byte nop test
1111
.align 4, 0 # start with 16 byte alignment filled with zeros
1212
ret
1313
ret
1414
# xchg %ax,%ax
1515
# 0x66, 0x90
16-
.align 2, 0x90
16+
.align 2
1717
ret
1818
# 3 byte nop test
1919
.align 4, 0 # start with 16 byte alignment filled with zeros
2020
ret
2121
# nopl (%[re]ax)
2222
# 0x0f, 0x1f, 0x00
23-
.align 2, 0x90
23+
.align 2
2424
ret
2525
# 4 byte nop test
2626
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -30,7 +30,7 @@
3030
ret
3131
# nopl 0(%[re]ax)
3232
# 0x0f, 0x1f, 0x40, 0x00
33-
.align 3, 0x90
33+
.align 3
3434
ret
3535
# 5 byte nop test
3636
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -39,22 +39,22 @@
3939
ret
4040
# nopl 0(%[re]ax,%[re]ax,1)
4141
# 0x0f, 0x1f, 0x44, 0x00, 0x00
42-
.align 3, 0x90
42+
.align 3
4343
ret
4444
# 6 byte nop test
4545
.align 4, 0 # start with 16 byte alignment filled with zeros
4646
ret
4747
ret
4848
# nopw 0(%[re]ax,%[re]ax,1)
4949
# 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00
50-
.align 3, 0x90
50+
.align 3
5151
ret
5252
# 7 byte nop test
5353
.align 4, 0 # start with 16 byte alignment filled with zeros
5454
ret
5555
# nopl 0L(%[re]ax)
5656
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
57-
.align 3, 0x90
57+
.align 3
5858
ret
5959
# 8 byte nop test
6060
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -68,7 +68,7 @@
6868
ret
6969
# nopl 0L(%[re]ax,%[re]ax,1)
7070
# 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
71-
.align 3, 0x90
71+
.align 3
7272
ret
7373
# 9 byte nop test
7474
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -81,7 +81,7 @@
8181
ret
8282
# nopw 0L(%[re]ax,%[re]ax,1)
8383
# 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
84-
.align 4, 0x90
84+
.align 4
8585
ret
8686
# 10 byte nop test
8787
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -94,7 +94,7 @@
9494
ret
9595
# nopw %cs:0L(%[re]ax,%[re]ax,1)
9696
# 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
97-
.align 4, 0x90
97+
.align 4
9898
ret
9999
# 11 byte nop test
100100
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -105,7 +105,7 @@
105105
ret
106106
# nopw %cs:0L(%[re]ax,%[re]ax,1)
107107
# 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
108-
.align 4, 0x90
108+
.align 4
109109
ret
110110
# 12 byte nop test
111111
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -117,7 +117,7 @@
117117
# nopw 0(%[re]ax,%[re]ax,1)
118118
# 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
119119
# 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00
120-
.align 4, 0x90
120+
.align 4
121121
ret
122122
# 13 byte nop test
123123
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -128,7 +128,7 @@
128128
# nopl 0L(%[re]ax)
129129
# 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
130130
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
131-
.align 4, 0x90
131+
.align 4
132132
ret
133133
# 14 byte nop test
134134
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -138,7 +138,7 @@
138138
# nopl 0L(%[re]ax)
139139
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
140140
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
141-
.align 4, 0x90
141+
.align 4
142142
ret
143143
# 15 byte nop test
144144
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -147,7 +147,7 @@
147147
# nopl 0L(%[re]ax,%[re]ax,1)
148148
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
149149
# 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
150-
.align 4, 0x90
150+
.align 4
151151
ret
152152

153153
# Only the .text sections gets optimal nops.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: llvm-mc -triple riscv32 %s -o - | FileCheck %s --check-prefix=ASM
2+
// RUN: llvm-mc -triple riscv32 -filetype obj %s -o - | \
3+
// RUN: llvm-objdump -dz - | FileCheck %s --check-prefix=OBJ
4+
5+
// RUN: llvm-mc -triple riscv64 %s -o - | FileCheck %s --check-prefix=ASM
6+
// RUN: llvm-mc -triple riscv64 -filetype obj %s -o - | \
7+
// RUN: llvm-objdump -dz - | FileCheck %s --check-prefix=OBJ
8+
9+
// llvm.org/pr30955 - LLVM was handling `.balign <alignment>, 0` strangely on
10+
// non-x86 targets.
11+
12+
.text
13+
14+
// ASM: addi a0, a0, 1
15+
// OBJ: 00150513 addi a0, a0, 0x1
16+
addi a0, a0, 0x1
17+
18+
// ASM: .p2align 4, 0x0
19+
// OBJ-NEXT: 0000 <unknown>
20+
// OBJ-NEXT: 0000 <unknown>
21+
// OBJ-NEXT: 0000 <unknown>
22+
// OBJ-NEXT: 0000 <unknown>
23+
// OBJ-NEXT: 0000 <unknown>
24+
// OBJ-NEXT: 0000 <unknown>
25+
.balign 0x10, 0
26+
27+
// ASM: addi a0, a0, 1
28+
// OBJ-NEXT: 00150513 addi a0, a0, 0x1
29+
addi a0, a0, 0x1

0 commit comments

Comments
 (0)