Skip to content

Commit 1b39328

Browse files
[CodeGen] Fix MachineInstr::isSafeToMove handling of inline asm. (#126807)
Even if an inline asm doesn't have memory effects, we can't assume it's safe to speculate: it could trap, or cause undefined behavior. At the LLVM IR level, this is handled correctly: we don't speculate inline asm (unless it's marked "speculatable", but I don't think anyone does that). Codegen also needs to respect this restriction. This change stops Early If Conversion and similar passes from speculating an INLINEASM MachineInstr. Some uses of isSafeToMove probably could be switched to a different API: isSafeToMove assumes you're hoisting, but we could handle some forms of sinking more aggressively. But I'll leave that for a followup, if it turns out to be relevant. See also discussion on gcc bugtracker https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102150 .
1 parent fc655b1 commit 1b39328

File tree

8 files changed

+94
-42
lines changed

8 files changed

+94
-42
lines changed

llvm/lib/CodeGen/MachineInstr.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1308,11 +1308,28 @@ bool MachineInstr::isSafeToMove(bool &SawStore) const {
13081308
return false;
13091309
}
13101310

1311+
// Don't touch instructions that have non-trivial invariants. For example,
1312+
// terminators have to be at the end of a basic block.
13111313
if (isPosition() || isDebugInstr() || isTerminator() ||
1312-
mayRaiseFPException() || hasUnmodeledSideEffects() ||
13131314
isJumpTableDebugInfo())
13141315
return false;
13151316

1317+
// Don't touch instructions which can have non-load/store effects.
1318+
//
1319+
// Inline asm has a "sideeffect" marker to indicate whether the asm has
1320+
// intentional side-effects. Even if an inline asm is not "sideeffect",
1321+
// though, it still can't be speculatively executed: the operation might
1322+
// not be valid on the current target, or for some combinations of operands.
1323+
// (Some transforms that move an instruction don't speculatively execute it;
1324+
// we currently don't try to handle that distinction here.)
1325+
//
1326+
// Other instructions handled here include those that can raise FP
1327+
// exceptions, x86 "DIV" instructions which trap on divide by zero, and
1328+
// stack adjustments.
1329+
if (mayRaiseFPException() || hasProperty(MCID::UnmodeledSideEffects) ||
1330+
isInlineAsm())
1331+
return false;
1332+
13161333
// See if this instruction does a load. If so, we have to guarantee that the
13171334
// loaded value doesn't change between the load and the its intended
13181335
// destination. The check for isInvariantLoad gives the target the chance to
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
3+
; Make sure we don't speculatively execute inline asm statements.
4+
5+
define dso_local i32 @main(i32 %argc, ptr %argv) {
6+
; CHECK-LABEL: main:
7+
; CHECK: // %bb.0: // %entry
8+
; CHECK-NEXT: cmp w0, #3
9+
; CHECK-NEXT: b.ne .LBB0_2
10+
; CHECK-NEXT: // %bb.1: // %if.then
11+
; CHECK-NEXT: //APP
12+
; CHECK-NEXT: mrs x0, SPSR_EL2
13+
; CHECK-NEXT: //NO_APP
14+
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
15+
; CHECK-NEXT: ret
16+
; CHECK-NEXT: .LBB0_2: // %if.else
17+
; CHECK-NEXT: //APP
18+
; CHECK-NEXT: mrs x0, SPSR_EL1
19+
; CHECK-NEXT: //NO_APP
20+
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
21+
; CHECK-NEXT: ret
22+
entry:
23+
%cmp = icmp eq i32 %argc, 3
24+
br i1 %cmp, label %if.then, label %if.else
25+
26+
if.then:
27+
%0 = tail call i64 asm "mrs $0, SPSR_EL2", "=r"()
28+
br label %if.end
29+
30+
if.else:
31+
%1 = tail call i64 asm "mrs $0, SPSR_EL1", "=r"()
32+
br label %if.end
33+
34+
if.end:
35+
%y.0.in = phi i64 [ %0, %if.then ], [ %1, %if.else ]
36+
%y.0 = trunc i64 %y.0.in to i32
37+
ret i32 %y.0
38+
}

llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,8 @@ bb5: ; preds = %bb3, %bb
2323
}
2424

2525
; GCN-LABEL: {{^}}nonconvergent_inlineasm:
26-
; GCN: s_cbranch_execz
27-
28-
; GCN: ; %bb.{{[0-9]+}}:
2926
; GCN: v_cmp_ne_u32_e64
30-
31-
; GCN: BB{{[0-9]+_[0-9]+}}:
27+
; GCN: s_cbranch_execz
3228

3329
define amdgpu_kernel void @nonconvergent_inlineasm(ptr addrspace(1) nocapture %arg) {
3430
bb:

llvm/test/CodeGen/AMDGPU/early-if-convert.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,8 @@ endif:
5555
}
5656

5757
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_vcc_clobber:
58+
; GCN: s_cbranch_vccnz
5859
; GCN: ; clobber vcc
59-
; GCN: v_cmp_neq_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
60-
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc
61-
; GCN: s_mov_b64 vcc, [[CMP]]
62-
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc
6360
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_vcc_clobber(ptr addrspace(1) %out, ptr addrspace(1) %in, float %k) #0 {
6461
entry:
6562
%v = load i32, ptr addrspace(1) %in

llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2015,13 +2015,14 @@ define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 {
20152015
; NOSDWA-LABEL: sdwa_crash_inlineasm_def:
20162016
; NOSDWA: ; %bb.0: ; %bb
20172017
; NOSDWA-NEXT: s_mov_b32 s0, 0xffff
2018+
; NOSDWA-NEXT: s_and_b64 vcc, exec, -1
2019+
; NOSDWA-NEXT: ; implicit-def: $vgpr0_vgpr1
2020+
; NOSDWA-NEXT: .LBB21_1: ; %bb1
2021+
; NOSDWA-NEXT: ; =>This Inner Loop Header: Depth=1
20182022
; NOSDWA-NEXT: ;;#ASMSTART
20192023
; NOSDWA-NEXT: v_and_b32_e32 v0, s0, v0
20202024
; NOSDWA-NEXT: ;;#ASMEND
20212025
; NOSDWA-NEXT: v_or_b32_e32 v0, 0x10000, v0
2022-
; NOSDWA-NEXT: s_and_b64 vcc, exec, -1
2023-
; NOSDWA-NEXT: .LBB21_1: ; %bb1
2024-
; NOSDWA-NEXT: ; =>This Inner Loop Header: Depth=1
20252026
; NOSDWA-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
20262027
; NOSDWA-NEXT: s_waitcnt vmcnt(0)
20272028
; NOSDWA-NEXT: s_mov_b64 vcc, vcc
@@ -2032,13 +2033,14 @@ define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 {
20322033
; GFX89-LABEL: sdwa_crash_inlineasm_def:
20332034
; GFX89: ; %bb.0: ; %bb
20342035
; GFX89-NEXT: s_mov_b32 s0, 0xffff
2036+
; GFX89-NEXT: s_and_b64 vcc, exec, -1
2037+
; GFX89-NEXT: ; implicit-def: $vgpr0_vgpr1
2038+
; GFX89-NEXT: .LBB21_1: ; %bb1
2039+
; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
20352040
; GFX89-NEXT: ;;#ASMSTART
20362041
; GFX89-NEXT: v_and_b32_e32 v0, s0, v0
20372042
; GFX89-NEXT: ;;#ASMEND
20382043
; GFX89-NEXT: v_or_b32_e32 v0, 0x10000, v0
2039-
; GFX89-NEXT: s_and_b64 vcc, exec, -1
2040-
; GFX89-NEXT: .LBB21_1: ; %bb1
2041-
; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
20422044
; GFX89-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
20432045
; GFX89-NEXT: s_waitcnt vmcnt(0)
20442046
; GFX89-NEXT: s_mov_b64 vcc, vcc
@@ -2049,13 +2051,14 @@ define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 {
20492051
; GFX9-LABEL: sdwa_crash_inlineasm_def:
20502052
; GFX9: ; %bb.0: ; %bb
20512053
; GFX9-NEXT: s_mov_b32 s0, 0xffff
2054+
; GFX9-NEXT: s_and_b64 vcc, exec, -1
2055+
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
2056+
; GFX9-NEXT: .LBB21_1: ; %bb1
2057+
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
20522058
; GFX9-NEXT: ;;#ASMSTART
20532059
; GFX9-NEXT: v_and_b32_e32 v0, s0, v0
20542060
; GFX9-NEXT: ;;#ASMEND
20552061
; GFX9-NEXT: v_or_b32_e32 v0, 0x10000, v0
2056-
; GFX9-NEXT: s_and_b64 vcc, exec, -1
2057-
; GFX9-NEXT: .LBB21_1: ; %bb1
2058-
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
20592062
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
20602063
; GFX9-NEXT: s_waitcnt vmcnt(0)
20612064
; GFX9-NEXT: s_mov_b64 vcc, vcc
@@ -2066,13 +2069,14 @@ define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 {
20662069
; GFX10-LABEL: sdwa_crash_inlineasm_def:
20672070
; GFX10: ; %bb.0: ; %bb
20682071
; GFX10-NEXT: s_mov_b32 s0, 0xffff
2072+
; GFX10-NEXT: s_mov_b32 vcc_lo, exec_lo
2073+
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
2074+
; GFX10-NEXT: .LBB21_1: ; %bb1
2075+
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
20692076
; GFX10-NEXT: ;;#ASMSTART
20702077
; GFX10-NEXT: v_and_b32_e32 v0, s0, v0
20712078
; GFX10-NEXT: ;;#ASMEND
20722079
; GFX10-NEXT: v_or_b32_e32 v0, 0x10000, v0
2073-
; GFX10-NEXT: s_mov_b32 vcc_lo, exec_lo
2074-
; GFX10-NEXT: .LBB21_1: ; %bb1
2075-
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
20762080
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
20772081
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20782082
; GFX10-NEXT: s_cbranch_vccnz .LBB21_1

llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,11 +1601,11 @@ define void @infiniteloop2() "frame-pointer"="all" {
16011601
; ARM-ENABLE-NEXT: @ %bb.1: @ %if.then
16021602
; ARM-ENABLE-NEXT: sub r1, sp, #16
16031603
; ARM-ENABLE-NEXT: mov sp, r1
1604+
; ARM-ENABLE-NEXT: LBB10_2: @ %for.body
1605+
; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
16041606
; ARM-ENABLE-NEXT: @ InlineAsm Start
16051607
; ARM-ENABLE-NEXT: mov r2, #0
16061608
; ARM-ENABLE-NEXT: @ InlineAsm End
1607-
; ARM-ENABLE-NEXT: LBB10_2: @ %for.body
1608-
; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
16091609
; ARM-ENABLE-NEXT: add r0, r2, r0
16101610
; ARM-ENABLE-NEXT: str r0, [r1]
16111611
; ARM-ENABLE-NEXT: @ InlineAsm Start
@@ -1629,11 +1629,11 @@ define void @infiniteloop2() "frame-pointer"="all" {
16291629
; ARM-DISABLE-NEXT: @ %bb.1: @ %if.then
16301630
; ARM-DISABLE-NEXT: sub r1, sp, #16
16311631
; ARM-DISABLE-NEXT: mov sp, r1
1632+
; ARM-DISABLE-NEXT: LBB10_2: @ %for.body
1633+
; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
16321634
; ARM-DISABLE-NEXT: @ InlineAsm Start
16331635
; ARM-DISABLE-NEXT: mov r2, #0
16341636
; ARM-DISABLE-NEXT: @ InlineAsm End
1635-
; ARM-DISABLE-NEXT: LBB10_2: @ %for.body
1636-
; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
16371637
; ARM-DISABLE-NEXT: add r0, r2, r0
16381638
; ARM-DISABLE-NEXT: str r0, [r1]
16391639
; ARM-DISABLE-NEXT: @ InlineAsm Start
@@ -1657,11 +1657,11 @@ define void @infiniteloop2() "frame-pointer"="all" {
16571657
; THUMB-ENABLE-NEXT: sub.w r0, sp, #16
16581658
; THUMB-ENABLE-NEXT: mov sp, r0
16591659
; THUMB-ENABLE-NEXT: movs r1, #0
1660+
; THUMB-ENABLE-NEXT: LBB10_2: @ %for.body
1661+
; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
16601662
; THUMB-ENABLE-NEXT: @ InlineAsm Start
16611663
; THUMB-ENABLE-NEXT: mov.w r2, #0
16621664
; THUMB-ENABLE-NEXT: @ InlineAsm End
1663-
; THUMB-ENABLE-NEXT: LBB10_2: @ %for.body
1664-
; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
16651665
; THUMB-ENABLE-NEXT: add r1, r2
16661666
; THUMB-ENABLE-NEXT: str r1, [r0]
16671667
; THUMB-ENABLE-NEXT: @ InlineAsm Start
@@ -1686,11 +1686,11 @@ define void @infiniteloop2() "frame-pointer"="all" {
16861686
; THUMB-DISABLE-NEXT: sub.w r0, sp, #16
16871687
; THUMB-DISABLE-NEXT: mov sp, r0
16881688
; THUMB-DISABLE-NEXT: movs r1, #0
1689+
; THUMB-DISABLE-NEXT: LBB10_2: @ %for.body
1690+
; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
16891691
; THUMB-DISABLE-NEXT: @ InlineAsm Start
16901692
; THUMB-DISABLE-NEXT: mov.w r2, #0
16911693
; THUMB-DISABLE-NEXT: @ InlineAsm End
1692-
; THUMB-DISABLE-NEXT: LBB10_2: @ %for.body
1693-
; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
16941694
; THUMB-DISABLE-NEXT: add r1, r2
16951695
; THUMB-DISABLE-NEXT: str r1, [r0]
16961696
; THUMB-DISABLE-NEXT: @ InlineAsm Start

llvm/test/CodeGen/X86/x86-shrink-wrapping.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -821,12 +821,12 @@ define void @infiniteloop() {
821821
; ENABLE-NEXT: movq %rsp, %rcx
822822
; ENABLE-NEXT: addq $-16, %rcx
823823
; ENABLE-NEXT: movq %rcx, %rsp
824-
; ENABLE-NEXT: ## InlineAsm Start
825-
; ENABLE-NEXT: movl $1, %edx
826-
; ENABLE-NEXT: ## InlineAsm End
827824
; ENABLE-NEXT: .p2align 4
828825
; ENABLE-NEXT: LBB10_2: ## %for.body
829826
; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
827+
; ENABLE-NEXT: ## InlineAsm Start
828+
; ENABLE-NEXT: movl $1, %edx
829+
; ENABLE-NEXT: ## InlineAsm End
830830
; ENABLE-NEXT: addl %edx, %eax
831831
; ENABLE-NEXT: movl %eax, (%rcx)
832832
; ENABLE-NEXT: jmp LBB10_2
@@ -853,12 +853,12 @@ define void @infiniteloop() {
853853
; DISABLE-NEXT: movq %rsp, %rcx
854854
; DISABLE-NEXT: addq $-16, %rcx
855855
; DISABLE-NEXT: movq %rcx, %rsp
856-
; DISABLE-NEXT: ## InlineAsm Start
857-
; DISABLE-NEXT: movl $1, %edx
858-
; DISABLE-NEXT: ## InlineAsm End
859856
; DISABLE-NEXT: .p2align 4
860857
; DISABLE-NEXT: LBB10_2: ## %for.body
861858
; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1
859+
; DISABLE-NEXT: ## InlineAsm Start
860+
; DISABLE-NEXT: movl $1, %edx
861+
; DISABLE-NEXT: ## InlineAsm End
862862
; DISABLE-NEXT: addl %edx, %eax
863863
; DISABLE-NEXT: movl %eax, (%rcx)
864864
; DISABLE-NEXT: jmp LBB10_2

llvm/test/CodeGen/X86/x86-win64-shrink-wrapping.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) #0 {
2424
; ENABLE-NEXT: #NO_APP
2525
; ENABLE-NEXT: xorl %eax, %eax
2626
; ENABLE-NEXT: movl $10, %ecx
27-
; ENABLE-NEXT: #APP
28-
; ENABLE-NEXT: movl $1, %edx
29-
; ENABLE-NEXT: #NO_APP
3027
; ENABLE-NEXT: .p2align 4
3128
; ENABLE-NEXT: .LBB0_2: # %for.body
3229
; ENABLE-NEXT: # =>This Inner Loop Header: Depth=1
30+
; ENABLE-NEXT: #APP
31+
; ENABLE-NEXT: movl $1, %edx
32+
; ENABLE-NEXT: #NO_APP
3333
; ENABLE-NEXT: addl %edx, %eax
3434
; ENABLE-NEXT: decl %ecx
3535
; ENABLE-NEXT: jne .LBB0_2
@@ -64,12 +64,12 @@ define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) #0 {
6464
; DISABLE-NEXT: #NO_APP
6565
; DISABLE-NEXT: xorl %eax, %eax
6666
; DISABLE-NEXT: movl $10, %ecx
67-
; DISABLE-NEXT: #APP
68-
; DISABLE-NEXT: movl $1, %edx
69-
; DISABLE-NEXT: #NO_APP
7067
; DISABLE-NEXT: .p2align 4
7168
; DISABLE-NEXT: .LBB0_2: # %for.body
7269
; DISABLE-NEXT: # =>This Inner Loop Header: Depth=1
70+
; DISABLE-NEXT: #APP
71+
; DISABLE-NEXT: movl $1, %edx
72+
; DISABLE-NEXT: #NO_APP
7373
; DISABLE-NEXT: addl %edx, %eax
7474
; DISABLE-NEXT: decl %ecx
7575
; DISABLE-NEXT: jne .LBB0_2

0 commit comments

Comments
 (0)