Skip to content

Commit 2247072

Browse files
committed
AMDGPU/GlobalISel: Set insert point when emitting control flow pseudos
The legalizer implicitly assumed that the branch instruction immediately followed the intrinsic being replaced by the control flow pseudo. Another non-terminator instruction can be inserted between the intrinsic and the branch, so set the insertion point to the conditional branch before emitting the pseudo. This fixes a "non-terminator after terminator" verifier error (which, without the verifier, manifested as an infinite loop in analyzeBranch much later on).
1 parent fc148a4 commit 2247072

File tree

2 files changed

+101
-2
lines changed

2 files changed

+101
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4144,6 +4144,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
41444144
Register Use = MI.getOperand(3).getReg();
41454145

41464146
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
4147+
B.setInsertPt(B.getMBB(), BrCond->getIterator());
41474148
if (IntrID == Intrinsic::amdgcn_if) {
41484149
B.buildInstr(AMDGPU::SI_IF)
41494150
.addDef(Def)
@@ -4184,6 +4185,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
41844185

41854186
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
41864187
Register Reg = MI.getOperand(2).getReg();
4188+
4189+
B.setInsertPt(B.getMBB(), BrCond->getIterator());
41874190
B.buildInstr(AMDGPU::SI_LOOP)
41884191
.addUse(Reg)
41894192
.addMBB(UncondBrTarget);

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE64 %s
3-
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE32 %s
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
3+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
44

55
---
66
name: legal_brcond_vcc
@@ -310,3 +310,99 @@ body: |
310310
311311
bb.2:
312312
...
313+
314+
# There's another instruction between the intrinsic and the
315+
# conditional branch, so we need to move the insert point.
316+
---
317+
name: brcond_si_if_need_insert_terminator_point
318+
body: |
319+
; WAVE64-LABEL: name: brcond_si_if_need_insert_terminator_point
320+
; WAVE64: bb.0:
321+
; WAVE64: successors: %bb.1(0x80000000)
322+
; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
323+
; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
324+
; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
325+
; WAVE64: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
326+
; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
327+
; WAVE64: G_BR %bb.1
328+
; WAVE64: bb.1:
329+
; WAVE64: S_ENDPGM 0, implicit [[COPY2]](s32)
330+
; WAVE32-LABEL: name: brcond_si_if_need_insert_terminator_point
331+
; WAVE32: bb.0:
332+
; WAVE32: successors: %bb.1(0x80000000)
333+
; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
334+
; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
335+
; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
336+
; WAVE32: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
337+
; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
338+
; WAVE32: G_BR %bb.1
339+
; WAVE32: bb.1:
340+
; WAVE32: S_ENDPGM 0, implicit [[COPY2]](s32)
341+
bb.0:
342+
successors: %bb.1
343+
liveins: $vgpr0, $vgpr1, $vgpr2
344+
%0:_(s32) = COPY $vgpr0
345+
%1:_(s32) = COPY $vgpr1
346+
%2:_(s1) = G_ICMP intpred(ne), %0, %1
347+
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
348+
%5:_(s32) = COPY $vgpr2
349+
G_BRCOND %3, %bb.1
350+
351+
bb.1:
352+
S_ENDPGM 0, implicit %5
353+
...
354+
355+
---
356+
name: brcond_si_loop_need_terminator_insert_point
357+
tracksRegLiveness: true
358+
body: |
359+
; WAVE64-LABEL: name: brcond_si_loop_need_terminator_insert_point
360+
; WAVE64: bb.0:
361+
; WAVE64: successors: %bb.1(0x80000000)
362+
; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
363+
; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
364+
; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
365+
; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
366+
; WAVE64: bb.1:
367+
; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
368+
; WAVE64: S_NOP 0
369+
; WAVE64: S_NOP 0
370+
; WAVE64: S_NOP 0
371+
; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
372+
; WAVE64: G_BR %bb.2
373+
; WAVE64: bb.2:
374+
; WAVE64: S_NOP 0
375+
; WAVE32-LABEL: name: brcond_si_loop_need_terminator_insert_point
376+
; WAVE32: bb.0:
377+
; WAVE32: successors: %bb.1(0x80000000)
378+
; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
379+
; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
380+
; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
381+
; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
382+
; WAVE32: bb.1:
383+
; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
384+
; WAVE32: S_NOP 0
385+
; WAVE32: S_NOP 0
386+
; WAVE32: S_NOP 0
387+
; WAVE32: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
388+
; WAVE32: G_BR %bb.2
389+
; WAVE32: bb.2:
390+
; WAVE32: S_NOP 0
391+
bb.0:
392+
liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
393+
%0:_(s32) = COPY $vgpr0
394+
%1:_(s32) = COPY $vgpr1
395+
%2:_(s64) = COPY $sgpr0_sgpr1
396+
397+
bb.1:
398+
successors: %bb.1, %bb.2
399+
S_NOP 0
400+
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
401+
S_NOP 0
402+
S_NOP 0
403+
G_BRCOND %3, %bb.2
404+
G_BR %bb.1
405+
406+
bb.2:
407+
S_NOP 0
408+
...

0 commit comments

Comments
 (0)