Skip to content

Commit 31b4bf9

Browse files
author
Jeff Niu
authored
[llvm][NVPTX] Fix RAUW bug in NVPTXProxyRegErasure (#105871)
Fix the bug introduced in #105730. The bug is in how the batch RAUW is implemented. If we have
```
%0 = mov %src
%1 = mov %0
use %0
use %1
```
the use of `%1` is rewritten to `%0`, not `%src`. This PR fixes that: when a replacement is recorded, it first checks whether the source register has itself already been replaced and, if so, maps to that replacement instead, so that replacements propagate transitively.
1 parent a5d89d5 commit 31b4bf9

File tree

3 files changed

+103
-26
lines changed

3 files changed

+103
-26
lines changed

llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,11 @@ bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
7878
assert(InOp.isReg() && "ProxyReg input should be a register.");
7979
assert(OutOp.isReg() && "ProxyReg output should be a register.");
8080
RemoveList.push_back(&MI);
81-
RAUWBatch.try_emplace(OutOp.getReg(), InOp.getReg());
81+
Register replacement = InOp.getReg();
82+
// Check if the replacement itself has been replaced.
83+
if (auto it = RAUWBatch.find(replacement); it != RAUWBatch.end())
84+
replacement = it->second;
85+
RAUWBatch.try_emplace(OutOp.getReg(), replacement);
8286
break;
8387
}
8488
}

llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll

Lines changed: 0 additions & 25 deletions
This file was deleted.
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# RUN: llc %s --run-pass=nvptx-proxyreg-erasure -march=nvptx64 -o - | FileCheck %s
2+
3+
--- |
4+
; ModuleID = 'third-party/llvm-project/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll'
5+
source_filename = "third-party/llvm-project/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll"
6+
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
7+
8+
declare <4 x i32> @callee_vec_i32()
9+
10+
define <4 x i32> @check_vec_i32() {
11+
%ret = call <4 x i32> @callee_vec_i32()
12+
ret <4 x i32> %ret
13+
}
14+
15+
...
16+
---
17+
name: check_vec_i32
18+
alignment: 1
19+
exposesReturnsTwice: false
20+
legalized: false
21+
regBankSelected: false
22+
selected: false
23+
failedISel: false
24+
tracksRegLiveness: true
25+
hasWinCFI: false
26+
callsEHReturn: false
27+
callsUnwindInit: false
28+
hasEHCatchret: false
29+
hasEHScopes: false
30+
hasEHFunclets: false
31+
isOutlined: false
32+
debugInstrRef: false
33+
failsVerification: false
34+
tracksDebugUserValues: false
35+
registers:
36+
- { id: 0, class: int32regs, preferred-register: '' }
37+
- { id: 1, class: int32regs, preferred-register: '' }
38+
- { id: 2, class: int32regs, preferred-register: '' }
39+
- { id: 3, class: int32regs, preferred-register: '' }
40+
- { id: 4, class: int32regs, preferred-register: '' }
41+
- { id: 5, class: int32regs, preferred-register: '' }
42+
- { id: 6, class: int32regs, preferred-register: '' }
43+
- { id: 7, class: int32regs, preferred-register: '' }
44+
- { id: 8, class: int32regs, preferred-register: '' }
45+
- { id: 9, class: int32regs, preferred-register: '' }
46+
- { id: 10, class: int32regs, preferred-register: '' }
47+
- { id: 11, class: int32regs, preferred-register: '' }
48+
liveins: []
49+
frameInfo:
50+
isFrameAddressTaken: false
51+
isReturnAddressTaken: false
52+
hasStackMap: false
53+
hasPatchPoint: false
54+
stackSize: 0
55+
offsetAdjustment: 0
56+
maxAlignment: 1
57+
adjustsStack: false
58+
hasCalls: true
59+
stackProtector: ''
60+
functionContext: ''
61+
maxCallFrameSize: 4294967295
62+
cvBytesOfCalleeSavedRegisters: 0
63+
hasOpaqueSPAdjustment: false
64+
hasVAStart: false
65+
hasMustTailInVarArgFunc: false
66+
hasTailCall: false
67+
isCalleeSavedInfoValid: false
68+
localFrameSize: 0
69+
savePoint: ''
70+
restorePoint: ''
71+
fixedStack: []
72+
stack: []
73+
entry_values: []
74+
callSites: []
75+
debugValueSubstitutions: []
76+
constants: []
77+
machineFunctionInfo: {}
78+
body: |
79+
bb.0:
80+
%0:int32regs, %1:int32regs, %2:int32regs, %3:int32regs = LoadParamMemV4I32 0
81+
; CHECK-NOT: ProxyReg
82+
%4:int32regs = ProxyRegI32 killed %0
83+
%5:int32regs = ProxyRegI32 killed %1
84+
%6:int32regs = ProxyRegI32 killed %2
85+
%7:int32regs = ProxyRegI32 killed %3
86+
; CHECK: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3
87+
StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0
88+
89+
%8:int32regs = LoadParamMemI32 0
90+
; CHECK-NOT: ProxyReg
91+
%9:int32regs = ProxyRegI32 killed %8
92+
%10:int32regs = ProxyRegI32 killed %9
93+
%11:int32regs = ProxyRegI32 killed %10
94+
; CHECK: StoreRetvalI32 killed %8
95+
StoreRetvalI32 killed %11, 0
96+
Return
97+
98+
...

0 commit comments

Comments
 (0)