Skip to content

Commit ff25115

Browse files
[NVPTX] cleanup & canonicalize mov (#129344)
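Use TableGen `class` templates (`MOVr` for register-to-register moves, `MOVi` for immediate-to-register moves) to define `mov`, and canonicalize the `mov` instruction to always use the `b<bit-size>` suffix (predicate moves keep `mov.pred`).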
1 parent ff3e2ba commit ff25115

27 files changed

+412
-439
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1008,7 +1008,7 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
10081008
Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local;
10091009
break;
10101010
case ADDRESS_SPACE_PARAM:
1011-
Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr;
1011+
Opc = TM.is64Bit() ? NVPTX::IMOV64r : NVPTX::IMOV32r;
10121012
break;
10131013
}
10141014

@@ -2172,10 +2172,10 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
21722172
auto API = APF.bitcastToAPInt();
21732173
API = API.concat(API);
21742174
auto Const = CurDAG->getTargetConstant(API, DL, MVT::i32);
2175-
return SDValue(CurDAG->getMachineNode(NVPTX::IMOV32ri, DL, VT, Const), 0);
2175+
return SDValue(CurDAG->getMachineNode(NVPTX::IMOV32i, DL, VT, Const), 0);
21762176
}
21772177
auto Const = CurDAG->getTargetConstantFP(APF, DL, VT);
2178-
return SDValue(CurDAG->getMachineNode(NVPTX::BFMOV16ri, DL, VT, Const), 0);
2178+
return SDValue(CurDAG->getMachineNode(NVPTX::BFMOV16i, DL, VT, Const), 0);
21792179
};
21802180

21812181
switch (N->getOpcode()) {

llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -40,22 +40,22 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4040

4141
unsigned Op;
4242
if (DestRC == &NVPTX::Int1RegsRegClass) {
43-
Op = NVPTX::IMOV1rr;
43+
Op = NVPTX::IMOV1r;
4444
} else if (DestRC == &NVPTX::Int16RegsRegClass) {
45-
Op = NVPTX::IMOV16rr;
45+
Op = NVPTX::MOV16r;
4646
} else if (DestRC == &NVPTX::Int32RegsRegClass) {
47-
Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32rr
47+
Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32r
4848
: NVPTX::BITCONVERT_32_F2I);
4949
} else if (DestRC == &NVPTX::Int64RegsRegClass) {
50-
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
50+
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64r
5151
: NVPTX::BITCONVERT_64_F2I);
5252
} else if (DestRC == &NVPTX::Int128RegsRegClass) {
53-
Op = NVPTX::IMOV128rr;
53+
Op = NVPTX::IMOV128r;
5454
} else if (DestRC == &NVPTX::Float32RegsRegClass) {
55-
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
55+
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32r
5656
: NVPTX::BITCONVERT_32_I2F);
5757
} else if (DestRC == &NVPTX::Float64RegsRegClass) {
58-
Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64rr
58+
Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64r
5959
: NVPTX::BITCONVERT_64_I2F);
6060
} else {
6161
llvm_unreachable("Bad register copy");

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 37 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -1945,68 +1945,53 @@ def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
19451945

19461946
// Load a memory address into a u32 or u64 register.
19471947
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a),
1948-
"mov.u32 \t$dst, $a;",
1948+
"mov.b32 \t$dst, $a;",
19491949
[(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
19501950
def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a),
1951-
"mov.u64 \t$dst, $a;",
1951+
"mov.b64 \t$dst, $a;",
19521952
[(set i64:$dst, (Wrapper tglobaladdr:$a))]>;
19531953

19541954
// Get pointer to local stack.
19551955
let hasSideEffects = false in {
19561956
def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
1957-
"mov.u32 \t$d, __local_depot$num;", []>;
1957+
"mov.b32 \t$d, __local_depot$num;", []>;
19581958
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
1959-
"mov.u64 \t$d, __local_depot$num;", []>;
1959+
"mov.b64 \t$d, __local_depot$num;", []>;
19601960
}
19611961

19621962

19631963
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
1964-
let hasSideEffects=0, isAsCheapAsAMove=1 in {
1965-
def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1966-
"mov.pred \t$dst, $sss;", []>;
1967-
def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1968-
"mov.u16 \t$dst, $sss;", []>;
1969-
def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1970-
"mov.u32 \t$dst, $sss;", []>;
1971-
def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1972-
"mov.u64 \t$dst, $sss;", []>;
1973-
def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
1974-
"mov.b128 \t$dst, $sss;", []>;
1975-
1976-
def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1977-
"mov.f32 \t$dst, $src;", []>;
1978-
def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1979-
"mov.f64 \t$dst, $src;", []>;
1980-
1981-
def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1982-
"mov.pred \t$dst, $src;",
1983-
[(set i1:$dst, imm:$src)]>;
1984-
def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1985-
"mov.b16 \t$dst, $src;",
1986-
[(set i16:$dst, imm:$src)]>;
1987-
def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1988-
"mov.b32 \t$dst, $src;",
1989-
[(set i32:$dst, imm:$src)]>;
1990-
def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1991-
"mov.b64 \t$dst, $src;",
1992-
[(set i64:$dst, imm:$src)]>;
1993-
1994-
def FMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$src),
1995-
"mov.b16 \t$dst, $src;",
1996-
[(set f16:$dst, fpimm:$src)]>;
1997-
def BFMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$src),
1998-
"mov.b16 \t$dst, $src;",
1999-
[(set bf16:$dst, fpimm:$src)]>;
2000-
def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
2001-
"mov.f32 \t$dst, $src;",
2002-
[(set f32:$dst, fpimm:$src)]>;
2003-
def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
2004-
"mov.f64 \t$dst, $src;",
2005-
[(set f64:$dst, fpimm:$src)]>;
2006-
}
2007-
2008-
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
2009-
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
1964+
let hasSideEffects = false, isAsCheapAsAMove = true in {
1965+
// Class for register-to-register moves
1966+
class MOVr<RegisterClass RC, string OpStr> :
1967+
NVPTXInst<(outs RC:$dst), (ins RC:$src),
1968+
"mov." # OpStr # " \t$dst, $src;", []>;
1969+
1970+
// Class for immediate-to-register moves
1971+
class MOVi<RegisterClass RC, string OpStr, ValueType VT, Operand IMMType, SDNode ImmNode> :
1972+
NVPTXInst<(outs RC:$dst), (ins IMMType:$src),
1973+
"mov." # OpStr # " \t$dst, $src;",
1974+
[(set VT:$dst, ImmNode:$src)]>;
1975+
}
1976+
1977+
def IMOV1r : MOVr<Int1Regs, "pred">;
1978+
def IMOV1i : MOVi<Int1Regs, "pred", i1, i1imm, imm>;
1979+
def MOV16r : MOVr<Int16Regs, "b16">;
1980+
def IMOV16i : MOVi<Int16Regs, "b16", i16, i16imm, imm>;
1981+
def IMOV32r : MOVr<Int32Regs, "b32">;
1982+
def IMOV32i : MOVi<Int32Regs, "b32", i32, i32imm, imm>;
1983+
def IMOV64r : MOVr<Int64Regs, "b64">;
1984+
def IMOV64i : MOVi<Int64Regs, "b64", i64, i64imm, imm>;
1985+
def IMOV128r : MOVr<Int128Regs, "b128">;
1986+
def FMOV16i : MOVi<Int16Regs, "b16", f16, f16imm, fpimm>;
1987+
def BFMOV16i : MOVi<Int16Regs, "b16", bf16, bf16imm, fpimm>;
1988+
def FMOV32r : MOVr<Float32Regs, "b32">;
1989+
def FMOV32i : MOVi<Float32Regs, "b32", f32, f32imm, fpimm>;
1990+
def FMOV64r : MOVr<Float64Regs, "b64">;
1991+
def FMOV64i : MOVi<Float64Regs, "b64", f64, f64imm, fpimm>;
1992+
1993+
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>;
1994+
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>;
20101995

20111996
//---- Copy Frame Index ----
20121997
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr),
@@ -2717,8 +2702,8 @@ def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>;
27172702
def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>;
27182703
def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>;
27192704
def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>;
2720-
def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>;
2721-
def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>;
2705+
def ProxyRegF32 : ProxyRegInst<"b32", f32, Float32Regs>;
2706+
def ProxyRegF64 : ProxyRegInst<"b64", f64, Float64Regs>;
27222707

27232708
foreach vt = [f16, bf16] in {
27242709
def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 $src)>;

llvm/test/CodeGen/NVPTX/atomics-sm70.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
7272
; CHECKPTX62-NEXT: or.b32 %r32, %r31, %r30;
7373
; CHECKPTX62-NEXT: atom.cas.b32 %r6, [%r1], %r54, %r32;
7474
; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r54;
75-
; CHECKPTX62-NEXT: mov.u32 %r54, %r6;
75+
; CHECKPTX62-NEXT: mov.b32 %r54, %r6;
7676
; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1;
7777
; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44
7878
; CHECKPTX62-NEXT: ld.u32 %r55, [%r1];
@@ -88,7 +88,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
8888
; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35;
8989
; CHECKPTX62-NEXT: atom.cas.b32 %r9, [%r1], %r55, %r37;
9090
; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r9, %r55;
91-
; CHECKPTX62-NEXT: mov.u32 %r55, %r9;
91+
; CHECKPTX62-NEXT: mov.b32 %r55, %r9;
9292
; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3;
9393
; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end26
9494
; CHECKPTX62-NEXT: and.b32 %r10, %r22, -4;
@@ -109,7 +109,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
109109
; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43;
110110
; CHECKPTX62-NEXT: atom.global.cas.b32 %r15, [%r10], %r56, %r45;
111111
; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r15, %r56;
112-
; CHECKPTX62-NEXT: mov.u32 %r56, %r15;
112+
; CHECKPTX62-NEXT: mov.b32 %r56, %r15;
113113
; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5;
114114
; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end8
115115
; CHECKPTX62-NEXT: and.b32 %r16, %r23, -4;
@@ -130,7 +130,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
130130
; CHECKPTX62-NEXT: or.b32 %r53, %r52, %r51;
131131
; CHECKPTX62-NEXT: atom.shared.cas.b32 %r21, [%r16], %r57, %r53;
132132
; CHECKPTX62-NEXT: setp.ne.s32 %p4, %r21, %r57;
133-
; CHECKPTX62-NEXT: mov.u32 %r57, %r21;
133+
; CHECKPTX62-NEXT: mov.b32 %r57, %r21;
134134
; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7;
135135
; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end
136136
; CHECKPTX62-NEXT: ret;

llvm/test/CodeGen/NVPTX/atomics-sm90.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
7373
; CHECKPTX71-NEXT: or.b32 %r32, %r31, %r30;
7474
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r6, [%r1], %r54, %r32;
7575
; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54;
76-
; CHECKPTX71-NEXT: mov.u32 %r54, %r6;
76+
; CHECKPTX71-NEXT: mov.b32 %r54, %r6;
7777
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
7878
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44
7979
; CHECKPTX71-NEXT: ld.u32 %r55, [%r1];
@@ -89,7 +89,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
8989
; CHECKPTX71-NEXT: or.b32 %r37, %r36, %r35;
9090
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r9, [%r1], %r55, %r37;
9191
; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55;
92-
; CHECKPTX71-NEXT: mov.u32 %r55, %r9;
92+
; CHECKPTX71-NEXT: mov.b32 %r55, %r9;
9393
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
9494
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26
9595
; CHECKPTX71-NEXT: and.b32 %r10, %r22, -4;
@@ -111,7 +111,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
111111
; CHECKPTX71-NEXT: or.b32 %r45, %r44, %r43;
112112
; CHECKPTX71-NEXT: atom.relaxed.global.cas.b32 %r15, [%r10], %r56, %r45;
113113
; CHECKPTX71-NEXT: setp.ne.s32 %p3, %r15, %r56;
114-
; CHECKPTX71-NEXT: mov.u32 %r56, %r15;
114+
; CHECKPTX71-NEXT: mov.b32 %r56, %r15;
115115
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
116116
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8
117117
; CHECKPTX71-NEXT: and.b32 %r16, %r23, -4;
@@ -133,7 +133,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
133133
; CHECKPTX71-NEXT: or.b32 %r53, %r52, %r51;
134134
; CHECKPTX71-NEXT: atom.relaxed.shared.cas.b32 %r21, [%r16], %r57, %r53;
135135
; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57;
136-
; CHECKPTX71-NEXT: mov.u32 %r57, %r21;
136+
; CHECKPTX71-NEXT: mov.b32 %r57, %r21;
137137
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
138138
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
139139
; CHECKPTX71-NEXT: ret;

llvm/test/CodeGen/NVPTX/atomics.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -429,7 +429,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
429429
; CHECK-NEXT: membar.sys;
430430
; CHECK-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r14;
431431
; CHECK-NEXT: setp.ne.s32 %p1, %r5, %r16;
432-
; CHECK-NEXT: mov.u32 %r16, %r5;
432+
; CHECK-NEXT: mov.b32 %r16, %r5;
433433
; CHECK-NEXT: @%p1 bra $L__BB22_1;
434434
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
435435
; CHECK-NEXT: shr.u32 %r15, %r5, %r1;

llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ entry:
2121
%buf = alloca [16 x i8], align 4
2222

2323
; CHECK: .local .align 4 .b8 __local_depot0[16]
24-
; CHECK: mov.u64 %SPL
24+
; CHECK: mov.b64 %SPL
2525

2626
; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
2727
; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]]

0 commit comments

Comments
 (0)