Skip to content

Commit 0ac2841

Browse files
committed
[NVPTX] Avoid introducing unnecessary ProxyRegs and Movs in ISel
1 parent 2b932bc commit 0ac2841

30 files changed

+895
-906
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 14 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1994,22 +1994,15 @@ let IsSimpleMove=1, hasSideEffects=0 in {
19941994
def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
19951995
"mov.pred \t$dst, $src;",
19961996
[(set i1:$dst, imm:$src)]>;
1997-
def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1998-
"mov.u16 \t$dst, $src;",
1999-
[(set i16:$dst, imm:$src)]>;
2000-
def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
2001-
"mov.u32 \t$dst, $src;",
2002-
[(set i32:$dst, imm:$src)]>;
2003-
def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
2004-
"mov.u64 \t$dst, $src;",
2005-
[(set i64:$dst, imm:$src)]>;
2006-
20071997
def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
2008-
"mov.b16 \t$dst, $src;", []>;
1998+
"mov.b16 \t$dst, $src;",
1999+
[(set i16:$dst, imm:$src)]>;
20092000
def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
2010-
"mov.b32 \t$dst, $src;", []>;
2001+
"mov.b32 \t$dst, $src;",
2002+
[(set i32:$dst, imm:$src)]>;
20112003
def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
2012-
"mov.b64 \t$dst, $src;", []>;
2004+
"mov.b64 \t$dst, $src;",
2005+
[(set i64:$dst, imm:$src)]>;
20132006

20142007
def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
20152008
"mov.f32 \t$dst, $src;",
@@ -2018,8 +2011,8 @@ def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
20182011
"mov.f64 \t$dst, $src;",
20192012
[(set f64:$dst, fpimm:$src)]>;
20202013

2021-
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
2022-
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
2014+
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOVB32ri texternalsym:$dst)>;
2015+
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOVB64ri texternalsym:$dst)>;
20232016

20242017
//---- Copy Frame Index ----
20252018
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
@@ -3104,21 +3097,17 @@ def: Pat<(f32 (bitconvert vt:$a)),
31043097
(BITCONVERT_32_I2F Int32Regs:$a)>;
31053098
}
31063099
foreach vt = [f16, bf16] in {
3107-
def: Pat<(vt (bitconvert (i16 UInt16Const:$a))),
3108-
(IMOVB16ri UInt16Const:$a)>;
3109-
def: Pat<(vt (bitconvert i16:$a)),
3110-
(ProxyRegI16 Int16Regs:$a)>;
3111-
def: Pat<(i16 (bitconvert vt:$a)),
3112-
(ProxyRegI16 Int16Regs:$a)>;
3100+
def: Pat<(vt (bitconvert i16:$a)),
3101+
(vt Int16Regs:$a)>;
3102+
def: Pat<(i16 (bitconvert vt:$a)),
3103+
(i16 Int16Regs:$a)>;
31133104
}
31143105

31153106
foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
3116-
def: Pat<(ta (bitconvert (i32 UInt32Const:$a))),
3117-
(IMOVB32ri UInt32Const:$a)>;
31183107
foreach tb = [v2f16, v2bf16, v2i16, v4i8, i32] in {
31193108
if !ne(ta, tb) then {
3120-
def: Pat<(ta (bitconvert (tb Int32Regs:$a))),
3121-
(ProxyRegI32 Int32Regs:$a)>;
3109+
def: Pat<(ta (bitconvert tb:$a)),
3110+
(ta Int32Regs:$a)>;
31223111
}
31233112
}
31243113
}

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2803,10 +2803,10 @@ def : Pat<(int_nvvm_ptr_param_to_gen i64:$src),
28032803

28042804
// nvvm.ptr.gen.to.param
28052805
def : Pat<(int_nvvm_ptr_gen_to_param i32:$src),
2806-
(IMOV32rr Int32Regs:$src)>;
2806+
(i32 Int32Regs:$src)>;
28072807

28082808
def : Pat<(int_nvvm_ptr_gen_to_param i64:$src),
2809-
(IMOV64rr Int64Regs:$src)>;
2809+
(i64 Int64Regs:$src)>;
28102810

28112811
// nvvm.move intrinsic
28122812
def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),

llvm/test/CodeGen/NVPTX/atomics-sm70.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
4646
; CHECKPTX62-LABEL: test(
4747
; CHECKPTX62: {
4848
; CHECKPTX62-NEXT: .reg .pred %p<5>;
49-
; CHECKPTX62-NEXT: .reg .b16 %rs<19>;
49+
; CHECKPTX62-NEXT: .reg .b16 %rs<11>;
5050
; CHECKPTX62-NEXT: .reg .b32 %r<58>;
5151
; CHECKPTX62-EMPTY:
5252
; CHECKPTX62-NEXT: // %bb.0:
@@ -65,8 +65,8 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
6565
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
6666
; CHECKPTX62-NEXT: shr.u32 %r28, %r54, %r2;
6767
; CHECKPTX62-NEXT: cvt.u16.u32 %rs2, %r28;
68-
; CHECKPTX62-NEXT: add.rn.f16 %rs4, %rs2, %rs1;
69-
; CHECKPTX62-NEXT: cvt.u32.u16 %r29, %rs4;
68+
; CHECKPTX62-NEXT: add.rn.f16 %rs3, %rs2, %rs1;
69+
; CHECKPTX62-NEXT: cvt.u32.u16 %r29, %rs3;
7070
; CHECKPTX62-NEXT: shl.b32 %r30, %r29, %r2;
7171
; CHECKPTX62-NEXT: and.b32 %r31, %r54, %r3;
7272
; CHECKPTX62-NEXT: or.b32 %r32, %r31, %r30;
@@ -79,10 +79,10 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
7979
; CHECKPTX62-NEXT: $L__BB0_3: // %atomicrmw.start27
8080
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
8181
; CHECKPTX62-NEXT: shr.u32 %r33, %r55, %r2;
82-
; CHECKPTX62-NEXT: cvt.u16.u32 %rs6, %r33;
83-
; CHECKPTX62-NEXT: mov.b16 %rs8, 0x3C00;
84-
; CHECKPTX62-NEXT: add.rn.f16 %rs9, %rs6, %rs8;
85-
; CHECKPTX62-NEXT: cvt.u32.u16 %r34, %rs9;
82+
; CHECKPTX62-NEXT: cvt.u16.u32 %rs4, %r33;
83+
; CHECKPTX62-NEXT: mov.b16 %rs5, 0x3C00;
84+
; CHECKPTX62-NEXT: add.rn.f16 %rs6, %rs4, %rs5;
85+
; CHECKPTX62-NEXT: cvt.u32.u16 %r34, %rs6;
8686
; CHECKPTX62-NEXT: shl.b32 %r35, %r34, %r2;
8787
; CHECKPTX62-NEXT: and.b32 %r36, %r55, %r3;
8888
; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35;
@@ -100,9 +100,9 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
100100
; CHECKPTX62-NEXT: $L__BB0_5: // %atomicrmw.start9
101101
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
102102
; CHECKPTX62-NEXT: shr.u32 %r41, %r56, %r11;
103-
; CHECKPTX62-NEXT: cvt.u16.u32 %rs11, %r41;
104-
; CHECKPTX62-NEXT: add.rn.f16 %rs13, %rs11, %rs1;
105-
; CHECKPTX62-NEXT: cvt.u32.u16 %r42, %rs13;
103+
; CHECKPTX62-NEXT: cvt.u16.u32 %rs7, %r41;
104+
; CHECKPTX62-NEXT: add.rn.f16 %rs8, %rs7, %rs1;
105+
; CHECKPTX62-NEXT: cvt.u32.u16 %r42, %rs8;
106106
; CHECKPTX62-NEXT: shl.b32 %r43, %r42, %r11;
107107
; CHECKPTX62-NEXT: and.b32 %r44, %r56, %r12;
108108
; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43;
@@ -120,9 +120,9 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
120120
; CHECKPTX62-NEXT: $L__BB0_7: // %atomicrmw.start
121121
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
122122
; CHECKPTX62-NEXT: shr.u32 %r49, %r57, %r17;
123-
; CHECKPTX62-NEXT: cvt.u16.u32 %rs15, %r49;
124-
; CHECKPTX62-NEXT: add.rn.f16 %rs17, %rs15, %rs1;
125-
; CHECKPTX62-NEXT: cvt.u32.u16 %r50, %rs17;
123+
; CHECKPTX62-NEXT: cvt.u16.u32 %rs9, %r49;
124+
; CHECKPTX62-NEXT: add.rn.f16 %rs10, %rs9, %rs1;
125+
; CHECKPTX62-NEXT: cvt.u32.u16 %r50, %rs10;
126126
; CHECKPTX62-NEXT: shl.b32 %r51, %r50, %r17;
127127
; CHECKPTX62-NEXT: and.b32 %r52, %r57, %r18;
128128
; CHECKPTX62-NEXT: or.b32 %r53, %r52, %r51;

llvm/test/CodeGen/NVPTX/atomics-sm90.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
4646
; CHECKPTX71-LABEL: test(
4747
; CHECKPTX71: {
4848
; CHECKPTX71-NEXT: .reg .pred %p<5>;
49-
; CHECKPTX71-NEXT: .reg .b16 %rs<34>;
49+
; CHECKPTX71-NEXT: .reg .b16 %rs<22>;
5050
; CHECKPTX71-NEXT: .reg .b32 %r<4>;
5151
; CHECKPTX71-NEXT: .reg .f32 %f<12>;
5252
; CHECKPTX71-EMPTY:
@@ -55,49 +55,49 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
5555
; CHECKPTX71-NEXT: ld.param.u32 %r3, [test_param_2];
5656
; CHECKPTX71-NEXT: ld.param.u32 %r2, [test_param_1];
5757
; CHECKPTX71-NEXT: ld.param.u32 %r1, [test_param_0];
58-
; CHECKPTX71-NEXT: ld.b16 %rs30, [%r1];
58+
; CHECKPTX71-NEXT: ld.b16 %rs18, [%r1];
5959
; CHECKPTX71-NEXT: cvt.f32.bf16 %f1, %rs13;
6060
; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start14
6161
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
62-
; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs30;
62+
; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs18;
6363
; CHECKPTX71-NEXT: add.rn.f32 %f3, %f2, %f1;
6464
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs14, %f3;
65-
; CHECKPTX71-NEXT: atom.cas.b16 %rs17, [%r1], %rs30, %rs14;
66-
; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs17, %rs30;
67-
; CHECKPTX71-NEXT: mov.u16 %rs30, %rs17;
65+
; CHECKPTX71-NEXT: atom.cas.b16 %rs3, [%r1], %rs18, %rs14;
66+
; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs3, %rs18;
67+
; CHECKPTX71-NEXT: mov.u16 %rs18, %rs3;
6868
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
6969
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end13
70-
; CHECKPTX71-NEXT: ld.b16 %rs31, [%r1];
70+
; CHECKPTX71-NEXT: ld.b16 %rs19, [%r1];
7171
; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start8
7272
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
73-
; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs31;
73+
; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs19;
7474
; CHECKPTX71-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
75-
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs18, %f5;
76-
; CHECKPTX71-NEXT: atom.cas.b16 %rs21, [%r1], %rs31, %rs18;
77-
; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs21, %rs31;
78-
; CHECKPTX71-NEXT: mov.u16 %rs31, %rs21;
75+
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs15, %f5;
76+
; CHECKPTX71-NEXT: atom.cas.b16 %rs6, [%r1], %rs19, %rs15;
77+
; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs6, %rs19;
78+
; CHECKPTX71-NEXT: mov.u16 %rs19, %rs6;
7979
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
8080
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end7
81-
; CHECKPTX71-NEXT: ld.global.b16 %rs32, [%r2];
81+
; CHECKPTX71-NEXT: ld.global.b16 %rs20, [%r2];
8282
; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start2
8383
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
84-
; CHECKPTX71-NEXT: cvt.f32.bf16 %f7, %rs32;
84+
; CHECKPTX71-NEXT: cvt.f32.bf16 %f7, %rs20;
8585
; CHECKPTX71-NEXT: add.rn.f32 %f8, %f7, %f1;
86-
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs22, %f8;
87-
; CHECKPTX71-NEXT: atom.global.cas.b16 %rs25, [%r2], %rs32, %rs22;
88-
; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs25, %rs32;
89-
; CHECKPTX71-NEXT: mov.u16 %rs32, %rs25;
86+
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs16, %f8;
87+
; CHECKPTX71-NEXT: atom.global.cas.b16 %rs9, [%r2], %rs20, %rs16;
88+
; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs9, %rs20;
89+
; CHECKPTX71-NEXT: mov.u16 %rs20, %rs9;
9090
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
9191
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end1
92-
; CHECKPTX71-NEXT: ld.shared.b16 %rs33, [%r3];
92+
; CHECKPTX71-NEXT: ld.shared.b16 %rs21, [%r3];
9393
; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start
9494
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
95-
; CHECKPTX71-NEXT: cvt.f32.bf16 %f10, %rs33;
95+
; CHECKPTX71-NEXT: cvt.f32.bf16 %f10, %rs21;
9696
; CHECKPTX71-NEXT: add.rn.f32 %f11, %f10, %f1;
97-
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs26, %f11;
98-
; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs29, [%r3], %rs33, %rs26;
99-
; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs29, %rs33;
100-
; CHECKPTX71-NEXT: mov.u16 %rs33, %rs29;
97+
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs17, %f11;
98+
; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs12, [%r3], %rs21, %rs17;
99+
; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs12, %rs21;
100+
; CHECKPTX71-NEXT: mov.u16 %rs21, %rs12;
101101
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
102102
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
103103
; CHECKPTX71-NEXT: ret;

0 commit comments

Comments
 (0)