Skip to content

Commit 07fe9c6

Browse files
authored
[NVPTX] Use 0 immediate for i1 trunc, cleanup dead code (#135646)
Update instruction selection for truncation to i1 to use "setp.ne %v, 0", since comparing against the zero immediate is the preferable canonical form. Also remove some dead code relating to the "set" instruction, which is not currently supported.
1 parent 8e8d048 commit 07fe9c6

17 files changed

+44
-180
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 6 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,39 +1703,6 @@ def SETP_bf16x2rr :
17031703
[]>,
17041704
Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
17051705

1706-
1707-
// FIXME: This doesn't appear to be correct. The "set" mnemonic has the form
1708-
// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
1709-
// reg, either u32, s32, or f32. Anyway these aren't used at the moment.
1710-
1711-
let hasSideEffects = false in {
1712-
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
1713-
def rr : NVPTXInst<(outs Int32Regs:$dst),
1714-
(ins RC:$a, RC:$b, CmpMode:$cmp),
1715-
!strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
1716-
def ri : NVPTXInst<(outs Int32Regs:$dst),
1717-
(ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1718-
!strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
1719-
def ir : NVPTXInst<(outs Int32Regs:$dst),
1720-
(ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1721-
!strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
1722-
}
1723-
}
1724-
1725-
defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
1726-
defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
1727-
defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
1728-
defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
1729-
defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
1730-
defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
1731-
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
1732-
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
1733-
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
1734-
defm SET_f16 : SET<"f16", Int16Regs, f16imm>;
1735-
defm SET_bf16 : SET<"bf16", Int16Regs, bf16imm>, Requires<[hasPTX<78>, hasSM<90>]>;
1736-
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
1737-
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
1738-
17391706
//-----------------------------------
17401707
// Data Movement (Load / Store, Move)
17411708
//-----------------------------------
@@ -1842,16 +1809,7 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
18421809
Instruction setp_32ir,
18431810
Instruction setp_64rr,
18441811
Instruction setp_64ri,
1845-
Instruction setp_64ir,
1846-
Instruction set_16rr,
1847-
Instruction set_16ri,
1848-
Instruction set_16ir,
1849-
Instruction set_32rr,
1850-
Instruction set_32ri,
1851-
Instruction set_32ir,
1852-
Instruction set_64rr,
1853-
Instruction set_64ri,
1854-
Instruction set_64ir> {
1812+
Instruction setp_64ir> {
18551813
// i16 -> pred
18561814
def : Pat<(i1 (OpNode i16:$a, i16:$b)),
18571815
(setp_16rr $a, $b, Mode)>;
@@ -1873,38 +1831,13 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
18731831
(setp_64ri $a, imm:$b, Mode)>;
18741832
def : Pat<(i1 (OpNode imm:$a, i64:$b)),
18751833
(setp_64ir imm:$a, $b, Mode)>;
1876-
1877-
// i16 -> i32
1878-
def : Pat<(i32 (OpNode i16:$a, i16:$b)),
1879-
(set_16rr $a, $b, Mode)>;
1880-
def : Pat<(i32 (OpNode i16:$a, imm:$b)),
1881-
(set_16ri $a, imm:$b, Mode)>;
1882-
def : Pat<(i32 (OpNode imm:$a, i16:$b)),
1883-
(set_16ir imm:$a, $b, Mode)>;
1884-
// i32 -> i32
1885-
def : Pat<(i32 (OpNode i32:$a, i32:$b)),
1886-
(set_32rr $a, $b, Mode)>;
1887-
def : Pat<(i32 (OpNode i32:$a, imm:$b)),
1888-
(set_32ri $a, imm:$b, Mode)>;
1889-
def : Pat<(i32 (OpNode imm:$a, i32:$b)),
1890-
(set_32ir imm:$a, $b, Mode)>;
1891-
// i64 -> i32
1892-
def : Pat<(i32 (OpNode i64:$a, Int64Regs:$b)),
1893-
(set_64rr $a, $b, Mode)>;
1894-
def : Pat<(i32 (OpNode i64:$a, imm:$b)),
1895-
(set_64ri $a, imm:$b, Mode)>;
1896-
def : Pat<(i32 (OpNode imm:$a, i64:$b)),
1897-
(set_64ir imm:$a, $b, Mode)>;
18981834
}
18991835

19001836
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
19011837
: ISET_FORMAT<OpNode, Mode,
19021838
SETP_s16rr, SETP_s16ri, SETP_s16ir,
19031839
SETP_s32rr, SETP_s32ri, SETP_s32ir,
1904-
SETP_s64rr, SETP_s64ri, SETP_s64ir,
1905-
SET_s16rr, SET_s16ri, SET_s16ir,
1906-
SET_s32rr, SET_s32ri, SET_s32ir,
1907-
SET_s64rr, SET_s64ri, SET_s64ir> {
1840+
SETP_s64rr, SETP_s64ri, SETP_s64ir> {
19081841
// TableGen doesn't like empty multiclasses.
19091842
def : PatLeaf<(i32 0)>;
19101843
}
@@ -1913,10 +1846,7 @@ multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
19131846
: ISET_FORMAT<OpNode, Mode,
19141847
SETP_u16rr, SETP_u16ri, SETP_u16ir,
19151848
SETP_u32rr, SETP_u32ri, SETP_u32ir,
1916-
SETP_u64rr, SETP_u64ri, SETP_u64ir,
1917-
SET_u16rr, SET_u16ri, SET_u16ir,
1918-
SET_u32rr, SET_u32ri, SET_u32ir,
1919-
SET_u64rr, SET_u64ri, SET_u64ir> {
1849+
SETP_u64rr, SETP_u64ri, SETP_u64ir> {
19201850
// TableGen doesn't like empty multiclasses.
19211851
def : PatLeaf<(i32 0)>;
19221852
}
@@ -2048,47 +1978,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
20481978
(SETP_f64ri $a, fpimm:$b, Mode)>;
20491979
def : Pat<(i1 (OpNode fpimm:$a, f64:$b)),
20501980
(SETP_f64ir fpimm:$a, $b, Mode)>;
2051-
2052-
// f16 -> i32
2053-
def : Pat<(i32 (OpNode f16:$a, f16:$b)),
2054-
(SET_f16rr $a, $b, ModeFTZ)>,
2055-
Requires<[useFP16Math, doF32FTZ]>;
2056-
def : Pat<(i32 (OpNode f16:$a, f16:$b)),
2057-
(SET_f16rr $a, $b, Mode)>,
2058-
Requires<[useFP16Math]>;
2059-
2060-
// bf16 -> i32
2061-
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
2062-
(SET_bf16rr $a, $b, ModeFTZ)>,
2063-
Requires<[hasBF16Math, doF32FTZ]>;
2064-
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
2065-
(SET_bf16rr $a, $b, Mode)>,
2066-
Requires<[hasBF16Math]>;
2067-
2068-
// f32 -> i32
2069-
def : Pat<(i32 (OpNode f32:$a, f32:$b)),
2070-
(SET_f32rr $a, $b, ModeFTZ)>,
2071-
Requires<[doF32FTZ]>;
2072-
def : Pat<(i32 (OpNode f32:$a, f32:$b)),
2073-
(SET_f32rr $a, $b, Mode)>;
2074-
def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
2075-
(SET_f32ri $a, fpimm:$b, ModeFTZ)>,
2076-
Requires<[doF32FTZ]>;
2077-
def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
2078-
(SET_f32ri $a, fpimm:$b, Mode)>;
2079-
def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
2080-
(SET_f32ir fpimm:$a, $b, ModeFTZ)>,
2081-
Requires<[doF32FTZ]>;
2082-
def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
2083-
(SET_f32ir fpimm:$a, $b, Mode)>;
2084-
2085-
// f64 -> i32
2086-
def : Pat<(i32 (OpNode f64:$a, f64:$b)),
2087-
(SET_f64rr $a, $b, Mode)>;
2088-
def : Pat<(i32 (OpNode f64:$a, fpimm:$b)),
2089-
(SET_f64ri $a, fpimm:$b, Mode)>;
2090-
def : Pat<(i32 (OpNode fpimm:$a, f64:$b)),
2091-
(SET_f64ir fpimm:$a, $b, Mode)>;
20921981
}
20931982

20941983
defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
@@ -2899,17 +2788,17 @@ def : Pat<(i32 (trunc i64:$a)),
28992788
def : Pat<(i16 (trunc i64:$a)),
29002789
(CVT_u16_u64 $a, CvtNONE)>;
29012790
def : Pat<(i1 (trunc i64:$a)),
2902-
(SETP_b64ri (ANDb64ri $a, 1), 1, CmpEQ)>;
2791+
(SETP_b64ri (ANDb64ri $a, 1), 0, CmpNE)>;
29032792

29042793
// truncate i32
29052794
def : Pat<(i16 (trunc i32:$a)),
29062795
(CVT_u16_u32 $a, CvtNONE)>;
29072796
def : Pat<(i1 (trunc i32:$a)),
2908-
(SETP_b32ri (ANDb32ri $a, 1), 1, CmpEQ)>;
2797+
(SETP_b32ri (ANDb32ri $a, 1), 0, CmpNE)>;
29092798

29102799
// truncate i16
29112800
def : Pat<(i1 (trunc i16:$a)),
2912-
(SETP_b16ri (ANDb16ri $a, 1), 1, CmpEQ)>;
2801+
(SETP_b16ri (ANDb16ri $a, 1), 0, CmpNE)>;
29132802

29142803
// sext_inreg
29152804
def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 $a)>;
@@ -2919,31 +2808,6 @@ def : Pat<(sext_inreg i64:$a, i8), (CVT_INREG_s64_s8 $a)>;
29192808
def : Pat<(sext_inreg i64:$a, i16), (CVT_INREG_s64_s16 $a)>;
29202809
def : Pat<(sext_inreg i64:$a, i32), (CVT_INREG_s64_s32 $a)>;
29212810

2922-
2923-
// Select instructions with 32-bit predicates
2924-
def : Pat<(select i32:$pred, i16:$a, i16:$b),
2925-
(SELP_b16rr $a, $b,
2926-
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2927-
def : Pat<(select i32:$pred, i32:$a, i32:$b),
2928-
(SELP_b32rr $a, $b,
2929-
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2930-
def : Pat<(select i32:$pred, i64:$a, i64:$b),
2931-
(SELP_b64rr $a, $b,
2932-
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2933-
def : Pat<(select i32:$pred, f16:$a, f16:$b),
2934-
(SELP_f16rr $a, $b,
2935-
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2936-
def : Pat<(select i32:$pred, bf16:$a, bf16:$b),
2937-
(SELP_bf16rr $a, $b,
2938-
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2939-
def : Pat<(select i32:$pred, f32:$a, f32:$b),
2940-
(SELP_f32rr $a, $b,
2941-
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2942-
def : Pat<(select i32:$pred, f64:$a, f64:$b),
2943-
(SELP_f64rr $a, $b,
2944-
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2945-
2946-
29472811
let hasSideEffects = false in {
29482812
// pack a set of smaller int registers to a larger int register
29492813
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),

llvm/test/CodeGen/NVPTX/bf16-instructions.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,7 +1123,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
11231123
; SM70-NEXT: // %bb.0:
11241124
; SM70-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
11251125
; SM70-NEXT: and.b16 %rs2, %rs1, 1;
1126-
; SM70-NEXT: setp.eq.b16 %p1, %rs2, 1;
1126+
; SM70-NEXT: setp.ne.b16 %p1, %rs2, 0;
11271127
; SM70-NEXT: selp.b32 %r1, 1, 0, %p1;
11281128
; SM70-NEXT: cvt.rn.f32.u32 %f1, %r1;
11291129
; SM70-NEXT: mov.b32 %r2, %f1;
@@ -1147,7 +1147,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
11471147
; SM80-NEXT: // %bb.0:
11481148
; SM80-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
11491149
; SM80-NEXT: and.b16 %rs2, %rs1, 1;
1150-
; SM80-NEXT: setp.eq.b16 %p1, %rs2, 1;
1150+
; SM80-NEXT: setp.ne.b16 %p1, %rs2, 0;
11511151
; SM80-NEXT: selp.b32 %r1, 1, 0, %p1;
11521152
; SM80-NEXT: cvt.rn.f32.u32 %f1, %r1;
11531153
; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f1;
@@ -1164,7 +1164,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
11641164
; SM80-FTZ-NEXT: // %bb.0:
11651165
; SM80-FTZ-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
11661166
; SM80-FTZ-NEXT: and.b16 %rs2, %rs1, 1;
1167-
; SM80-FTZ-NEXT: setp.eq.b16 %p1, %rs2, 1;
1167+
; SM80-FTZ-NEXT: setp.ne.b16 %p1, %rs2, 0;
11681168
; SM80-FTZ-NEXT: selp.b32 %r1, 1, 0, %p1;
11691169
; SM80-FTZ-NEXT: cvt.rn.f32.u32 %f1, %r1;
11701170
; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs3, %f1;
@@ -1180,7 +1180,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
11801180
; SM90-NEXT: // %bb.0:
11811181
; SM90-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
11821182
; SM90-NEXT: and.b16 %rs2, %rs1, 1;
1183-
; SM90-NEXT: setp.eq.b16 %p1, %rs2, 1;
1183+
; SM90-NEXT: setp.ne.b16 %p1, %rs2, 0;
11841184
; SM90-NEXT: selp.b32 %r1, 1, 0, %p1;
11851185
; SM90-NEXT: cvt.rn.bf16.u32 %rs3, %r1;
11861186
; SM90-NEXT: st.param.b16 [func_retval0], %rs3;

llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ define <2 x bfloat> @test_select(<2 x bfloat> %a, <2 x bfloat> %b, i1 zeroext %c
243243
; CHECK-NEXT: // %bb.0:
244244
; CHECK-NEXT: ld.param.u8 %rs1, [test_select_param_2];
245245
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
246-
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
246+
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
247247
; CHECK-NEXT: ld.param.b32 %r1, [test_select_param_1];
248248
; CHECK-NEXT: ld.param.b32 %r2, [test_select_param_0];
249249
; CHECK-NEXT: selp.b32 %r3, %r2, %r1, %p1;

llvm/test/CodeGen/NVPTX/combine-mad.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ define i32 @test4(i32 %a, i32 %b, i32 %c, i1 %p) {
146146
; CHECK-NEXT: // %bb.0:
147147
; CHECK-NEXT: ld.param.u8 %rs1, [test4_param_3];
148148
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
149-
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
149+
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
150150
; CHECK-NEXT: ld.param.u32 %r1, [test4_param_0];
151151
; CHECK-NEXT: ld.param.u32 %r2, [test4_param_1];
152152
; CHECK-NEXT: ld.param.u32 %r3, [test4_param_2];
@@ -170,7 +170,7 @@ define i32 @test4_rev(i32 %a, i32 %b, i32 %c, i1 %p) {
170170
; CHECK-NEXT: // %bb.0:
171171
; CHECK-NEXT: ld.param.u8 %rs1, [test4_rev_param_3];
172172
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
173-
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
173+
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
174174
; CHECK-NEXT: ld.param.u32 %r1, [test4_rev_param_0];
175175
; CHECK-NEXT: ld.param.u32 %r2, [test4_rev_param_1];
176176
; CHECK-NEXT: ld.param.u32 %r3, [test4_rev_param_2];

llvm/test/CodeGen/NVPTX/copysign.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ define float @fcopysign_f_d(float %a, double %b) {
4949
; CHECK-NEXT: ld.param.u64 %rd1, [fcopysign_f_d_param_1];
5050
; CHECK-NEXT: shr.u64 %rd2, %rd1, 63;
5151
; CHECK-NEXT: and.b64 %rd3, %rd2, 1;
52-
; CHECK-NEXT: setp.eq.b64 %p1, %rd3, 1;
52+
; CHECK-NEXT: setp.ne.b64 %p1, %rd3, 0;
5353
; CHECK-NEXT: selp.f32 %f4, %f3, %f2, %p1;
5454
; CHECK-NEXT: st.param.f32 [func_retval0], %f4;
5555
; CHECK-NEXT: ret;
@@ -72,7 +72,7 @@ define float @fcopysign_f_h(float %a, half %b) {
7272
; CHECK-NEXT: ld.param.u16 %rs1, [fcopysign_f_h_param_1];
7373
; CHECK-NEXT: shr.u16 %rs2, %rs1, 15;
7474
; CHECK-NEXT: and.b16 %rs3, %rs2, 1;
75-
; CHECK-NEXT: setp.eq.b16 %p1, %rs3, 1;
75+
; CHECK-NEXT: setp.ne.b16 %p1, %rs3, 0;
7676
; CHECK-NEXT: selp.f32 %f4, %f3, %f2, %p1;
7777
; CHECK-NEXT: st.param.f32 [func_retval0], %f4;
7878
; CHECK-NEXT: ret;
@@ -95,7 +95,7 @@ define double @fcopysign_d_f(double %a, float %b) {
9595
; CHECK-NEXT: ld.param.u32 %r1, [fcopysign_d_f_param_1];
9696
; CHECK-NEXT: shr.u32 %r2, %r1, 31;
9797
; CHECK-NEXT: and.b32 %r3, %r2, 1;
98-
; CHECK-NEXT: setp.eq.b32 %p1, %r3, 1;
98+
; CHECK-NEXT: setp.ne.b32 %p1, %r3, 0;
9999
; CHECK-NEXT: selp.f64 %fd4, %fd3, %fd2, %p1;
100100
; CHECK-NEXT: st.param.f64 [func_retval0], %fd4;
101101
; CHECK-NEXT: ret;
@@ -118,7 +118,7 @@ define double @fcopysign_d_h(double %a, half %b) {
118118
; CHECK-NEXT: ld.param.u16 %rs1, [fcopysign_d_h_param_1];
119119
; CHECK-NEXT: shr.u16 %rs2, %rs1, 15;
120120
; CHECK-NEXT: and.b16 %rs3, %rs2, 1;
121-
; CHECK-NEXT: setp.eq.b16 %p1, %rs3, 1;
121+
; CHECK-NEXT: setp.ne.b16 %p1, %rs3, 0;
122122
; CHECK-NEXT: selp.f64 %fd4, %fd3, %fd2, %p1;
123123
; CHECK-NEXT: st.param.f64 [func_retval0], %fd4;
124124
; CHECK-NEXT: ret;

llvm/test/CodeGen/NVPTX/f16-instructions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ define half @test_tailcall_flipped(half %a, half %b) #0 {
329329
; CHECK-LABEL: test_select(
330330
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_param_0];
331331
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_param_1];
332-
; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
332+
; CHECK-DAG: setp.ne.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 0;
333333
; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
334334
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
335335
; CHECK-NEXT: ret;
@@ -653,7 +653,7 @@ else:
653653
; CHECK: call.uni (retval0),
654654
; CHECK-NEXT: test_dummy
655655
; CHECK: }
656-
; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
656+
; CHECK: setp.ne.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 0;
657657
; CHECK: @[[PRED]] bra [[LOOP]];
658658
; CHECK: st.param.b16 [func_retval0], [[R]];
659659
; CHECK: ret;

llvm/test/CodeGen/NVPTX/f16x2-instructions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
555555
; CHECK-NEXT: // %bb.0:
556556
; CHECK-NEXT: ld.param.u8 %rs1, [test_select_param_2];
557557
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
558-
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
558+
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
559559
; CHECK-NEXT: ld.param.b32 %r2, [test_select_param_1];
560560
; CHECK-NEXT: ld.param.b32 %r1, [test_select_param_0];
561561
; CHECK-NEXT: selp.b32 %r3, %r1, %r2, %p1;

llvm/test/CodeGen/NVPTX/forward-ld-param.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ define i32 @test_multi_block(ptr byval([10 x i32]) %a, i1 %p) {
112112
; CHECK-NEXT: // %bb.0:
113113
; CHECK-NEXT: ld.param.u8 %rs1, [test_multi_block_param_1];
114114
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
115-
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
115+
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
116116
; CHECK-NEXT: not.pred %p2, %p1;
117117
; CHECK-NEXT: @%p2 bra $L__BB5_2;
118118
; CHECK-NEXT: // %bb.1: // %if

llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
33

44
; CHECK-LABEL: foo
5-
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
5+
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
66
; CHECK: selp.b32 %[[R:r[0-9]+]], 1, 0, %[[P]];
77
; CHECK: cvt.rn.f32.u32 %f{{.*}}, %[[R]]
88
define float @foo(i1 %a) {
@@ -11,7 +11,7 @@ define float @foo(i1 %a) {
1111
}
1212

1313
; CHECK-LABEL: foo2
14-
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
14+
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
1515
; CHECK: selp.b32 %[[R:r[0-9]+]], -1, 0, %[[P]];
1616
; CHECK: cvt.rn.f32.s32 %f{{.*}}, %[[R]]
1717
define float @foo2(i1 %a) {
@@ -20,7 +20,7 @@ define float @foo2(i1 %a) {
2020
}
2121

2222
; CHECK-LABEL: foo3
23-
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
23+
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
2424
; CHECK: selp.b32 %[[R:r[0-9]+]], 1, 0, %[[P]];
2525
; CHECK: cvt.rn.f64.u32 %fd{{.*}}, %[[R]]
2626
define double @foo3(i1 %a) {
@@ -29,7 +29,7 @@ define double @foo3(i1 %a) {
2929
}
3030

3131
; CHECK-LABEL: foo4
32-
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
32+
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
3333
; CHECK: selp.b32 %[[R:r[0-9]+]], -1, 0, %[[P]];
3434
; CHECK: cvt.rn.f64.s32 %fd{{.*}}, %[[R]]
3535
define double @foo4(i1 %a) {
@@ -38,7 +38,7 @@ define double @foo4(i1 %a) {
3838
}
3939

4040
; CHECK-LABEL: foo5
41-
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
41+
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
4242
; CHECK: selp.b32 %[[R:r[0-9]+]], 1, 0, %[[P]];
4343
; CHECK: cvt.rn.f16.u32 %{{.*}}, %[[R]]
4444
define half @foo5(i1 %a) {
@@ -47,7 +47,7 @@ define half @foo5(i1 %a) {
4747
}
4848

4949
; CHECK-LABEL: foo6
50-
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
50+
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
5151
; CHECK: selp.b32 %[[R:r[0-9]+]], -1, 0, %[[P]];
5252
; CHECK: cvt.rn.f16.s32 %{{.*}}, %[[R]]
5353
define half @foo6(i1 %a) {

0 commit comments

Comments
 (0)