Skip to content

[NVPTX] Use 0 immediate for i1 trunc, cleanup dead code #135646

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 6 additions & 142 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1703,39 +1703,6 @@ def SETP_bf16x2rr :
[]>,
Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;


// FIXME: These SET definitions don't appear to be correct. The PTX "set"
// mnemonic has the form "set.CmpOp{.ftz}.dtype.stype", where dtype is the type
// of the destination reg (either u32, s32, or f32), but the definitions below
// emit only a single type suffix. Anyway, these aren't used at the moment.

// SET: defines the "set" comparison instruction for one operand type in three
// operand forms. Every form writes an Int32Regs destination, takes a CmpMode
// operand ($cmp) and prints as "set" + $cmp + "." + TypeStr followed by the
// operands "$dst, $a, $b;".
//
// Parameters:
//   TypeStr - type suffix placed after the compare mode in the asm string.
//   RC      - register class used for register source operands.
//   ImmCls  - operand class used for immediate source operands.
//
// All three definitions have an empty pattern list ([]), so no selection
// pattern is attached here, and all are marked as having no side effects.
let hasSideEffects = false in {
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
// rr: both sources are registers of class RC.
def rr : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, RC:$b, CmpMode:$cmp),
!strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
// ri: first source is a register, second source is an immediate.
def ri : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, ImmCls:$b, CmpMode:$cmp),
!strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
// ir: first source is an immediate, second source is a register.
def ir : NVPTXInst<(outs Int32Regs:$dst),
(ins ImmCls:$a, RC:$b, CmpMode:$cmp),
!strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
}
}

// Instantiations of the SET multiclass; each defm produces the rr, ri and ir
// variants (e.g. SET_b16rr, SET_b16ri, SET_b16ir).

// 16-bit integer operands (untyped bits, signed, unsigned).
defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
// 32-bit integer operands.
defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
// 64-bit integer operands.
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
// Floating-point operands. The f16 and bf16 variants use the Int16Regs
// register class; the bf16 variant is additionally gated on the
// hasPTX<78> and hasSM<90> predicates.
defm SET_f16 : SET<"f16", Int16Regs, f16imm>;
defm SET_bf16 : SET<"bf16", Int16Regs, bf16imm>, Requires<[hasPTX<78>, hasSM<90>]>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;

//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------
Expand Down Expand Up @@ -1842,16 +1809,7 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
Instruction setp_32ir,
Instruction setp_64rr,
Instruction setp_64ri,
Instruction setp_64ir,
Instruction set_16rr,
Instruction set_16ri,
Instruction set_16ir,
Instruction set_32rr,
Instruction set_32ri,
Instruction set_32ir,
Instruction set_64rr,
Instruction set_64ri,
Instruction set_64ir> {
Instruction setp_64ir> {
// i16 -> pred
def : Pat<(i1 (OpNode i16:$a, i16:$b)),
(setp_16rr $a, $b, Mode)>;
Expand All @@ -1873,38 +1831,13 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
(setp_64ri $a, imm:$b, Mode)>;
def : Pat<(i1 (OpNode imm:$a, i64:$b)),
(setp_64ir imm:$a, $b, Mode)>;

// i16 -> i32
def : Pat<(i32 (OpNode i16:$a, i16:$b)),
(set_16rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode i16:$a, imm:$b)),
(set_16ri $a, imm:$b, Mode)>;
def : Pat<(i32 (OpNode imm:$a, i16:$b)),
(set_16ir imm:$a, $b, Mode)>;
// i32 -> i32
def : Pat<(i32 (OpNode i32:$a, i32:$b)),
(set_32rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode i32:$a, imm:$b)),
(set_32ri $a, imm:$b, Mode)>;
def : Pat<(i32 (OpNode imm:$a, i32:$b)),
(set_32ir imm:$a, $b, Mode)>;
// i64 -> i32
def : Pat<(i32 (OpNode i64:$a, Int64Regs:$b)),
(set_64rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode i64:$a, imm:$b)),
(set_64ri $a, imm:$b, Mode)>;
def : Pat<(i32 (OpNode imm:$a, i64:$b)),
(set_64ir imm:$a, $b, Mode)>;
}

multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
: ISET_FORMAT<OpNode, Mode,
SETP_s16rr, SETP_s16ri, SETP_s16ir,
SETP_s32rr, SETP_s32ri, SETP_s32ir,
SETP_s64rr, SETP_s64ri, SETP_s64ir,
SET_s16rr, SET_s16ri, SET_s16ir,
SET_s32rr, SET_s32ri, SET_s32ir,
SET_s64rr, SET_s64ri, SET_s64ir> {
SETP_s64rr, SETP_s64ri, SETP_s64ir> {
// TableGen doesn't like empty multiclasses.
def : PatLeaf<(i32 0)>;
}
Expand All @@ -1913,10 +1846,7 @@ multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
: ISET_FORMAT<OpNode, Mode,
SETP_u16rr, SETP_u16ri, SETP_u16ir,
SETP_u32rr, SETP_u32ri, SETP_u32ir,
SETP_u64rr, SETP_u64ri, SETP_u64ir,
SET_u16rr, SET_u16ri, SET_u16ir,
SET_u32rr, SET_u32ri, SET_u32ir,
SET_u64rr, SET_u64ri, SET_u64ir> {
SETP_u64rr, SETP_u64ri, SETP_u64ir> {
// TableGen doesn't like empty multiclasses.
def : PatLeaf<(i32 0)>;
}
Expand Down Expand Up @@ -2048,47 +1978,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
(SETP_f64ri $a, fpimm:$b, Mode)>;
def : Pat<(i1 (OpNode fpimm:$a, f64:$b)),
(SETP_f64ir fpimm:$a, $b, Mode)>;

// f16 -> i32
def : Pat<(i32 (OpNode f16:$a, f16:$b)),
(SET_f16rr $a, $b, ModeFTZ)>,
Requires<[useFP16Math, doF32FTZ]>;
def : Pat<(i32 (OpNode f16:$a, f16:$b)),
(SET_f16rr $a, $b, Mode)>,
Requires<[useFP16Math]>;

// bf16 -> i32
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
(SET_bf16rr $a, $b, ModeFTZ)>,
Requires<[hasBF16Math, doF32FTZ]>;
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
(SET_bf16rr $a, $b, Mode)>,
Requires<[hasBF16Math]>;

// f32 -> i32
def : Pat<(i32 (OpNode f32:$a, f32:$b)),
(SET_f32rr $a, $b, ModeFTZ)>,
Requires<[doF32FTZ]>;
def : Pat<(i32 (OpNode f32:$a, f32:$b)),
(SET_f32rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
(SET_f32ri $a, fpimm:$b, ModeFTZ)>,
Requires<[doF32FTZ]>;
def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
(SET_f32ri $a, fpimm:$b, Mode)>;
def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
(SET_f32ir fpimm:$a, $b, ModeFTZ)>,
Requires<[doF32FTZ]>;
def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
(SET_f32ir fpimm:$a, $b, Mode)>;

// f64 -> i32
def : Pat<(i32 (OpNode f64:$a, f64:$b)),
(SET_f64rr $a, $b, Mode)>;
def : Pat<(i32 (OpNode f64:$a, fpimm:$b)),
(SET_f64ri $a, fpimm:$b, Mode)>;
def : Pat<(i32 (OpNode fpimm:$a, f64:$b)),
(SET_f64ir fpimm:$a, $b, Mode)>;
}

defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
Expand Down Expand Up @@ -2899,17 +2788,17 @@ def : Pat<(i32 (trunc i64:$a)),
def : Pat<(i16 (trunc i64:$a)),
(CVT_u16_u64 $a, CvtNONE)>;
def : Pat<(i1 (trunc i64:$a)),
(SETP_b64ri (ANDb64ri $a, 1), 1, CmpEQ)>;
(SETP_b64ri (ANDb64ri $a, 1), 0, CmpNE)>;

// truncate i32
def : Pat<(i16 (trunc i32:$a)),
(CVT_u16_u32 $a, CvtNONE)>;
def : Pat<(i1 (trunc i32:$a)),
(SETP_b32ri (ANDb32ri $a, 1), 1, CmpEQ)>;
(SETP_b32ri (ANDb32ri $a, 1), 0, CmpNE)>;

// truncate i16
def : Pat<(i1 (trunc i16:$a)),
(SETP_b16ri (ANDb16ri $a, 1), 1, CmpEQ)>;
(SETP_b16ri (ANDb16ri $a, 1), 0, CmpNE)>;

// sext_inreg
def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 $a)>;
Expand All @@ -2919,31 +2808,6 @@ def : Pat<(sext_inreg i64:$a, i8), (CVT_INREG_s64_s8 $a)>;
def : Pat<(sext_inreg i64:$a, i16), (CVT_INREG_s64_s16 $a)>;
def : Pat<(sext_inreg i64:$a, i32), (CVT_INREG_s64_s32 $a)>;


// Select instructions with 32-bit predicates.
//
// Each pattern matches a select whose condition is an i32 value rather than an
// i1. The condition is first masked with 1 (ANDb32ri $pred, 1), the masked
// value is compared against 1 with CmpEQ (SETP_b32ri), and the result of that
// comparison is passed as the last operand of the SELP instruction matching
// the type of the selected values ($a and $b). One pattern is provided per
// value type: i16, i32, i64, f16, bf16, f32 and f64.
def : Pat<(select i32:$pred, i16:$a, i16:$b),
(SELP_b16rr $a, $b,
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, i32:$a, i32:$b),
(SELP_b32rr $a, $b,
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, i64:$a, i64:$b),
(SELP_b64rr $a, $b,
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, f16:$a, f16:$b),
(SELP_f16rr $a, $b,
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, bf16:$a, bf16:$b),
(SELP_bf16rr $a, $b,
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, f32:$a, f32:$b),
(SELP_f32rr $a, $b,
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
def : Pat<(select i32:$pred, f64:$a, f64:$b),
(SELP_f64rr $a, $b,
(SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;


let hasSideEffects = false in {
// pack a set of smaller int registers to a larger int register
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/bf16-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
; SM70-NEXT: and.b16 %rs2, %rs1, 1;
; SM70-NEXT: setp.eq.b16 %p1, %rs2, 1;
; SM70-NEXT: setp.ne.b16 %p1, %rs2, 0;
; SM70-NEXT: selp.b32 %r1, 1, 0, %p1;
; SM70-NEXT: cvt.rn.f32.u32 %f1, %r1;
; SM70-NEXT: mov.b32 %r2, %f1;
Expand All @@ -1147,7 +1147,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
; SM80-NEXT: and.b16 %rs2, %rs1, 1;
; SM80-NEXT: setp.eq.b16 %p1, %rs2, 1;
; SM80-NEXT: setp.ne.b16 %p1, %rs2, 0;
; SM80-NEXT: selp.b32 %r1, 1, 0, %p1;
; SM80-NEXT: cvt.rn.f32.u32 %f1, %r1;
; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f1;
Expand All @@ -1164,7 +1164,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
; SM80-FTZ-NEXT: and.b16 %rs2, %rs1, 1;
; SM80-FTZ-NEXT: setp.eq.b16 %p1, %rs2, 1;
; SM80-FTZ-NEXT: setp.ne.b16 %p1, %rs2, 0;
; SM80-FTZ-NEXT: selp.b32 %r1, 1, 0, %p1;
; SM80-FTZ-NEXT: cvt.rn.f32.u32 %f1, %r1;
; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs3, %f1;
Expand All @@ -1180,7 +1180,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
; SM90-NEXT: and.b16 %rs2, %rs1, 1;
; SM90-NEXT: setp.eq.b16 %p1, %rs2, 1;
; SM90-NEXT: setp.ne.b16 %p1, %rs2, 0;
; SM90-NEXT: selp.b32 %r1, 1, 0, %p1;
; SM90-NEXT: cvt.rn.bf16.u32 %rs3, %r1;
; SM90-NEXT: st.param.b16 [func_retval0], %rs3;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ define <2 x bfloat> @test_select(<2 x bfloat> %a, <2 x bfloat> %b, i1 zeroext %c
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test_select_param_2];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r1, [test_select_param_1];
; CHECK-NEXT: ld.param.b32 %r2, [test_select_param_0];
; CHECK-NEXT: selp.b32 %r3, %r2, %r1, %p1;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/combine-mad.ll
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ define i32 @test4(i32 %a, i32 %b, i32 %c, i1 %p) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test4_param_3];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.u32 %r1, [test4_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test4_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test4_param_2];
Expand All @@ -170,7 +170,7 @@ define i32 @test4_rev(i32 %a, i32 %b, i32 %c, i1 %p) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test4_rev_param_3];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.u32 %r1, [test4_rev_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test4_rev_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test4_rev_param_2];
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/copysign.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ define float @fcopysign_f_d(float %a, double %b) {
; CHECK-NEXT: ld.param.u64 %rd1, [fcopysign_f_d_param_1];
; CHECK-NEXT: shr.u64 %rd2, %rd1, 63;
; CHECK-NEXT: and.b64 %rd3, %rd2, 1;
; CHECK-NEXT: setp.eq.b64 %p1, %rd3, 1;
; CHECK-NEXT: setp.ne.b64 %p1, %rd3, 0;
; CHECK-NEXT: selp.f32 %f4, %f3, %f2, %p1;
; CHECK-NEXT: st.param.f32 [func_retval0], %f4;
; CHECK-NEXT: ret;
Expand All @@ -72,7 +72,7 @@ define float @fcopysign_f_h(float %a, half %b) {
; CHECK-NEXT: ld.param.u16 %rs1, [fcopysign_f_h_param_1];
; CHECK-NEXT: shr.u16 %rs2, %rs1, 15;
; CHECK-NEXT: and.b16 %rs3, %rs2, 1;
; CHECK-NEXT: setp.eq.b16 %p1, %rs3, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs3, 0;
; CHECK-NEXT: selp.f32 %f4, %f3, %f2, %p1;
; CHECK-NEXT: st.param.f32 [func_retval0], %f4;
; CHECK-NEXT: ret;
Expand All @@ -95,7 +95,7 @@ define double @fcopysign_d_f(double %a, float %b) {
; CHECK-NEXT: ld.param.u32 %r1, [fcopysign_d_f_param_1];
; CHECK-NEXT: shr.u32 %r2, %r1, 31;
; CHECK-NEXT: and.b32 %r3, %r2, 1;
; CHECK-NEXT: setp.eq.b32 %p1, %r3, 1;
; CHECK-NEXT: setp.ne.b32 %p1, %r3, 0;
; CHECK-NEXT: selp.f64 %fd4, %fd3, %fd2, %p1;
; CHECK-NEXT: st.param.f64 [func_retval0], %fd4;
; CHECK-NEXT: ret;
Expand All @@ -118,7 +118,7 @@ define double @fcopysign_d_h(double %a, half %b) {
; CHECK-NEXT: ld.param.u16 %rs1, [fcopysign_d_h_param_1];
; CHECK-NEXT: shr.u16 %rs2, %rs1, 15;
; CHECK-NEXT: and.b16 %rs3, %rs2, 1;
; CHECK-NEXT: setp.eq.b16 %p1, %rs3, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs3, 0;
; CHECK-NEXT: selp.f64 %fd4, %fd3, %fd2, %p1;
; CHECK-NEXT: st.param.f64 [func_retval0], %fd4;
; CHECK-NEXT: ret;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/f16-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ define half @test_tailcall_flipped(half %a, half %b) #0 {
; CHECK-LABEL: test_select(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_param_1];
; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
; CHECK-DAG: setp.ne.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 0;
; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
Expand Down Expand Up @@ -653,7 +653,7 @@ else:
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_dummy
; CHECK: }
; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
; CHECK: setp.ne.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 0;
; CHECK: @[[PRED]] bra [[LOOP]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test_select_param_2];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r2, [test_select_param_1];
; CHECK-NEXT: ld.param.b32 %r1, [test_select_param_0];
; CHECK-NEXT: selp.b32 %r3, %r1, %r2, %p1;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/NVPTX/forward-ld-param.ll
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ define i32 @test_multi_block(ptr byval([10 x i32]) %a, i1 %p) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test_multi_block_param_1];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: not.pred %p2, %p1;
; CHECK-NEXT: @%p2 bra $L__BB5_2;
; CHECK-NEXT: // %bb.1: // %if
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}

; CHECK-LABEL: foo
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], 1, 0, %[[P]];
; CHECK: cvt.rn.f32.u32 %f{{.*}}, %[[R]]
define float @foo(i1 %a) {
Expand All @@ -11,7 +11,7 @@ define float @foo(i1 %a) {
}

; CHECK-LABEL: foo2
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], -1, 0, %[[P]];
; CHECK: cvt.rn.f32.s32 %f{{.*}}, %[[R]]
define float @foo2(i1 %a) {
Expand All @@ -20,7 +20,7 @@ define float @foo2(i1 %a) {
}

; CHECK-LABEL: foo3
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], 1, 0, %[[P]];
; CHECK: cvt.rn.f64.u32 %fd{{.*}}, %[[R]]
define double @foo3(i1 %a) {
Expand All @@ -29,7 +29,7 @@ define double @foo3(i1 %a) {
}

; CHECK-LABEL: foo4
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], -1, 0, %[[P]];
; CHECK: cvt.rn.f64.s32 %fd{{.*}}, %[[R]]
define double @foo4(i1 %a) {
Expand All @@ -38,7 +38,7 @@ define double @foo4(i1 %a) {
}

; CHECK-LABEL: foo5
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], 1, 0, %[[P]];
; CHECK: cvt.rn.f16.u32 %{{.*}}, %[[R]]
define half @foo5(i1 %a) {
Expand All @@ -47,7 +47,7 @@ define half @foo5(i1 %a) {
}

; CHECK-LABEL: foo6
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], -1, 0, %[[P]];
; CHECK: cvt.rn.f16.s32 %{{.*}}, %[[R]]
define half @foo6(i1 %a) {
Expand Down
Loading
Loading