Skip to content

Commit a9ab8a0

Browse files
authored
[NVPTX] Cleanup ISel for selp.* (llvm#135065)
This change uses the untyped variant of `selp.` in all integer cases to simplify the ISel TableGen logic. It is not expected to have any impact on the final SASS.
1 parent 6a45fce commit a9ab8a0

14 files changed

+191
-213
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 44 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -725,62 +725,40 @@ def : Pat<(v2f16 (build_vector (f16 (fpround_oneuse f32:$lo)),
725725
// selp instructions. Every variant carries a `select` pattern; several are
726726
// also referenced explicitly by other patterns within this file.
727727
let hasSideEffects = false in {
728-
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
729-
def rr : NVPTXInst<(outs RC:$dst),
730-
(ins RC:$a, RC:$b, Int1Regs:$p),
731-
!strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
732-
def ri : NVPTXInst<(outs RC:$dst),
733-
(ins RC:$a, ImmCls:$b, Int1Regs:$p),
734-
!strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
735-
def ir : NVPTXInst<(outs RC:$dst),
736-
(ins ImmCls:$a, RC:$b, Int1Regs:$p),
737-
!strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
738-
def ii : NVPTXInst<(outs RC:$dst),
739-
(ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
740-
!strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
741-
}
742-
743-
multiclass SELP_PATTERN<string TypeStr, ValueType T, RegisterClass RC,
744-
Operand ImmCls, SDNode ImmNode> {
728+
multiclass SELP_PATTERN<string TypeStr, RegTyInfo t> {
729+
defvar asm_str = "selp." # TypeStr # " \t$dst, $a, $b, $p;";
745730
def rr :
746-
NVPTXInst<(outs RC:$dst),
747-
(ins RC:$a, RC:$b, Int1Regs:$p),
748-
!strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
749-
[(set T:$dst, (select i1:$p, T:$a, T:$b))]>;
731+
NVPTXInst<(outs t.RC:$dst),
732+
(ins t.RC:$a, t.RC:$b, Int1Regs:$p),
733+
asm_str,
734+
[(set t.Ty:$dst, (select i1:$p, t.Ty:$a, t.Ty:$b))]>;
750735
def ri :
751-
NVPTXInst<(outs RC:$dst),
752-
(ins RC:$a, ImmCls:$b, Int1Regs:$p),
753-
!strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
754-
[(set T:$dst, (select i1:$p, T:$a, (T ImmNode:$b)))]>;
736+
NVPTXInst<(outs t.RC:$dst),
737+
(ins t.RC:$a, t.Imm:$b, Int1Regs:$p),
738+
asm_str,
739+
[(set t.Ty:$dst, (select i1:$p, t.Ty:$a, t.ImmNode:$b))]>;
755740
def ir :
756-
NVPTXInst<(outs RC:$dst),
757-
(ins ImmCls:$a, RC:$b, Int1Regs:$p),
758-
!strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
759-
[(set T:$dst, (select i1:$p, ImmNode:$a, T:$b))]>;
741+
NVPTXInst<(outs t.RC:$dst),
742+
(ins t.Imm:$a, t.RC:$b, Int1Regs:$p),
743+
asm_str,
744+
[(set t.Ty:$dst, (select i1:$p, t.ImmNode:$a, t.Ty:$b))]>;
760745
def ii :
761-
NVPTXInst<(outs RC:$dst),
762-
(ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
763-
!strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
764-
[(set T:$dst, (select i1:$p, ImmNode:$a, ImmNode:$b))]>;
746+
NVPTXInst<(outs t.RC:$dst),
747+
(ins t.Imm:$a, t.Imm:$b, Int1Regs:$p),
748+
asm_str,
749+
[(set t.Ty:$dst, (select i1:$p, t.ImmNode:$a, t.ImmNode:$b))]>;
765750
}
766751
}
767752

768753
// selp.{s,u}{16,32,64} are deliberately not defined -- selp.b{16,32,64} is just as
769754
// good.
770-
defm SELP_b16 : SELP_PATTERN<"b16", i16, Int16Regs, i16imm, imm>;
771-
defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
772-
defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
773-
defm SELP_b32 : SELP_PATTERN<"b32", i32, Int32Regs, i32imm, imm>;
774-
defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
775-
defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
776-
defm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>;
777-
defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
778-
defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
779-
defm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>;
780-
defm SELP_bf16 : SELP_PATTERN<"b16", bf16, Int16Regs, bf16imm, fpimm>;
781-
782-
defm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>;
783-
defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>;
755+
defm SELP_b16 : SELP_PATTERN<"b16", I16RT>;
756+
defm SELP_b32 : SELP_PATTERN<"b32", I32RT>;
757+
defm SELP_b64 : SELP_PATTERN<"b64", I64RT>;
758+
defm SELP_f16 : SELP_PATTERN<"b16", F16RT>;
759+
defm SELP_bf16 : SELP_PATTERN<"b16", BF16RT>;
760+
defm SELP_f32 : SELP_PATTERN<"f32", F32RT>;
761+
defm SELP_f64 : SELP_PATTERN<"f64", F64RT>;
784762

785763
// This does not work as tablegen fails to infer the type of 'imm'.
786764
// def v2f16imm : Operand<v2f16>;
@@ -2023,9 +2001,9 @@ def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
20232001

20242002
// i1 compare -> i32
20252003
def : Pat<(i32 (setne i1:$a, i1:$b)),
2026-
(SELP_u32ii -1, 0, (XORb1rr $a, $b))>;
2004+
(SELP_b32ii -1, 0, (XORb1rr $a, $b))>;
20272005
// i1 equality -> i32: (a xor b) is true exactly when the inputs differ, so
// selecting 0 on true and -1 on false yields the all-ones/zero result of
// `seteq`. (Matching `setne` here duplicated the preceding pattern with the
// opposite result.)
def : Pat<(i32 (seteq i1:$a, i1:$b)),
2028-
(SELP_u32ii 0, -1, (XORb1rr $a, $b))>;
2006+
(SELP_b32ii 0, -1, (XORb1rr $a, $b))>;
20292007

20302008

20312009

@@ -2690,7 +2668,7 @@ foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
26902668

26912669
// sint -> f16
26922670
def : Pat<(f16 (sint_to_fp i1:$a)),
2693-
(CVT_f16_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
2671+
(CVT_f16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
26942672
def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
26952673
(CVT_f16_s16 $a, CvtRN)>;
26962674
def : Pat<(f16 (sint_to_fp i32:$a)),
@@ -2700,7 +2678,7 @@ def : Pat<(f16 (sint_to_fp i64:$a)),
27002678

27012679
// uint -> f16
27022680
def : Pat<(f16 (uint_to_fp i1:$a)),
2703-
(CVT_f16_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
2681+
(CVT_f16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
27042682
def : Pat<(f16 (uint_to_fp Int16Regs:$a)),
27052683
(CVT_f16_u16 $a, CvtRN)>;
27062684
def : Pat<(f16 (uint_to_fp i32:$a)),
@@ -2710,7 +2688,7 @@ def : Pat<(f16 (uint_to_fp i64:$a)),
27102688

27112689
// sint -> bf16
27122690
// A signed i1 `true` is -1, so materialize -1 (not 1) before the signed
// convert; this matches the f16/f32/f64 `sint_to_fp i1` patterns in this file.
def : Pat<(bf16 (sint_to_fp i1:$a)),
2713-
(CVT_bf16_s32 (SELP_u32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2691+
(CVT_bf16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
27142692
def : Pat<(bf16 (sint_to_fp i16:$a)),
27152693
(CVT_bf16_s16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
27162694
def : Pat<(bf16 (sint_to_fp i32:$a)),
@@ -2720,7 +2698,7 @@ def : Pat<(bf16 (sint_to_fp i64:$a)),
27202698

27212699
// uint -> bf16
27222700
def : Pat<(bf16 (uint_to_fp i1:$a)),
2723-
(CVT_bf16_u32 (SELP_u32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2701+
(CVT_bf16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
27242702
def : Pat<(bf16 (uint_to_fp i16:$a)),
27252703
(CVT_bf16_u16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
27262704
def : Pat<(bf16 (uint_to_fp i32:$a)),
@@ -2730,7 +2708,7 @@ def : Pat<(bf16 (uint_to_fp i64:$a)),
27302708

27312709
// sint -> f32
27322710
def : Pat<(f32 (sint_to_fp i1:$a)),
2733-
(CVT_f32_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
2711+
(CVT_f32_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
27342712
def : Pat<(f32 (sint_to_fp i16:$a)),
27352713
(CVT_f32_s16 $a, CvtRN)>;
27362714
def : Pat<(f32 (sint_to_fp i32:$a)),
@@ -2740,7 +2718,7 @@ def : Pat<(f32 (sint_to_fp i64:$a)),
27402718

27412719
// uint -> f32
27422720
def : Pat<(f32 (uint_to_fp i1:$a)),
2743-
(CVT_f32_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
2721+
(CVT_f32_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
27442722
def : Pat<(f32 (uint_to_fp i16:$a)),
27452723
(CVT_f32_u16 $a, CvtRN)>;
27462724
def : Pat<(f32 (uint_to_fp i32:$a)),
@@ -2750,7 +2728,7 @@ def : Pat<(f32 (uint_to_fp i64:$a)),
27502728

27512729
// sint -> f64
27522730
def : Pat<(f64 (sint_to_fp i1:$a)),
2753-
(CVT_f64_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
2731+
(CVT_f64_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
27542732
def : Pat<(f64 (sint_to_fp i16:$a)),
27552733
(CVT_f64_s16 $a, CvtRN)>;
27562734
def : Pat<(f64 (sint_to_fp i32:$a)),
@@ -2760,7 +2738,7 @@ def : Pat<(f64 (sint_to_fp i64:$a)),
27602738

27612739
// uint -> f64
27622740
def : Pat<(f64 (uint_to_fp i1:$a)),
2763-
(CVT_f64_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
2741+
(CVT_f64_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
27642742
def : Pat<(f64 (uint_to_fp i16:$a)),
27652743
(CVT_f64_u16 $a, CvtRN)>;
27662744
def : Pat<(f64 (uint_to_fp i32:$a)),
@@ -2862,27 +2840,27 @@ def : Pat<(i64 (fp_to_uint f64:$a)),
28622840

28632841
// sext i1
28642842
def : Pat<(i16 (sext i1:$a)),
2865-
(SELP_s16ii -1, 0, $a)>;
2843+
(SELP_b16ii -1, 0, $a)>;
28662844
def : Pat<(i32 (sext i1:$a)),
2867-
(SELP_s32ii -1, 0, $a)>;
2845+
(SELP_b32ii -1, 0, $a)>;
28682846
def : Pat<(i64 (sext i1:$a)),
2869-
(SELP_s64ii -1, 0, $a)>;
2847+
(SELP_b64ii -1, 0, $a)>;
28702848

28712849
// zext i1
28722850
def : Pat<(i16 (zext i1:$a)),
2873-
(SELP_u16ii 1, 0, $a)>;
2851+
(SELP_b16ii 1, 0, $a)>;
28742852
def : Pat<(i32 (zext i1:$a)),
2875-
(SELP_u32ii 1, 0, $a)>;
2853+
(SELP_b32ii 1, 0, $a)>;
28762854
def : Pat<(i64 (zext i1:$a)),
2877-
(SELP_u64ii 1, 0, $a)>;
2855+
(SELP_b64ii 1, 0, $a)>;
28782856

28792857
// anyext i1
28802858
def : Pat<(i16 (anyext i1:$a)),
2881-
(SELP_u16ii -1, 0, $a)>;
2859+
(SELP_b16ii -1, 0, $a)>;
28822860
def : Pat<(i32 (anyext i1:$a)),
2883-
(SELP_u32ii -1, 0, $a)>;
2861+
(SELP_b32ii -1, 0, $a)>;
28842862
def : Pat<(i64 (anyext i1:$a)),
2885-
(SELP_u64ii -1, 0, $a)>;
2863+
(SELP_b64ii -1, 0, $a)>;
28862864

28872865
// sext i16
28882866
def : Pat<(i32 (sext i16:$a)),

llvm/test/CodeGen/NVPTX/add-sub-128bit.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ define i128 @test_add(i128 %a, i128 %b) {
99
; NOCARRY: add.s64
1010
; NOCARRY-NEXT: add.s64
1111
; NOCARRY-NEXT: setp.lt.u64
12-
; NOCARRY-NEXT: selp.u64
12+
; NOCARRY-NEXT: selp.b64
1313
; NOCARRY-NEXT: add.s64
1414

1515
; CARRY: add.cc.s64
@@ -23,7 +23,7 @@ define i128 @test_add(i128 %a, i128 %b) {
2323
define i128 @test_sub(i128 %a, i128 %b) {
2424
; NOCARRY: sub.s64
2525
; NOCARRY-NEXT: setp.lt.u64
26-
; NOCARRY-NEXT: selp.s64
26+
; NOCARRY-NEXT: selp.b64
2727
; NOCARRY-NEXT: add.s64
2828
; NOCARRY-NEXT: sub.s64
2929

llvm/test/CodeGen/NVPTX/bf16-instructions.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,7 +1124,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
11241124
; SM70-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
11251125
; SM70-NEXT: and.b16 %rs2, %rs1, 1;
11261126
; SM70-NEXT: setp.eq.b16 %p1, %rs2, 1;
1127-
; SM70-NEXT: selp.u32 %r1, 1, 0, %p1;
1127+
; SM70-NEXT: selp.b32 %r1, 1, 0, %p1;
11281128
; SM70-NEXT: cvt.rn.f32.u32 %f1, %r1;
11291129
; SM70-NEXT: mov.b32 %r2, %f1;
11301130
; SM70-NEXT: bfe.u32 %r3, %r2, 16, 1;
@@ -1148,7 +1148,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
11481148
; SM80-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
11491149
; SM80-NEXT: and.b16 %rs2, %rs1, 1;
11501150
; SM80-NEXT: setp.eq.b16 %p1, %rs2, 1;
1151-
; SM80-NEXT: selp.u32 %r1, 1, 0, %p1;
1151+
; SM80-NEXT: selp.b32 %r1, 1, 0, %p1;
11521152
; SM80-NEXT: cvt.rn.f32.u32 %f1, %r1;
11531153
; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f1;
11541154
; SM80-NEXT: st.param.b16 [func_retval0], %rs3;
@@ -1165,7 +1165,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
11651165
; SM80-FTZ-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
11661166
; SM80-FTZ-NEXT: and.b16 %rs2, %rs1, 1;
11671167
; SM80-FTZ-NEXT: setp.eq.b16 %p1, %rs2, 1;
1168-
; SM80-FTZ-NEXT: selp.u32 %r1, 1, 0, %p1;
1168+
; SM80-FTZ-NEXT: selp.b32 %r1, 1, 0, %p1;
11691169
; SM80-FTZ-NEXT: cvt.rn.f32.u32 %f1, %r1;
11701170
; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs3, %f1;
11711171
; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs3;
@@ -1181,7 +1181,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
11811181
; SM90-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
11821182
; SM90-NEXT: and.b16 %rs2, %rs1, 1;
11831183
; SM90-NEXT: setp.eq.b16 %p1, %rs2, 1;
1184-
; SM90-NEXT: selp.u32 %r1, 1, 0, %p1;
1184+
; SM90-NEXT: selp.b32 %r1, 1, 0, %p1;
11851185
; SM90-NEXT: cvt.rn.bf16.u32 %rs3, %r1;
11861186
; SM90-NEXT: st.param.b16 [func_retval0], %rs3;
11871187
; SM90-NEXT: ret;

0 commit comments

Comments
 (0)