@@ -725,62 +725,40 @@ def : Pat<(v2f16 (build_vector (f16 (fpround_oneuse f32:$lo)),
725
725
// selp instructions. Every variant defined here carries a `select` pattern,
726
726
// and the immediate (ii) forms are also used explicitly within this file.
727
727
// Every instruction defined in this scope is marked as having no side effects.
let hasSideEffects = false in {
728
- multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
729
- def rr : NVPTXInst<(outs RC:$dst),
730
- (ins RC:$a, RC:$b, Int1Regs:$p),
731
- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
732
- def ri : NVPTXInst<(outs RC:$dst),
733
- (ins RC:$a, ImmCls:$b, Int1Regs:$p),
734
- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
735
- def ir : NVPTXInst<(outs RC:$dst),
736
- (ins ImmCls:$a, RC:$b, Int1Regs:$p),
737
- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
738
- def ii : NVPTXInst<(outs RC:$dst),
739
- (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
740
- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
741
- }
742
-
743
- multiclass SELP_PATTERN<string TypeStr, ValueType T, RegisterClass RC,
744
- Operand ImmCls, SDNode ImmNode> {
728
// SELP_PATTERN: defines the four operand-kind variants of a selp instruction
// (rr, ri, ir, ii), where each of $a / $b is either a register of class t.RC
// or an immediate operand t.Imm. $p is always an Int1Regs predicate.
//   TypeStr - type suffix appended to "selp." in the emitted assembly.
//   t       - register/type descriptor; the fields read here are RC, Ty, Imm
//             and ImmNode.
// Each variant carries a selection pattern for (select i1:$p, a, b) with the
// result typed as t.Ty; immediate operands are matched through t.ImmNode.
+ multiclass SELP_PATTERN<string TypeStr, RegTyInfo t> {
729
// The assembly string is identical for all four variants, so build it once.
+ defvar asm_str = "selp." # TypeStr # " \t$dst, $a, $b, $p;";
745
730
def rr :
746
- NVPTXInst<(outs RC:$dst),
747
- (ins RC:$a, RC:$b, Int1Regs:$p),
748
- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;") ,
749
- [(set T :$dst, (select i1:$p, T :$a, T :$b))]>;
731
+ NVPTXInst<(outs t. RC:$dst),
732
+ (ins t. RC:$a, t. RC:$b, Int1Regs:$p),
733
+ asm_str ,
734
+ [(set t.Ty :$dst, (select i1:$p, t.Ty :$a, t.Ty :$b))]>;
750
735
def ri :
751
- NVPTXInst<(outs RC:$dst),
752
- (ins RC:$a, ImmCls :$b, Int1Regs:$p),
753
- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;") ,
754
- [(set T :$dst, (select i1:$p, T :$a, (T ImmNode:$b) ))]>;
736
+ NVPTXInst<(outs t. RC:$dst),
737
+ (ins t. RC:$a, t.Imm :$b, Int1Regs:$p),
738
+ asm_str ,
739
+ [(set t.Ty :$dst, (select i1:$p, t.Ty :$a, t. ImmNode:$b))]>;
755
740
def ir :
756
- NVPTXInst<(outs RC:$dst),
757
- (ins ImmCls :$a, RC:$b, Int1Regs:$p),
758
- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;") ,
759
- [(set T :$dst, (select i1:$p, ImmNode:$a, T :$b))]>;
741
+ NVPTXInst<(outs t. RC:$dst),
742
+ (ins t.Imm :$a, t. RC:$b, Int1Regs:$p),
743
+ asm_str ,
744
+ [(set t.Ty :$dst, (select i1:$p, t. ImmNode:$a, t.Ty :$b))]>;
760
745
def ii :
761
- NVPTXInst<(outs RC:$dst),
762
- (ins ImmCls :$a, ImmCls :$b, Int1Regs:$p),
763
- !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;") ,
764
- [(set T :$dst, (select i1:$p, ImmNode:$a, ImmNode:$b))]>;
746
+ NVPTXInst<(outs t. RC:$dst),
747
+ (ins t.Imm :$a, t.Imm :$b, Int1Regs:$p),
748
+ asm_str ,
749
+ [(set t.Ty :$dst, (select i1:$p, t. ImmNode:$a, t. ImmNode:$b))]>;
765
750
}
766
751
}
767
752
768
753
// Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as
769
754
// good.
770
- defm SELP_b16 : SELP_PATTERN<"b16", i16, Int16Regs, i16imm, imm>;
771
- defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
772
- defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
773
- defm SELP_b32 : SELP_PATTERN<"b32", i32, Int32Regs, i32imm, imm>;
774
- defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
775
- defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
776
- defm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>;
777
- defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
778
- defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
779
- defm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>;
780
- defm SELP_bf16 : SELP_PATTERN<"b16", bf16, Int16Regs, bf16imm, fpimm>;
781
-
782
- defm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>;
783
- defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>;
755
// One SELP_PATTERN instantiation per register type. The f16 and bf16
// instantiations pass "b16" as the type string, so they are printed with the
// same selp.b16 mnemonic as the i16 one; f32 and f64 use their own suffixes.
+ defm SELP_b16 : SELP_PATTERN<"b16", I16RT>;
756
+ defm SELP_b32 : SELP_PATTERN<"b32", I32RT>;
757
+ defm SELP_b64 : SELP_PATTERN<"b64", I64RT>;
758
+ defm SELP_f16 : SELP_PATTERN<"b16", F16RT>;
759
+ defm SELP_bf16 : SELP_PATTERN<"b16", BF16RT>;
760
+ defm SELP_f32 : SELP_PATTERN<"f32", F32RT>;
761
+ defm SELP_f64 : SELP_PATTERN<"f64", F64RT>;
784
762
785
763
// This does not work as tablegen fails to infer the type of 'imm'.
786
764
// def v2f16imm : Operand<v2f16>;
@@ -2023,9 +2001,9 @@ def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2023
2001
2024
2002
// i1 compare -> i32
2025
2003
// a != b on i1 is a ^ b: select -1 when the xor is set, 0 otherwise.
def : Pat<(i32 (setne i1:$a, i1:$b)),
2026
- (SELP_u32ii -1, 0, (XORb1rr $a, $b))>;
2004
+ (SELP_b32ii -1, 0, (XORb1rr $a, $b))>;
2027
2005
// a == b on i1 is !(a ^ b): same xor predicate with the select arms swapped.
// This matcher used to repeat `setne`, duplicating the pattern above while
// producing the inverted result; the swapped arms are the seteq lowering.
def : Pat<(i32 (seteq i1:$a, i1:$b)),
2028
- (SELP_u32ii 0, -1, (XORb1rr $a, $b))>;
2006
+ (SELP_b32ii 0, -1, (XORb1rr $a, $b))>;
2029
2007
2030
2008
2031
2009
@@ -2690,7 +2668,7 @@ foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
2690
2668
2691
2669
// sint -> f16
2692
2670
// i1 source: selp first materializes -1 (predicate set) or 0 as a 32-bit
// value, which is then fed to the s32 -> f16 conversion.
def : Pat<(f16 (sint_to_fp i1:$a)),
2693
- (CVT_f16_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
2671
+ (CVT_f16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
2694
2672
// 16-bit source: converted directly with the s16 form.
def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
2695
2673
(CVT_f16_s16 $a, CvtRN)>;
2696
2674
def : Pat<(f16 (sint_to_fp i32:$a)),
@@ -2700,7 +2678,7 @@ def : Pat<(f16 (sint_to_fp i64:$a)),
2700
2678
2701
2679
// uint -> f16
2702
2680
// i1 source: selp first materializes 1 (predicate set) or 0 as a 32-bit
// value, which is then fed to the u32 -> f16 conversion.
def : Pat<(f16 (uint_to_fp i1:$a)),
2703
- (CVT_f16_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
2681
+ (CVT_f16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
2704
2682
// 16-bit source: converted directly with the u16 form.
def : Pat<(f16 (uint_to_fp Int16Regs:$a)),
2705
2683
(CVT_f16_u16 $a, CvtRN)>;
2706
2684
def : Pat<(f16 (uint_to_fp i32:$a)),
@@ -2710,7 +2688,7 @@ def : Pat<(f16 (uint_to_fp i64:$a)),
2710
2688
2711
2689
// sint -> bf16
2712
2690
// i1 source: a signed i1 `true` is -1, so selp must materialize -1/0 (as the
// f16, f32 and f64 sint patterns in this file do) before the s32 -> bf16
// conversion. Using 1/0 here would convert `true` to +1.0 instead of -1.0.
def : Pat<(bf16 (sint_to_fp i1:$a)),
2713
- (CVT_bf16_s32 (SELP_u32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2691
+ (CVT_bf16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2714
2692
// 16-bit source: converted directly with the s16 form.
def : Pat<(bf16 (sint_to_fp i16:$a)),
2715
2693
(CVT_bf16_s16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2716
2694
def : Pat<(bf16 (sint_to_fp i32:$a)),
@@ -2720,7 +2698,7 @@ def : Pat<(bf16 (sint_to_fp i64:$a)),
2720
2698
2721
2699
// uint -> bf16
2722
2700
// i1 source: selp first materializes 1 (predicate set) or 0 as a 32-bit
// value, which is then fed to the u32 -> bf16 conversion. Like the other
// bf16 conversions here, this is gated on hasPTX<78> and hasSM<90>.
def : Pat<(bf16 (uint_to_fp i1:$a)),
2723
- (CVT_bf16_u32 (SELP_u32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2701
+ (CVT_bf16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2724
2702
// 16-bit source: converted directly with the u16 form.
def : Pat<(bf16 (uint_to_fp i16:$a)),
2725
2703
(CVT_bf16_u16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
2726
2704
def : Pat<(bf16 (uint_to_fp i32:$a)),
@@ -2730,7 +2708,7 @@ def : Pat<(bf16 (uint_to_fp i64:$a)),
2730
2708
2731
2709
// sint -> f32
2732
2710
// i1 source: selp first materializes -1 (predicate set) or 0 as a 32-bit
// value, which is then fed to the s32 -> f32 conversion.
def : Pat<(f32 (sint_to_fp i1:$a)),
2733
- (CVT_f32_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
2711
+ (CVT_f32_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
2734
2712
// 16-bit source: converted directly with the s16 form.
def : Pat<(f32 (sint_to_fp i16:$a)),
2735
2713
(CVT_f32_s16 $a, CvtRN)>;
2736
2714
def : Pat<(f32 (sint_to_fp i32:$a)),
@@ -2740,7 +2718,7 @@ def : Pat<(f32 (sint_to_fp i64:$a)),
2740
2718
2741
2719
// uint -> f32
2742
2720
// i1 source: selp first materializes 1 (predicate set) or 0 as a 32-bit
// value, which is then fed to the u32 -> f32 conversion.
def : Pat<(f32 (uint_to_fp i1:$a)),
2743
- (CVT_f32_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
2721
+ (CVT_f32_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
2744
2722
// 16-bit source: converted directly with the u16 form.
def : Pat<(f32 (uint_to_fp i16:$a)),
2745
2723
(CVT_f32_u16 $a, CvtRN)>;
2746
2724
def : Pat<(f32 (uint_to_fp i32:$a)),
@@ -2750,7 +2728,7 @@ def : Pat<(f32 (uint_to_fp i64:$a)),
2750
2728
2751
2729
// sint -> f64
2752
2730
// i1 source: selp first materializes -1 (predicate set) or 0 as a 32-bit
// value, which is then fed to the s32 -> f64 conversion.
def : Pat<(f64 (sint_to_fp i1:$a)),
2753
- (CVT_f64_s32 (SELP_s32ii -1, 0, $a), CvtRN)>;
2731
+ (CVT_f64_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
2754
2732
// 16-bit source: converted directly with the s16 form.
def : Pat<(f64 (sint_to_fp i16:$a)),
2755
2733
(CVT_f64_s16 $a, CvtRN)>;
2756
2734
def : Pat<(f64 (sint_to_fp i32:$a)),
@@ -2760,7 +2738,7 @@ def : Pat<(f64 (sint_to_fp i64:$a)),
2760
2738
2761
2739
// uint -> f64
2762
2740
// i1 source: selp first materializes 1 (predicate set) or 0 as a 32-bit
// value, which is then fed to the u32 -> f64 conversion.
def : Pat<(f64 (uint_to_fp i1:$a)),
2763
- (CVT_f64_u32 (SELP_u32ii 1, 0, $a), CvtRN)>;
2741
+ (CVT_f64_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
2764
2742
// 16-bit source: converted directly with the u16 form.
def : Pat<(f64 (uint_to_fp i16:$a)),
2765
2743
(CVT_f64_u16 $a, CvtRN)>;
2766
2744
def : Pat<(f64 (uint_to_fp i32:$a)),
@@ -2862,27 +2840,27 @@ def : Pat<(i64 (fp_to_uint f64:$a)),
2862
2840
2863
2841
// sext i1
2864
2842
// Sign-extending an i1 is lowered to a select between the immediates -1
// (predicate set) and 0, using the selp variant of the destination width.
def : Pat<(i16 (sext i1:$a)),
2865
- (SELP_s16ii -1, 0, $a)>;
2843
+ (SELP_b16ii -1, 0, $a)>;
2866
2844
def : Pat<(i32 (sext i1:$a)),
2867
- (SELP_s32ii -1, 0, $a)>;
2845
+ (SELP_b32ii -1, 0, $a)>;
2868
2846
def : Pat<(i64 (sext i1:$a)),
2869
- (SELP_s64ii -1, 0, $a)>;
2847
+ (SELP_b64ii -1, 0, $a)>;
2870
2848
2871
2849
// zext i1
2872
2850
// Zero-extending an i1 is lowered to a select between the immediates 1
// (predicate set) and 0, using the selp variant of the destination width.
def : Pat<(i16 (zext i1:$a)),
2873
- (SELP_u16ii 1, 0, $a)>;
2851
+ (SELP_b16ii 1, 0, $a)>;
2874
2852
def : Pat<(i32 (zext i1:$a)),
2875
- (SELP_u32ii 1, 0, $a)>;
2853
+ (SELP_b32ii 1, 0, $a)>;
2876
2854
def : Pat<(i64 (zext i1:$a)),
2877
- (SELP_u64ii 1, 0, $a)>;
2855
+ (SELP_b64ii 1, 0, $a)>;
2878
2856
2879
2857
// anyext i1
2880
2858
// Any-extending an i1 is lowered to a select between the immediates -1
// (predicate set) and 0, i.e. the same immediates the sext i1 patterns use.
def : Pat<(i16 (anyext i1:$a)),
2881
- (SELP_u16ii -1, 0, $a)>;
2859
+ (SELP_b16ii -1, 0, $a)>;
2882
2860
def : Pat<(i32 (anyext i1:$a)),
2883
- (SELP_u32ii -1, 0, $a)>;
2861
+ (SELP_b32ii -1, 0, $a)>;
2884
2862
def : Pat<(i64 (anyext i1:$a)),
2885
- (SELP_u64ii -1, 0, $a)>;
2863
+ (SELP_b64ii -1, 0, $a)>;
2886
2864
2887
2865
// sext i16
2888
2866
def : Pat<(i32 (sext i16:$a)),
0 commit comments