@@ -1703,39 +1703,6 @@ def SETP_bf16x2rr :
1703
1703
[]>,
1704
1704
Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
1705
1705
1706
-
1707
- // FIXME: This doesn't appear to be correct. The "set" mnemonic has the form
1708
- // "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
1709
- // reg, either u32, s32, or f32. Anyway these aren't used at the moment.
1710
-
1711
// Removed by this diff: the SET multiclass and its enclosing let-block, which
// sets hasSideEffects = false on every record defined inside it.
// Each instantiation yields three NVPTXInst records (rr, ri, ir). All of them
// write an Int32Regs destination and take a CmpMode operand; they differ only
// in which source operand is an immediate (ImmCls) instead of a register (RC).
// The assembly string is "set$cmp.<TypeStr>" and the pattern list is empty
// ([]), so these records carry no selection patterns of their own.
- let hasSideEffects = false in {
1712
- multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
1713
- def rr : NVPTXInst<(outs Int32Regs:$dst),
1714
- (ins RC:$a, RC:$b, CmpMode:$cmp),
1715
- !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
1716
- def ri : NVPTXInst<(outs Int32Regs:$dst),
1717
- (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1718
- !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
1719
- def ir : NVPTXInst<(outs Int32Regs:$dst),
1720
- (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1721
- !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
1722
- }
1723
- }
1724
-
1725
// Removed by this diff: one SET instantiation per type suffix. Each defm
// expands to the <name>rr, <name>ri and <name>ir records of the SET multiclass.
// The f16 and bf16 variants use Int16Regs as their register class, and only
// SET_bf16 carries an extra Requires<[hasPTX<78>, hasSM<90>]> clause.
- defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
1726
- defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
1727
- defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
1728
- defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
1729
- defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
1730
- defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
1731
- defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
1732
- defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
1733
- defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
1734
- defm SET_f16 : SET<"f16", Int16Regs, f16imm>;
1735
- defm SET_bf16 : SET<"bf16", Int16Regs, bf16imm>, Requires<[hasPTX<78>, hasSM<90>]>;
1736
- defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
1737
- defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
1738
-
1739
1706
//-----------------------------------
1740
1707
// Data Movement (Load / Store, Move)
1741
1708
//-----------------------------------
@@ -1842,16 +1809,7 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
1842
1809
Instruction setp_32ir,
1843
1810
Instruction setp_64rr,
1844
1811
Instruction setp_64ri,
1845
- Instruction setp_64ir,
1846
- Instruction set_16rr,
1847
- Instruction set_16ri,
1848
- Instruction set_16ir,
1849
- Instruction set_32rr,
1850
- Instruction set_32ri,
1851
- Instruction set_32ir,
1852
- Instruction set_64rr,
1853
- Instruction set_64ri,
1854
- Instruction set_64ir> {
1812
+ Instruction setp_64ir> {
1855
1813
// i16 -> pred
1856
1814
def : Pat<(i1 (OpNode i16:$a, i16:$b)),
1857
1815
(setp_16rr $a, $b, Mode)>;
@@ -1873,38 +1831,13 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
1873
1831
(setp_64ri $a, imm:$b, Mode)>;
1874
1832
def : Pat<(i1 (OpNode imm:$a, i64:$b)),
1875
1833
(setp_64ir imm:$a, $b, Mode)>;
1876
-
1877
- // i16 -> i32
1878
- def : Pat<(i32 (OpNode i16:$a, i16:$b)),
1879
- (set_16rr $a, $b, Mode)>;
1880
- def : Pat<(i32 (OpNode i16:$a, imm:$b)),
1881
- (set_16ri $a, imm:$b, Mode)>;
1882
- def : Pat<(i32 (OpNode imm:$a, i16:$b)),
1883
- (set_16ir imm:$a, $b, Mode)>;
1884
- // i32 -> i32
1885
- def : Pat<(i32 (OpNode i32:$a, i32:$b)),
1886
- (set_32rr $a, $b, Mode)>;
1887
- def : Pat<(i32 (OpNode i32:$a, imm:$b)),
1888
- (set_32ri $a, imm:$b, Mode)>;
1889
- def : Pat<(i32 (OpNode imm:$a, i32:$b)),
1890
- (set_32ir imm:$a, $b, Mode)>;
1891
- // i64 -> i32
1892
- def : Pat<(i32 (OpNode i64:$a, Int64Regs:$b)),
1893
- (set_64rr $a, $b, Mode)>;
1894
- def : Pat<(i32 (OpNode i64:$a, imm:$b)),
1895
- (set_64ri $a, imm:$b, Mode)>;
1896
- def : Pat<(i32 (OpNode imm:$a, i64:$b)),
1897
- (set_64ir imm:$a, $b, Mode)>;
1898
1834
}
1899
1835
1900
1836
// Signed integer comparison patterns: forwards OpNode and Mode to ISET_FORMAT
// together with the SETP_s16/s32/s64 instructions in their rr, ri and ir forms.
// This diff drops the nine trailing SET_s* arguments, so after the change only
// the nine SETP_s* instructions are passed.
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
1901
1837
: ISET_FORMAT<OpNode, Mode,
1902
1838
SETP_s16rr, SETP_s16ri, SETP_s16ir,
1903
1839
SETP_s32rr, SETP_s32ri, SETP_s32ir,
1904
- SETP_s64rr, SETP_s64ri, SETP_s64ir,
1905
- SET_s16rr, SET_s16ri, SET_s16ir,
1906
- SET_s32rr, SET_s32ri, SET_s32ir,
1907
- SET_s64rr, SET_s64ri, SET_s64ir> {
1840
+ SETP_s64rr, SETP_s64ri, SETP_s64ir> {
1908
1841
// TableGen doesn't like empty multiclasses.
1909
1842
def : PatLeaf<(i32 0)>;
1910
1843
}
@@ -1913,10 +1846,7 @@ multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
1913
1846
: ISET_FORMAT<OpNode, Mode,
1914
1847
SETP_u16rr, SETP_u16ri, SETP_u16ir,
1915
1848
SETP_u32rr, SETP_u32ri, SETP_u32ir,
1916
- SETP_u64rr, SETP_u64ri, SETP_u64ir,
1917
- SET_u16rr, SET_u16ri, SET_u16ir,
1918
- SET_u32rr, SET_u32ri, SET_u32ir,
1919
- SET_u64rr, SET_u64ri, SET_u64ir> {
1849
+ SETP_u64rr, SETP_u64ri, SETP_u64ir> {
1920
1850
// TableGen doesn't like empty multiclasses.
1921
1851
def : PatLeaf<(i32 0)>;
1922
1852
}
@@ -2048,47 +1978,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
2048
1978
(SETP_f64ri $a, fpimm:$b, Mode)>;
2049
1979
def : Pat<(i1 (OpNode fpimm:$a, f64:$b)),
2050
1980
(SETP_f64ir fpimm:$a, $b, Mode)>;
2051
-
2052
- // f16 -> i32
2053
- def : Pat<(i32 (OpNode f16:$a, f16:$b)),
2054
- (SET_f16rr $a, $b, ModeFTZ)>,
2055
- Requires<[useFP16Math, doF32FTZ]>;
2056
- def : Pat<(i32 (OpNode f16:$a, f16:$b)),
2057
- (SET_f16rr $a, $b, Mode)>,
2058
- Requires<[useFP16Math]>;
2059
-
2060
- // bf16 -> i32
2061
- def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
2062
- (SET_bf16rr $a, $b, ModeFTZ)>,
2063
- Requires<[hasBF16Math, doF32FTZ]>;
2064
- def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
2065
- (SET_bf16rr $a, $b, Mode)>,
2066
- Requires<[hasBF16Math]>;
2067
-
2068
- // f32 -> i32
2069
- def : Pat<(i32 (OpNode f32:$a, f32:$b)),
2070
- (SET_f32rr $a, $b, ModeFTZ)>,
2071
- Requires<[doF32FTZ]>;
2072
- def : Pat<(i32 (OpNode f32:$a, f32:$b)),
2073
- (SET_f32rr $a, $b, Mode)>;
2074
- def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
2075
- (SET_f32ri $a, fpimm:$b, ModeFTZ)>,
2076
- Requires<[doF32FTZ]>;
2077
- def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
2078
- (SET_f32ri $a, fpimm:$b, Mode)>;
2079
- def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
2080
- (SET_f32ir fpimm:$a, $b, ModeFTZ)>,
2081
- Requires<[doF32FTZ]>;
2082
- def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
2083
- (SET_f32ir fpimm:$a, $b, Mode)>;
2084
-
2085
- // f64 -> i32
2086
- def : Pat<(i32 (OpNode f64:$a, f64:$b)),
2087
- (SET_f64rr $a, $b, Mode)>;
2088
- def : Pat<(i32 (OpNode f64:$a, fpimm:$b)),
2089
- (SET_f64ri $a, fpimm:$b, Mode)>;
2090
- def : Pat<(i32 (OpNode fpimm:$a, f64:$b)),
2091
- (SET_f64ir fpimm:$a, $b, Mode)>;
2092
1981
}
2093
1982
2094
1983
defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
@@ -2899,17 +2788,17 @@ def : Pat<(i32 (trunc i64:$a)),
2899
2788
// Integer truncation patterns. Truncations to i16 select a CVT_u16_* record
// with CvtNONE; truncations to i1 select a SETP_b* comparison on ($a AND 1).
def : Pat<(i16 (trunc i64:$a)),
2900
2789
(CVT_u16_u64 $a, CvtNONE)>;
2901
2790
// i64 -> i1: the operand is first masked with 1. The diff replaces the
// comparison "masked value, 1, CmpEQ" with "masked value, 0, CmpNE".
// NOTE(review): presumably equivalent, since the masked value can only be
// 0 or 1 — confirm against the CmpEQ/CmpNE definitions.
def : Pat<(i1 (trunc i64:$a)),
2902
- (SETP_b64ri (ANDb64ri $a, 1), 1, CmpEQ )>;
2791
+ (SETP_b64ri (ANDb64ri $a, 1), 0, CmpNE )>;
2903
2792
2904
2793
// truncate i32
2905
2794
def : Pat<(i16 (trunc i32:$a)),
2906
2795
(CVT_u16_u32 $a, CvtNONE)>;
2907
2796
// i32 -> i1: same mask-then-compare shape as the i64 case, using the b32
// instruction variants.
def : Pat<(i1 (trunc i32:$a)),
2908
- (SETP_b32ri (ANDb32ri $a, 1), 1, CmpEQ )>;
2797
+ (SETP_b32ri (ANDb32ri $a, 1), 0, CmpNE )>;
2909
2798
2910
2799
// truncate i16
2911
2800
// i16 -> i1: same mask-then-compare shape, using the b16 instruction variants.
def : Pat<(i1 (trunc i16:$a)),
2912
- (SETP_b16ri (ANDb16ri $a, 1), 1, CmpEQ )>;
2801
+ (SETP_b16ri (ANDb16ri $a, 1), 0, CmpNE )>;
2913
2802
2914
2803
// sext_inreg
2915
2804
def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 $a)>;
@@ -2919,31 +2808,6 @@ def : Pat<(sext_inreg i64:$a, i8), (CVT_INREG_s64_s8 $a)>;
2919
2808
def : Pat<(sext_inreg i64:$a, i16), (CVT_INREG_s64_s16 $a)>;
2920
2809
def : Pat<(sext_inreg i64:$a, i32), (CVT_INREG_s64_s32 $a)>;
2921
2810
2922
-
2923
- // Select instructions with 32-bit predicates
2924
// Removed by this diff: select patterns whose condition operand is an i32
// value, one per result type (i16, i32, i64, f16, bf16, f32, f64).
// Each pattern passes $a, $b and a predicate to the matching SELP_<type>rr
// record; the predicate is SETP_b32ri applied to ($pred AND 1), 1 and CmpEQ.
// NOTE(review): presumably this means only the low bit of $pred decides the
// selection — confirm against the SETP/SELP definitions.
- def : Pat<(select i32:$pred, i16:$a, i16:$b),
2925
- (SELP_b16rr $a, $b,
2926
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2927
- def : Pat<(select i32:$pred, i32:$a, i32:$b),
2928
- (SELP_b32rr $a, $b,
2929
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2930
- def : Pat<(select i32:$pred, i64:$a, i64:$b),
2931
- (SELP_b64rr $a, $b,
2932
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2933
- def : Pat<(select i32:$pred, f16:$a, f16:$b),
2934
- (SELP_f16rr $a, $b,
2935
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2936
- def : Pat<(select i32:$pred, bf16:$a, bf16:$b),
2937
- (SELP_bf16rr $a, $b,
2938
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2939
- def : Pat<(select i32:$pred, f32:$a, f32:$b),
2940
- (SELP_f32rr $a, $b,
2941
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2942
- def : Pat<(select i32:$pred, f64:$a, f64:$b),
2943
- (SELP_f64rr $a, $b,
2944
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
2945
-
2946
-
2947
2811
let hasSideEffects = false in {
2948
2812
// pack a set of smaller int registers to a larger int register
2949
2813
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
0 commit comments