@@ -2718,80 +2718,64 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2718
2718
// Scalar
2719
2719
2720
2720
// Scalar ldu.global loads: one record per addressing form, each carrying
// Requires<[hasLDU]>. In this change the `avar` record (address operand only)
// is replaced by `asi`, which takes an additional Offseti32imm operand printed
// as [$src$offset], and the operand text moves out of the TyStr argument into
// the multiclass, so TyStr now supplies only the type suffix.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2721
- def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2722
- !strconcat( "ldu.global.", TyStr) ,
2721
+ def asi : NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset ),
2722
+ "ldu.global." # TyStr # " \t$result, [$src$offset];" ,
2723
2723
[]>, Requires<[hasLDU]>;
2724
2724
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2725
- !strconcat( "ldu.global.", TyStr) ,
2725
+ "ldu.global." # TyStr # " \t$result, [$src];" ,
2726
2726
[]>, Requires<[hasLDU]>;
2727
2727
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2728
- !strconcat( "ldu.global.", TyStr) ,
2728
+ "ldu.global." # TyStr # " \t$result, [$src];" ,
2729
2729
[]>, Requires<[hasLDU]>;
2730
2730
}
2731
2731
2732
// Scalar LDU instantiations. The removed lines passed the type suffix together
// with the operand text in TyStr; the added lines pass only the type suffix.
// Both the i8 and i16 variants are instantiated with Int16Regs.
- defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src]; ", Int16Regs>;
2733
- defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src]; ", Int16Regs>;
2734
- defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src]; ", Int32Regs>;
2735
- defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src]; ", Int64Regs>;
2736
- defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src]; ", Float32Regs>;
2737
- defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src]; ", Float64Regs>;
2732
+ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>;
2733
+ defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>;
2734
+ defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>;
2735
+ defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>;
2736
+ defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>;
2737
+ defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>;
2738
2738
2739
2739
// vector
2740
2740
2741
2741
// Elementized vector ldu
2742
2742
// Two-element vector ldu.global loads that write $dst1 and $dst2. After this
// change the multiclass itself emits the "v2." prefix and the operand list, so
// TyStr is only the element type suffix, and `_avar` is replaced by `_asi`,
// which takes an extra Offseti32imm operand printed as [$src$offset].
// NOTE(review): unlike the scalar LDU_G records, none of these records carries
// Requires<[hasLDU]> -- confirm that is intentional.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2743
2743
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2744
2744
(ins MEMri:$src),
2745
- !strconcat( "ldu.global.", TyStr) , []>;
2745
+ "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
2746
2746
def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2747
2747
(ins MEMri64:$src),
2748
- !strconcat( "ldu.global.", TyStr) , []>;
2749
- def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2750
- (ins imemAny:$src),
2751
- !strconcat( "ldu.global.", TyStr) , []>;
2748
+ "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
2749
+ def _asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2750
+ (ins imemAny:$src, Offseti32imm:$offset ),
2751
+ "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];" , []>;
2752
2752
}
2753
2753
2754
2754
// Four-element vector ldu.global loads that write $dst1..$dst4. After this
// change the multiclass emits the "v4." prefix and the operand list, so TyStr
// is only the element type suffix, and `_avar` is replaced by `_asi`, which
// takes an extra Offseti32imm operand printed as [$src$offset].
// The removed lines after the old closing brace are the previous vector LDU
// instantiations, which embedded the "v2."/"v4." prefix and the operand text in
// TyStr; the diff interleaves them with the tail of the new multiclass body.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2755
2755
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2756
2756
regclass:$dst4), (ins MEMri:$src),
2757
- !strconcat( "ldu.global.", TyStr) , []>;
2757
+ "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];" , []>;
2758
2758
def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2759
2759
regclass:$dst4), (ins MEMri64:$src),
2760
- !strconcat("ldu.global.", TyStr), []>;
2761
- def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2762
- regclass:$dst4), (ins imemAny:$src),
2763
- !strconcat("ldu.global.", TyStr), []>;
2764
- }
2765
-
2766
- defm INT_PTX_LDU_G_v2i8_ELE
2767
- : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2768
- defm INT_PTX_LDU_G_v2i16_ELE
2769
- : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2770
- defm INT_PTX_LDU_G_v2i32_ELE
2771
- : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2772
- defm INT_PTX_LDU_G_v2f32_ELE
2773
- : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2774
- defm INT_PTX_LDU_G_v2i64_ELE
2775
- : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2776
- defm INT_PTX_LDU_G_v2f64_ELE
2777
- : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2778
- defm INT_PTX_LDU_G_v4i8_ELE
2779
- : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2780
- defm INT_PTX_LDU_G_v4i16_ELE
2781
- : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2782
- Int16Regs>;
2783
- defm INT_PTX_LDU_G_v4i32_ELE
2784
- : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2785
- Int32Regs>;
2786
- defm INT_PTX_LDU_G_v4f16_ELE
2787
- : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2788
- Int16Regs>;
2789
- defm INT_PTX_LDU_G_v4f16x2_ELE
2790
- : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2791
- Int32Regs>;
2792
- defm INT_PTX_LDU_G_v4f32_ELE
2793
- : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2794
- Float32Regs>;
2760
+ "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2761
+ def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2762
+ regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset),
2763
+ "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>;
2764
+ }
2765
+
2766
// New vector LDU instantiations: TyStr is only the element type suffix.
// The v2 group covers u8/u16/u32/f32/u64/f64; the v4 group covers
// u8/u16/u32/f32 plus b16 (v4f16, Int16Regs) and b32 (v4f16x2, Int32Regs).
// The 8-bit variants are instantiated with Int16Regs.
+ defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>;
2767
+ defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>;
2768
+ defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>;
2769
+ defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>;
2770
+ defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>;
2771
+ defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>;
2772
+
2773
+ defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>;
2774
+ defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>;
2775
+ defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>;
2776
+ defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
2777
+ defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
2778
+ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>;
2795
2779
2796
2780
2797
2781
//-----------------------------------
@@ -2803,84 +2787,63 @@ defm INT_PTX_LDU_G_v4f32_ELE
2803
2787
// during the lifetime of the kernel.
2804
2788
2805
2789
// Scalar ld.global.nc loads: one record per addressing form, each carrying
// Requires<[hasLDG]>. In this change the `avar` record (address operand only)
// is replaced by `asi`, which takes an additional Offseti32imm operand printed
// as [$src$offset], and the operand text moves out of the TyStr argument into
// the multiclass, so TyStr now supplies only the type suffix.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2806
- def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2807
- !strconcat( "ld.global.nc.", TyStr) ,
2790
+ def asi : NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset ),
2791
+ "ld.global.nc." # TyStr # " \t$result, [$src$offset];" ,
2808
2792
[]>, Requires<[hasLDG]>;
2809
2793
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2810
- !strconcat( "ld.global.nc.", TyStr) ,
2794
+ "ld.global.nc." # TyStr # " \t$result, [$src];" ,
2811
2795
[]>, Requires<[hasLDG]>;
2812
2796
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2813
- !strconcat( "ld.global.nc.", TyStr) ,
2797
+ "ld.global.nc." # TyStr # " \t$result, [$src];" ,
2814
2798
[]>, Requires<[hasLDG]>;
2815
2799
}
2816
2800
2817
// Scalar LDG instantiations. The removed two-line forms passed the type suffix
// together with the operand text in TyStr; the added one-line forms pass only
// the type suffix. Both the i8 and i16 variants are instantiated with Int16Regs.
- defm INT_PTX_LDG_GLOBAL_i8
2818
- : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2819
- defm INT_PTX_LDG_GLOBAL_i16
2820
- : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2821
- defm INT_PTX_LDG_GLOBAL_i32
2822
- : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2823
- defm INT_PTX_LDG_GLOBAL_i64
2824
- : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2825
- defm INT_PTX_LDG_GLOBAL_f32
2826
- : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2827
- defm INT_PTX_LDG_GLOBAL_f64
2828
- : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2801
+ defm INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>;
2802
+ defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>;
2803
+ defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>;
2804
+ defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>;
2805
+ defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>;
2806
+ defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>;
2829
2807
2830
2808
// vector
2831
2809
2832
2810
// Elementized vector ldg
2833
2811
// Two-element vector ld.global.nc loads that write $dst1 and $dst2. After this
// change the multiclass itself emits the "v2." prefix and the operand list, so
// TyStr is only the element type suffix, and `_avar` is replaced by `_asi`,
// which takes an extra Offseti32imm operand printed as [$src$offset].
// NOTE(review): unlike the scalar LDG_G records, none of these records carries
// Requires<[hasLDG]> -- confirm that is intentional.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2834
2812
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2835
2813
(ins MEMri:$src),
2836
- !strconcat( "ld.global.nc.", TyStr) , []>;
2814
+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
2837
2815
def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2838
2816
(ins MEMri64:$src),
2839
- !strconcat( "ld.global.nc.", TyStr) , []>;
2840
- def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2841
- (ins imemAny:$src),
2842
- !strconcat( "ld.global.nc.", TyStr) , []>;
2817
+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
2818
+ def _asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2819
+ (ins imemAny:$src, Offseti32imm:$offset ),
2820
+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];" , []>;
2843
2821
}
2844
2822
2845
2823
// Four-element vector ld.global.nc loads that write $dst1..$dst4. This change
// drops the `_areg32`/`_areg64` records (plain Int32Regs/Int64Regs address
// operand), replaces `_avar` with `_asi` (extra Offseti32imm operand printed as
// [$src$offset]), and has the multiclass emit the "v4." prefix and operand list
// so that TyStr is only the element type suffix.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2846
- def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2847
- regclass:$dst4), (ins Int32Regs:$src),
2848
- !strconcat("ld.global.nc.", TyStr), []>;
2849
- def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2850
- regclass:$dst4), (ins Int64Regs:$src),
2851
- !strconcat("ld.global.nc.", TyStr), []>;
2852
2824
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2853
2825
regclass:$dst4), (ins MEMri:$src),
2854
- !strconcat( "ld.global.nc.", TyStr) , []>;
2826
+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];" , []>;
2855
2827
def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2856
2828
regclass:$dst4), (ins MEMri64:$src),
2857
- !strconcat( "ld.global.nc.", TyStr) , []>;
2858
- def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2859
- regclass:$dst4), (ins imemAny:$src),
2860
- !strconcat( "ld.global.nc.", TyStr) , []>;
2829
+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];" , []>;
2830
+ def _asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2831
+ regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset ),
2832
+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];" , []>;
2861
2833
}
2862
2834
2863
2835
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
2864
// Vector LDG instantiations. The removed two-line forms embedded the
// "v2."/"v4." prefix and the operand text in TyStr; the added one-line forms
// pass only the element type suffix. The 8-bit variants use Int16Regs.
// NOTE(review): there are no b16/b32 (v4f16 / v4f16x2) LDG instantiations here,
// although the corresponding LDU list has them -- confirm that is intentional.
- defm INT_PTX_LDG_G_v2i8_ELE
2865
- : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2866
- defm INT_PTX_LDG_G_v2i16_ELE
2867
- : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2868
- defm INT_PTX_LDG_G_v2i32_ELE
2869
- : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2870
- defm INT_PTX_LDG_G_v2f32_ELE
2871
- : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2872
- defm INT_PTX_LDG_G_v2i64_ELE
2873
- : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2874
- defm INT_PTX_LDG_G_v2f64_ELE
2875
- : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2876
- defm INT_PTX_LDG_G_v4i8_ELE
2877
- : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2878
- defm INT_PTX_LDG_G_v4i16_ELE
2879
- : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2880
- defm INT_PTX_LDG_G_v4i32_ELE
2881
- : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2882
- defm INT_PTX_LDG_G_v4f32_ELE
2883
- : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
2836
+ defm INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>;
2837
+ defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>;
2838
+ defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>;
2839
+ defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>;
2840
+ defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>;
2841
+ defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>;
2842
+
2843
+ defm INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>;
2844
+ defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>;
2845
+ defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>;
2846
+ defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;
2884
2847
2885
2848
2886
2849
multiclass NG_TO_G<string Str> {
0 commit comments