Skip to content

Commit 85f8bd1

Browse files
authored
[NVPTX] Combine addressing-mode variants of ld, st, wmma (#129102)
This change folds together the _ari, _ari_64, and _asi variants of these instructions into a single instruction capable of holding any address. This allows for the removal of a lot of unnecessary code and moves us towards a standard way of representing an address in NVPTX.
1 parent 0ebf7b4 commit 85f8bd1

8 files changed

+296
-695
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 159 additions & 410 deletions
Large diffs are not rendered by default.

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -106,17 +106,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
106106
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
107107
}
108108

109-
// Match direct address complex pattern.
110-
bool SelectDirectAddr(SDValue N, SDValue &Address);
111-
112-
void SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
113-
SDValue &Offset, MVT VT);
114-
bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
115-
SDValue &Offset);
116-
bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
117-
SDValue &Offset);
118-
bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
119-
SDValue &Offset);
109+
bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
120110

121111
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
122112

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 51 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,27 +1917,15 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
19171917
// Data Movement (Load / Store, Move)
19181918
//-----------------------------------
19191919

1920-
let WantsRoot = true in {
1921-
def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex]>;
1922-
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex]>;
1923-
}
1924-
def ADDRvar : ComplexPattern<iPTR, 1, "SelectDirectAddr", [], []>;
1920+
def addr : ComplexPattern<pAny, 2, "SelectADDR">;
19251921

1926-
def MEMri : Operand<i32> {
1927-
let PrintMethod = "printMemOperand";
1928-
let MIOperandInfo = (ops Int32Regs, i32imm);
1929-
}
1930-
def MEMri64 : Operand<i64> {
1931-
let PrintMethod = "printMemOperand";
1932-
let MIOperandInfo = (ops Int64Regs, i64imm);
1933-
}
1934-
1935-
def imem : Operand<iPTR> {
1922+
def ADDR_base : Operand<pAny> {
19361923
let PrintMethod = "printOperand";
19371924
}
19381925

1939-
def imemAny : Operand<pAny> {
1940-
let PrintMethod = "printOperand";
1926+
def ADDR : Operand<pAny> {
1927+
let PrintMethod = "printMemOperand";
1928+
let MIOperandInfo = (ops ADDR_base, i32imm);
19411929
}
19421930

19431931
def LdStCode : Operand<i32> {
@@ -1956,10 +1944,10 @@ def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
19561944
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
19571945

19581946
// Load a memory address into a u32 or u64 register.
1959-
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
1947+
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a),
19601948
"mov.u32 \t$dst, $a;",
19611949
[(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
1962-
def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
1950+
def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a),
19631951
"mov.u64 \t$dst, $a;",
19641952
[(set i64:$dst, (Wrapper tglobaladdr:$a))]>;
19651953

@@ -2021,12 +2009,17 @@ def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
20212009
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
20222010

20232011
//---- Copy Frame Index ----
2024-
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
2025-
"add.u32 \t$dst, ${addr:add};",
2026-
[(set i32:$dst, ADDRri:$addr)]>;
2027-
def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
2028-
"add.u64 \t$dst, ${addr:add};",
2029-
[(set i64:$dst, ADDRri64:$addr)]>;
2012+
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr),
2013+
"add.u32 \t$dst, ${addr:add};", []>;
2014+
def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR:$addr),
2015+
"add.u64 \t$dst, ${addr:add};", []>;
2016+
2017+
def to_tframeindex : SDNodeXForm<frameindex, [{
2018+
return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
2019+
}]>;
2020+
2021+
def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
2022+
def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
20302023

20312024
//-----------------------------------
20322025
// Comparison and Selection
@@ -2660,7 +2653,7 @@ def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
26602653
def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
26612654
[(LastCallArg (i32 1), (i32 imm:$a))]>;
26622655

2663-
def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ",
2656+
def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ",
26642657
[(CallVoid (Wrapper tglobaladdr:$addr))]>;
26652658
def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
26662659
[(CallVoid i32:$addr)]>;
@@ -2753,109 +2746,56 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
27532746
//
27542747
// Load / Store Handling
27552748
//
2756-
multiclass LD<NVPTXRegClass regclass> {
2757-
def _ari : NVPTXInst<
2749+
class LD<NVPTXRegClass regclass>
2750+
: NVPTXInst<
27582751
(outs regclass:$dst),
27592752
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2760-
i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
2761-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2762-
"\t$dst, [$addr$offset];", []>;
2763-
def _ari_64 : NVPTXInst<
2764-
(outs regclass:$dst),
2765-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2766-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
2767-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2768-
"\t$dst, [$addr$offset];", []>;
2769-
def _asi : NVPTXInst<
2770-
(outs regclass:$dst),
2771-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2772-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
2753+
i32imm:$fromWidth, ADDR:$addr),
27732754
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2774-
"\t$dst, [$addr$offset];", []>;
2775-
}
2755+
"\t$dst, [$addr];", []>;
27762756

27772757
let mayLoad=1, hasSideEffects=0 in {
2778-
defm LD_i8 : LD<Int16Regs>;
2779-
defm LD_i16 : LD<Int16Regs>;
2780-
defm LD_i32 : LD<Int32Regs>;
2781-
defm LD_i64 : LD<Int64Regs>;
2782-
defm LD_f32 : LD<Float32Regs>;
2783-
defm LD_f64 : LD<Float64Regs>;
2758+
def LD_i8 : LD<Int16Regs>;
2759+
def LD_i16 : LD<Int16Regs>;
2760+
def LD_i32 : LD<Int32Regs>;
2761+
def LD_i64 : LD<Int64Regs>;
2762+
def LD_f32 : LD<Float32Regs>;
2763+
def LD_f64 : LD<Float64Regs>;
27842764
}
27852765

2786-
multiclass ST<NVPTXRegClass regclass> {
2787-
def _ari : NVPTXInst<
2766+
class ST<NVPTXRegClass regclass>
2767+
: NVPTXInst<
27882768
(outs),
27892769
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2790-
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr,
2791-
Offseti32imm:$offset),
2770+
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, ADDR:$addr),
27922771
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2793-
" \t[$addr$offset], $src;", []>;
2794-
def _ari_64 : NVPTXInst<
2795-
(outs),
2796-
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2797-
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr,
2798-
Offseti32imm:$offset),
2799-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2800-
" \t[$addr$offset], $src;", []>;
2801-
def _asi : NVPTXInst<
2802-
(outs),
2803-
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2804-
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr,
2805-
Offseti32imm:$offset),
2806-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2807-
" \t[$addr$offset], $src;", []>;
2808-
}
2772+
" \t[$addr], $src;", []>;
28092773

28102774
let mayStore=1, hasSideEffects=0 in {
2811-
defm ST_i8 : ST<Int16Regs>;
2812-
defm ST_i16 : ST<Int16Regs>;
2813-
defm ST_i32 : ST<Int32Regs>;
2814-
defm ST_i64 : ST<Int64Regs>;
2815-
defm ST_f32 : ST<Float32Regs>;
2816-
defm ST_f64 : ST<Float64Regs>;
2775+
def ST_i8 : ST<Int16Regs>;
2776+
def ST_i16 : ST<Int16Regs>;
2777+
def ST_i32 : ST<Int32Regs>;
2778+
def ST_i64 : ST<Int64Regs>;
2779+
def ST_f32 : ST<Float32Regs>;
2780+
def ST_f64 : ST<Float64Regs>;
28172781
}
28182782

28192783
// The following is used only in and after vector elementizations. Vector
28202784
// elementization happens at the machine instruction level, so the following
28212785
// instructions never appear in the DAG.
28222786
multiclass LD_VEC<NVPTXRegClass regclass> {
2823-
def _v2_ari : NVPTXInst<
2824-
(outs regclass:$dst1, regclass:$dst2),
2825-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2826-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
2827-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2828-
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
2829-
def _v2_ari_64 : NVPTXInst<
2787+
def _v2 : NVPTXInst<
28302788
(outs regclass:$dst1, regclass:$dst2),
28312789
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2832-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
2790+
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
28332791
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2834-
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
2835-
def _v2_asi : NVPTXInst<
2836-
(outs regclass:$dst1, regclass:$dst2),
2837-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2838-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
2839-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2840-
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
2841-
def _v4_ari : NVPTXInst<
2792+
"\t{{$dst1, $dst2}}, [$addr];", []>;
2793+
def _v4 : NVPTXInst<
28422794
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
28432795
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2844-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
2796+
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
28452797
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2846-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
2847-
def _v4_ari_64 : NVPTXInst<
2848-
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2849-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2850-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
2851-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2852-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
2853-
def _v4_asi : NVPTXInst<
2854-
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2855-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2856-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
2857-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2858-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
2798+
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
28592799
}
28602800
let mayLoad=1, hasSideEffects=0 in {
28612801
defm LDV_i8 : LD_VEC<Int16Regs>;
@@ -2867,48 +2807,20 @@ let mayLoad=1, hasSideEffects=0 in {
28672807
}
28682808

28692809
multiclass ST_VEC<NVPTXRegClass regclass> {
2870-
def _v2_ari : NVPTXInst<
2871-
(outs),
2872-
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
2873-
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
2874-
Int32Regs:$addr, Offseti32imm:$offset),
2875-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2876-
"\t[$addr$offset], {{$src1, $src2}};", []>;
2877-
def _v2_ari_64 : NVPTXInst<
2810+
def _v2 : NVPTXInst<
28782811
(outs),
28792812
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
28802813
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
2881-
Int64Regs:$addr, Offseti32imm:$offset),
2814+
ADDR:$addr),
28822815
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2883-
"\t[$addr$offset], {{$src1, $src2}};", []>;
2884-
def _v2_asi : NVPTXInst<
2885-
(outs),
2886-
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
2887-
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
2888-
imem:$addr, Offseti32imm:$offset),
2889-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2890-
"\t[$addr$offset], {{$src1, $src2}};", []>;
2891-
def _v4_ari : NVPTXInst<
2816+
"\t[$addr], {{$src1, $src2}};", []>;
2817+
def _v4 : NVPTXInst<
28922818
(outs),
28932819
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
28942820
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2895-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
2821+
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
28962822
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2897-
"\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
2898-
def _v4_ari_64 : NVPTXInst<
2899-
(outs),
2900-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2901-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2902-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
2903-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2904-
"\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
2905-
def _v4_asi : NVPTXInst<
2906-
(outs),
2907-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2908-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2909-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
2910-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}"
2911-
"$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
2823+
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
29122824
}
29132825

29142826
let mayStore=1, hasSideEffects=0 in {

0 commit comments

Comments
 (0)