Skip to content

Commit f83ef28

Browse files
authored
[NVPTX] Remove redundant addressing mode instrs (#128044)
Remove the load and store instruction variants that do not take an immediate offset, and use the immediate-offset variants in all cases. These variants are emitted exactly the same as the removed ones when the immediate offset is 0. Removing the non-immediate versions allows a lot of code to be deleted and should make any MachineIR passes simpler.
1 parent dcc08a1 commit f83ef28

9 files changed

+86
-557
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 62 additions & 381 deletions
Large diffs are not rendered by default.

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,18 +107,14 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
107107
// Match direct address complex pattern.
108108
bool SelectDirectAddr(SDValue N, SDValue &Address);
109109

110-
bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
110+
void SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
111111
SDValue &Offset, MVT VT);
112112
bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
113113
SDValue &Offset);
114114
bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
115115
SDValue &Offset);
116-
bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
117-
SDValue &Offset, MVT VT);
118116
bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
119117
SDValue &Offset);
120-
bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
121-
SDValue &Offset);
122118

123119
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
124120

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 0 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -2754,24 +2754,6 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
27542754
// Load / Store Handling
27552755
//
27562756
multiclass LD<NVPTXRegClass regclass> {
2757-
def _avar : NVPTXInst<
2758-
(outs regclass:$dst),
2759-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2760-
i32imm:$fromWidth, imem:$addr),
2761-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2762-
"\t$dst, [$addr];", []>;
2763-
def _areg : NVPTXInst<
2764-
(outs regclass:$dst),
2765-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2766-
i32imm:$fromWidth, Int32Regs:$addr),
2767-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2768-
"\t$dst, [$addr];", []>;
2769-
def _areg_64 : NVPTXInst<
2770-
(outs regclass:$dst),
2771-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2772-
i32imm:$fromWidth, Int64Regs:$addr),
2773-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2774-
"\t$dst, [$addr];", []>;
27752757
def _ari : NVPTXInst<
27762758
(outs regclass:$dst),
27772759
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2802,24 +2784,6 @@ let mayLoad=1, hasSideEffects=0 in {
28022784
}
28032785

28042786
multiclass ST<NVPTXRegClass regclass> {
2805-
def _avar : NVPTXInst<
2806-
(outs),
2807-
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2808-
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
2809-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2810-
" \t[$addr], $src;", []>;
2811-
def _areg : NVPTXInst<
2812-
(outs),
2813-
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2814-
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
2815-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2816-
" \t[$addr], $src;", []>;
2817-
def _areg_64 : NVPTXInst<
2818-
(outs),
2819-
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2820-
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
2821-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2822-
" \t[$addr], $src;", []>;
28232787
def _ari : NVPTXInst<
28242788
(outs),
28252789
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
@@ -2856,24 +2820,6 @@ let mayStore=1, hasSideEffects=0 in {
28562820
// elementization happens at the machine instruction level, so the following
28572821
// instructions never appear in the DAG.
28582822
multiclass LD_VEC<NVPTXRegClass regclass> {
2859-
def _v2_avar : NVPTXInst<
2860-
(outs regclass:$dst1, regclass:$dst2),
2861-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2862-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2863-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2864-
"\t{{$dst1, $dst2}}, [$addr];", []>;
2865-
def _v2_areg : NVPTXInst<
2866-
(outs regclass:$dst1, regclass:$dst2),
2867-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2868-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2869-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2870-
"\t{{$dst1, $dst2}}, [$addr];", []>;
2871-
def _v2_areg_64 : NVPTXInst<
2872-
(outs regclass:$dst1, regclass:$dst2),
2873-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2874-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2875-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2876-
"\t{{$dst1, $dst2}}, [$addr];", []>;
28772823
def _v2_ari : NVPTXInst<
28782824
(outs regclass:$dst1, regclass:$dst2),
28792825
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2892,24 +2838,6 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
28922838
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
28932839
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
28942840
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
2895-
def _v4_avar : NVPTXInst<
2896-
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2897-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2898-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2899-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2900-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
2901-
def _v4_areg : NVPTXInst<
2902-
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2903-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2904-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2905-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2906-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
2907-
def _v4_areg_64 : NVPTXInst<
2908-
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2909-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2910-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2911-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2912-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
29132841
def _v4_ari : NVPTXInst<
29142842
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
29152843
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2939,27 +2867,6 @@ let mayLoad=1, hasSideEffects=0 in {
29392867
}
29402868

29412869
multiclass ST_VEC<NVPTXRegClass regclass> {
2942-
def _v2_avar : NVPTXInst<
2943-
(outs),
2944-
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
2945-
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
2946-
imem:$addr),
2947-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2948-
"\t[$addr], {{$src1, $src2}};", []>;
2949-
def _v2_areg : NVPTXInst<
2950-
(outs),
2951-
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
2952-
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
2953-
Int32Regs:$addr),
2954-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2955-
"\t[$addr], {{$src1, $src2}};", []>;
2956-
def _v2_areg_64 : NVPTXInst<
2957-
(outs),
2958-
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
2959-
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
2960-
Int64Regs:$addr),
2961-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2962-
"\t[$addr], {{$src1, $src2}};", []>;
29632870
def _v2_ari : NVPTXInst<
29642871
(outs),
29652872
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
@@ -2981,27 +2888,6 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
29812888
imem:$addr, Offseti32imm:$offset),
29822889
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
29832890
"\t[$addr$offset], {{$src1, $src2}};", []>;
2984-
def _v4_avar : NVPTXInst<
2985-
(outs),
2986-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2987-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2988-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2989-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2990-
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
2991-
def _v4_areg : NVPTXInst<
2992-
(outs),
2993-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2994-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2995-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2996-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2997-
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
2998-
def _v4_areg_64 : NVPTXInst<
2999-
(outs),
3000-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
3001-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
3002-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
3003-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
3004-
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
30052891
def _v4_ari : NVPTXInst<
30062892
(outs),
30072893
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2693,12 +2693,6 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
26932693
// Scalar
26942694

26952695
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2696-
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2697-
!strconcat("ldu.global.", TyStr),
2698-
[]>, Requires<[hasLDU]>;
2699-
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2700-
!strconcat("ldu.global.", TyStr),
2701-
[]>, Requires<[hasLDU]>;
27022696
def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
27032697
!strconcat("ldu.global.", TyStr),
27042698
[]>, Requires<[hasLDU]>;
@@ -2721,12 +2715,6 @@ defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
27212715

27222716
// Elementized vector ldu
27232717
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2724-
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2725-
(ins Int32Regs:$src),
2726-
!strconcat("ldu.global.", TyStr), []>;
2727-
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2728-
(ins Int64Regs:$src),
2729-
!strconcat("ldu.global.", TyStr), []>;
27302718
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
27312719
(ins MEMri:$src),
27322720
!strconcat("ldu.global.", TyStr), []>;
@@ -2739,12 +2727,6 @@ multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
27392727
}
27402728

27412729
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2742-
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2743-
regclass:$dst4), (ins Int32Regs:$src),
2744-
!strconcat("ldu.global.", TyStr), []>;
2745-
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2746-
regclass:$dst4), (ins Int64Regs:$src),
2747-
!strconcat("ldu.global.", TyStr), []>;
27482730
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
27492731
regclass:$dst4), (ins MEMri:$src),
27502732
!strconcat("ldu.global.", TyStr), []>;
@@ -2796,12 +2778,6 @@ defm INT_PTX_LDU_G_v4f32_ELE
27962778
// during the lifetime of the kernel.
27972779

27982780
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2799-
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2800-
!strconcat("ld.global.nc.", TyStr),
2801-
[]>, Requires<[hasLDG]>;
2802-
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2803-
!strconcat("ld.global.nc.", TyStr),
2804-
[]>, Requires<[hasLDG]>;
28052781
def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
28062782
!strconcat("ld.global.nc.", TyStr),
28072783
[]>, Requires<[hasLDG]>;
@@ -2830,12 +2806,6 @@ defm INT_PTX_LDG_GLOBAL_f64
28302806

28312807
// Elementized vector ldg
28322808
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2833-
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2834-
(ins Int32Regs:$src),
2835-
!strconcat("ld.global.nc.", TyStr), []>;
2836-
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2837-
(ins Int64Regs:$src),
2838-
!strconcat("ld.global.nc.", TyStr), []>;
28392809
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
28402810
(ins MEMri:$src),
28412811
!strconcat("ld.global.nc.", TyStr), []>;

llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1800,7 +1800,7 @@ bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op,
18001800
MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
18011801

18021802
switch (TexHandleDef.getOpcode()) {
1803-
case NVPTX::LD_i64_avar: {
1803+
case NVPTX::LD_i64_asi: {
18041804
// The handle is a parameter value being loaded, replace with the
18051805
// parameter symbol
18061806
const auto &TM = static_cast<const NVPTXTargetMachine &>(MF.getTarget());

llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ registers:
1616
- { id: 1, class: float32regs }
1717
body: |
1818
bb.0.entry:
19-
%0 = LD_f32_avar 0, 4, 1, 2, 32, &test_param_0
19+
%0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
2020
; CHECK: [[@LINE+1]]:33: expected a floating point literal
2121
%1 = FADD_rnf32ri %0, float 3
2222
StoreRetvalF32 %1, 0

llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ registers:
4040
- { id: 7, class: float32regs }
4141
body: |
4242
bb.0.entry:
43-
%0 = LD_f32_avar 0, 0, 4, 1, 2, 32, &test_param_0
43+
%0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test_param_0, 0
4444
%1 = CVT_f64_f32 %0, 0
45-
%2 = LD_i32_avar 0, 0, 4, 1, 0, 32, &test_param_1
45+
%2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test_param_1, 0
4646
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00
4747
%3 = FADD_rnf64ri %1, double 3.250000e+00
4848
%4 = CVT_f32_f64 %3, 5
@@ -66,9 +66,9 @@ registers:
6666
- { id: 7, class: float32regs }
6767
body: |
6868
bb.0.entry:
69-
%0 = LD_f32_avar 0, 0, 4, 1, 2, 32, &test2_param_0
69+
%0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test2_param_0, 0
7070
%1 = CVT_f64_f32 %0, 0
71-
%2 = LD_i32_avar 0, 0, 4, 1, 0, 32, &test2_param_1
71+
%2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test2_param_1, 0
7272
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
7373
%3 = FADD_rnf64ri %1, double 0x7FF8000000000000
7474
%4 = CVT_f32_f64 %3, 5

llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ registers:
1616
- { id: 1, class: float32regs }
1717
body: |
1818
bb.0.entry:
19-
%0 = LD_f32_avar 0, 4, 1, 2, 32, &test_param_0
19+
%0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
2020
; CHECK: [[@LINE+1]]:33: floating point constant does not have type 'float'
2121
%1 = FADD_rnf32ri %0, float 0xH3C00
2222
StoreRetvalF32 %1, 0

llvm/test/CodeGen/NVPTX/variadics-backend.ll

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -215,21 +215,18 @@ define dso_local i32 @bar() {
215215
; CHECK-PTX-NEXT: .reg .b64 %SPL;
216216
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
217217
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
218-
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
218+
; CHECK-PTX-NEXT: .reg .b64 %rd<4>;
219219
; CHECK-PTX-EMPTY:
220220
; CHECK-PTX-NEXT: // %bb.0: // %entry
221221
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
222222
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
223223
; CHECK-PTX-NEXT: mov.u64 %rd1, __const_$_bar_$_s1;
224-
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
225-
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd2];
224+
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd1+7];
226225
; CHECK-PTX-NEXT: cvt.u16.u8 %rs2, %rs1;
227226
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs2;
228-
; CHECK-PTX-NEXT: add.s64 %rd3, %rd1, 5;
229-
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd3];
227+
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd1+5];
230228
; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
231-
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 6;
232-
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd4];
229+
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd1+6];
233230
; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
234231
; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
235232
; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
@@ -238,14 +235,14 @@ define dso_local i32 @bar() {
238235
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
239236
; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
240237
; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs9;
241-
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
242-
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
243-
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 8;
238+
; CHECK-PTX-NEXT: mov.b64 %rd2, 1;
239+
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd2;
240+
; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
244241
; CHECK-PTX-NEXT: { // callseq 1, 0
245242
; CHECK-PTX-NEXT: .param .b32 param0;
246243
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
247244
; CHECK-PTX-NEXT: .param .b64 param1;
248-
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
245+
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
249246
; CHECK-PTX-NEXT: .param .b32 retval0;
250247
; CHECK-PTX-NEXT: call.uni (retval0),
251248
; CHECK-PTX-NEXT: variadics2,
@@ -384,26 +381,25 @@ define dso_local void @qux() {
384381
; CHECK-PTX-NEXT: .reg .b64 %SP;
385382
; CHECK-PTX-NEXT: .reg .b64 %SPL;
386383
; CHECK-PTX-NEXT: .reg .b32 %r<3>;
387-
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
384+
; CHECK-PTX-NEXT: .reg .b64 %rd<6>;
388385
; CHECK-PTX-EMPTY:
389386
; CHECK-PTX-NEXT: // %bb.0: // %entry
390387
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7;
391388
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
392389
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd1, [__const_$_qux_$_s];
393390
; CHECK-PTX-NEXT: st.u64 [%SP], %rd1;
394391
; CHECK-PTX-NEXT: mov.u64 %rd2, __const_$_qux_$_s;
395-
; CHECK-PTX-NEXT: add.s64 %rd3, %rd2, 8;
396-
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd4, [%rd3];
397-
; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd4;
398-
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
399-
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
400-
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 16;
392+
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd3, [%rd2+8];
393+
; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd3;
394+
; CHECK-PTX-NEXT: mov.b64 %rd4, 1;
395+
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd4;
396+
; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 16;
401397
; CHECK-PTX-NEXT: { // callseq 3, 0
402398
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
403399
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd1;
404-
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd4;
400+
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd3;
405401
; CHECK-PTX-NEXT: .param .b64 param1;
406-
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
402+
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
407403
; CHECK-PTX-NEXT: .param .b32 retval0;
408404
; CHECK-PTX-NEXT: call.uni (retval0),
409405
; CHECK-PTX-NEXT: variadics4,

0 commit comments

Comments
 (0)