Skip to content

Commit 422065b

Browse files
committed
[NVPTX] Remove redundant addressing mode instrs
1 parent 27e6561 commit 422065b

9 files changed

+78
-541
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 54 additions & 365 deletions
Large diffs are not rendered by default.

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -107,18 +107,14 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
107107
// Match direct address complex pattern.
108108
bool SelectDirectAddr(SDValue N, SDValue &Address);
109109

110-
bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
110+
void SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
111111
SDValue &Offset, MVT VT);
112112
bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
113113
SDValue &Offset);
114114
bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
115115
SDValue &Offset);
116-
bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
117-
SDValue &Offset, MVT VT);
118116
bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
119117
SDValue &Offset);
120-
bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
121-
SDValue &Offset);
122118

123119
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
124120

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 0 additions & 114 deletions
Original file line number | Diff line number | Diff line change
@@ -2754,24 +2754,6 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
27542754
// Load / Store Handling
27552755
//
27562756
multiclass LD<NVPTXRegClass regclass> {
2757-
def _avar : NVPTXInst<
2758-
(outs regclass:$dst),
2759-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2760-
i32imm:$fromWidth, imem:$addr),
2761-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2762-
"\t$dst, [$addr];", []>;
2763-
def _areg : NVPTXInst<
2764-
(outs regclass:$dst),
2765-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2766-
i32imm:$fromWidth, Int32Regs:$addr),
2767-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2768-
"\t$dst, [$addr];", []>;
2769-
def _areg_64 : NVPTXInst<
2770-
(outs regclass:$dst),
2771-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2772-
i32imm:$fromWidth, Int64Regs:$addr),
2773-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2774-
"\t$dst, [$addr];", []>;
27752757
def _ari : NVPTXInst<
27762758
(outs regclass:$dst),
27772759
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2802,24 +2784,6 @@ let mayLoad=1, hasSideEffects=0 in {
28022784
}
28032785

28042786
multiclass ST<NVPTXRegClass regclass> {
2805-
def _avar : NVPTXInst<
2806-
(outs),
2807-
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2808-
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
2809-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2810-
" \t[$addr], $src;", []>;
2811-
def _areg : NVPTXInst<
2812-
(outs),
2813-
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2814-
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
2815-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2816-
" \t[$addr], $src;", []>;
2817-
def _areg_64 : NVPTXInst<
2818-
(outs),
2819-
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2820-
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
2821-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2822-
" \t[$addr], $src;", []>;
28232787
def _ari : NVPTXInst<
28242788
(outs),
28252789
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
@@ -2856,24 +2820,6 @@ let mayStore=1, hasSideEffects=0 in {
28562820
// elementization happens at the machine instruction level, so the following
28572821
// instructions never appear in the DAG.
28582822
multiclass LD_VEC<NVPTXRegClass regclass> {
2859-
def _v2_avar : NVPTXInst<
2860-
(outs regclass:$dst1, regclass:$dst2),
2861-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2862-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2863-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2864-
"\t{{$dst1, $dst2}}, [$addr];", []>;
2865-
def _v2_areg : NVPTXInst<
2866-
(outs regclass:$dst1, regclass:$dst2),
2867-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2868-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2869-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2870-
"\t{{$dst1, $dst2}}, [$addr];", []>;
2871-
def _v2_areg_64 : NVPTXInst<
2872-
(outs regclass:$dst1, regclass:$dst2),
2873-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2874-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2875-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2876-
"\t{{$dst1, $dst2}}, [$addr];", []>;
28772823
def _v2_ari : NVPTXInst<
28782824
(outs regclass:$dst1, regclass:$dst2),
28792825
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2892,24 +2838,6 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
28922838
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
28932839
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
28942840
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
2895-
def _v4_avar : NVPTXInst<
2896-
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2897-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2898-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2899-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2900-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
2901-
def _v4_areg : NVPTXInst<
2902-
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2903-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2904-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2905-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2906-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
2907-
def _v4_areg_64 : NVPTXInst<
2908-
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2909-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2910-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2911-
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2912-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
29132841
def _v4_ari : NVPTXInst<
29142842
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
29152843
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2939,27 +2867,6 @@ let mayLoad=1, hasSideEffects=0 in {
29392867
}
29402868

29412869
multiclass ST_VEC<NVPTXRegClass regclass> {
2942-
def _v2_avar : NVPTXInst<
2943-
(outs),
2944-
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
2945-
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
2946-
imem:$addr),
2947-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2948-
"\t[$addr], {{$src1, $src2}};", []>;
2949-
def _v2_areg : NVPTXInst<
2950-
(outs),
2951-
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
2952-
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
2953-
Int32Regs:$addr),
2954-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2955-
"\t[$addr], {{$src1, $src2}};", []>;
2956-
def _v2_areg_64 : NVPTXInst<
2957-
(outs),
2958-
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
2959-
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
2960-
Int64Regs:$addr),
2961-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2962-
"\t[$addr], {{$src1, $src2}};", []>;
29632870
def _v2_ari : NVPTXInst<
29642871
(outs),
29652872
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
@@ -2981,27 +2888,6 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
29812888
imem:$addr, Offseti32imm:$offset),
29822889
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
29832890
"\t[$addr$offset], {{$src1, $src2}};", []>;
2984-
def _v4_avar : NVPTXInst<
2985-
(outs),
2986-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2987-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2988-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2989-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2990-
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
2991-
def _v4_areg : NVPTXInst<
2992-
(outs),
2993-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2994-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2995-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2996-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2997-
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
2998-
def _v4_areg_64 : NVPTXInst<
2999-
(outs),
3000-
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
3001-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
3002-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
3003-
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
3004-
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
30052891
def _v4_ari : NVPTXInst<
30062892
(outs),
30072893
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 0 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -2693,12 +2693,6 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
26932693
// Scalar
26942694

26952695
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2696-
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2697-
!strconcat("ldu.global.", TyStr),
2698-
[]>, Requires<[hasLDU]>;
2699-
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2700-
!strconcat("ldu.global.", TyStr),
2701-
[]>, Requires<[hasLDU]>;
27022696
def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
27032697
!strconcat("ldu.global.", TyStr),
27042698
[]>, Requires<[hasLDU]>;
@@ -2721,12 +2715,6 @@ defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
27212715

27222716
// Elementized vector ldu
27232717
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2724-
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2725-
(ins Int32Regs:$src),
2726-
!strconcat("ldu.global.", TyStr), []>;
2727-
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2728-
(ins Int64Regs:$src),
2729-
!strconcat("ldu.global.", TyStr), []>;
27302718
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
27312719
(ins MEMri:$src),
27322720
!strconcat("ldu.global.", TyStr), []>;
@@ -2739,12 +2727,6 @@ multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
27392727
}
27402728

27412729
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2742-
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2743-
regclass:$dst4), (ins Int32Regs:$src),
2744-
!strconcat("ldu.global.", TyStr), []>;
2745-
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2746-
regclass:$dst4), (ins Int64Regs:$src),
2747-
!strconcat("ldu.global.", TyStr), []>;
27482730
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
27492731
regclass:$dst4), (ins MEMri:$src),
27502732
!strconcat("ldu.global.", TyStr), []>;
@@ -2796,12 +2778,6 @@ defm INT_PTX_LDU_G_v4f32_ELE
27962778
// during the lifetime of the kernel.
27972779

27982780
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2799-
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2800-
!strconcat("ld.global.nc.", TyStr),
2801-
[]>, Requires<[hasLDG]>;
2802-
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2803-
!strconcat("ld.global.nc.", TyStr),
2804-
[]>, Requires<[hasLDG]>;
28052781
def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
28062782
!strconcat("ld.global.nc.", TyStr),
28072783
[]>, Requires<[hasLDG]>;
@@ -2830,12 +2806,6 @@ defm INT_PTX_LDG_GLOBAL_f64
28302806

28312807
// Elementized vector ldg
28322808
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2833-
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2834-
(ins Int32Regs:$src),
2835-
!strconcat("ld.global.nc.", TyStr), []>;
2836-
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2837-
(ins Int64Regs:$src),
2838-
!strconcat("ld.global.nc.", TyStr), []>;
28392809
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
28402810
(ins MEMri:$src),
28412811
!strconcat("ld.global.nc.", TyStr), []>;

llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1809,7 +1809,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
18091809
MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
18101810

18111811
switch (TexHandleDef.getOpcode()) {
1812-
case NVPTX::LD_i64_avar: {
1812+
case NVPTX::LD_i64_asi: {
18131813
// The handle is a parameter value being loaded, replace with the
18141814
// parameter symbol
18151815
const NVPTXTargetMachine &TM =

llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ registers:
1616
- { id: 1, class: float32regs }
1717
body: |
1818
bb.0.entry:
19-
%0 = LD_f32_avar 0, 4, 1, 2, 32, &test_param_0
19+
%0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
2020
; CHECK: [[@LINE+1]]:33: expected a floating point literal
2121
%1 = FADD_rnf32ri %0, float 3
2222
StoreRetvalF32 %1, 0

llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,9 @@ registers:
4040
- { id: 7, class: float32regs }
4141
body: |
4242
bb.0.entry:
43-
%0 = LD_f32_avar 0, 0, 4, 1, 2, 32, &test_param_0
43+
%0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test_param_0, 0
4444
%1 = CVT_f64_f32 %0, 0
45-
%2 = LD_i32_avar 0, 0, 4, 1, 0, 32, &test_param_1
45+
%2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test_param_1, 0
4646
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00
4747
%3 = FADD_rnf64ri %1, double 3.250000e+00
4848
%4 = CVT_f32_f64 %3, 5
@@ -66,9 +66,9 @@ registers:
6666
- { id: 7, class: float32regs }
6767
body: |
6868
bb.0.entry:
69-
%0 = LD_f32_avar 0, 0, 4, 1, 2, 32, &test2_param_0
69+
%0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test2_param_0, 0
7070
%1 = CVT_f64_f32 %0, 0
71-
%2 = LD_i32_avar 0, 0, 4, 1, 0, 32, &test2_param_1
71+
%2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test2_param_1, 0
7272
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
7373
%3 = FADD_rnf64ri %1, double 0x7FF8000000000000
7474
%4 = CVT_f32_f64 %3, 5

llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ registers:
1616
- { id: 1, class: float32regs }
1717
body: |
1818
bb.0.entry:
19-
%0 = LD_f32_avar 0, 4, 1, 2, 32, &test_param_0
19+
%0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
2020
; CHECK: [[@LINE+1]]:33: floating point constant does not have type 'float'
2121
%1 = FADD_rnf32ri %0, float 0xH3C00
2222
StoreRetvalF32 %1, 0

llvm/test/CodeGen/NVPTX/variadics-backend.ll

Lines changed: 16 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -215,21 +215,18 @@ define dso_local i32 @bar() {
215215
; CHECK-PTX-NEXT: .reg .b64 %SPL;
216216
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
217217
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
218-
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
218+
; CHECK-PTX-NEXT: .reg .b64 %rd<4>;
219219
; CHECK-PTX-EMPTY:
220220
; CHECK-PTX-NEXT: // %bb.0: // %entry
221221
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
222222
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
223223
; CHECK-PTX-NEXT: mov.u64 %rd1, __const_$_bar_$_s1;
224-
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
225-
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd2];
224+
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd1+7];
226225
; CHECK-PTX-NEXT: cvt.u16.u8 %rs2, %rs1;
227226
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs2;
228-
; CHECK-PTX-NEXT: add.s64 %rd3, %rd1, 5;
229-
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd3];
227+
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd1+5];
230228
; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
231-
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 6;
232-
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd4];
229+
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd1+6];
233230
; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
234231
; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
235232
; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
@@ -238,14 +235,14 @@ define dso_local i32 @bar() {
238235
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
239236
; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
240237
; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs9;
241-
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
242-
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
243-
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 8;
238+
; CHECK-PTX-NEXT: mov.b64 %rd2, 1;
239+
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd2;
240+
; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
244241
; CHECK-PTX-NEXT: { // callseq 1, 0
245242
; CHECK-PTX-NEXT: .param .b32 param0;
246243
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
247244
; CHECK-PTX-NEXT: .param .b64 param1;
248-
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
245+
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
249246
; CHECK-PTX-NEXT: .param .b32 retval0;
250247
; CHECK-PTX-NEXT: call.uni (retval0),
251248
; CHECK-PTX-NEXT: variadics2,
@@ -384,26 +381,25 @@ define dso_local void @qux() {
384381
; CHECK-PTX-NEXT: .reg .b64 %SP;
385382
; CHECK-PTX-NEXT: .reg .b64 %SPL;
386383
; CHECK-PTX-NEXT: .reg .b32 %r<3>;
387-
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
384+
; CHECK-PTX-NEXT: .reg .b64 %rd<6>;
388385
; CHECK-PTX-EMPTY:
389386
; CHECK-PTX-NEXT: // %bb.0: // %entry
390387
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7;
391388
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
392389
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd1, [__const_$_qux_$_s];
393390
; CHECK-PTX-NEXT: st.u64 [%SP], %rd1;
394391
; CHECK-PTX-NEXT: mov.u64 %rd2, __const_$_qux_$_s;
395-
; CHECK-PTX-NEXT: add.s64 %rd3, %rd2, 8;
396-
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd4, [%rd3];
397-
; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd4;
398-
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
399-
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
400-
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 16;
392+
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd3, [%rd2+8];
393+
; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd3;
394+
; CHECK-PTX-NEXT: mov.b64 %rd4, 1;
395+
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd4;
396+
; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 16;
401397
; CHECK-PTX-NEXT: { // callseq 3, 0
402398
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
403399
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd1;
404-
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd4;
400+
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd3;
405401
; CHECK-PTX-NEXT: .param .b64 param1;
406-
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
402+
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
407403
; CHECK-PTX-NEXT: .param .b32 retval0;
408404
; CHECK-PTX-NEXT: call.uni (retval0),
409405
; CHECK-PTX-NEXT: variadics4,

0 commit comments

Comments
 (0)