Skip to content

Commit 0f9109c

Browse files
kovdan01, asavonic
authored and committed
[NVPTX] Eliminate StoreRetval instructions with undef operand
Previously, many StoreRetval instructions with undef operands were generated on the NVPTX target when a big struct was returned by value. This resulted in many unneeded st.param.* instructions in the final assembly. The patch solves the issue by folding away StoreRetval nodes whose stored values are all undef in the NVPTX-specific part of the DAG combiner. Differential Revision: https://reviews.llvm.org/D118973
1 parent e931f92 commit 0f9109c

File tree

2 files changed

+89
-0
lines changed

2 files changed

+89
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4475,6 +4475,17 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
44754475
return SDValue();
44764476
}
44774477

4478+
/// PerformStoreRetvalCombine - Fold away a StoreRetval node that stores only
/// undef values.
///
/// Returns the node's incoming chain (operand 0) when every operand from
/// index 2 onward is undef. Otherwise returns an empty SDValue, the same
/// value PerformDAGCombine returns by default.
static SDValue PerformStoreRetvalCombine(SDNode *N) {
4479+
// Operands from index 2 to the last one are the values to be stored. If any
// of them is not undef, the node is left untouched.
4480+
for (std::size_t I = 2, OpsCount = N->ops().size(); I != OpsCount; ++I)
4481+
if (!N->getOperand(I).isUndef())
4482+
return SDValue();
4483+
4484+
// Operand 0 is the previous value in the chain. EntryToken cannot be
4485+
// returned instead, as the previous value would then become unused and be
// eliminated later.
4486+
return N->getOperand(0);
4487+
}
4488+
44784489
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
44794490
///
44804491
static SDValue PerformADDCombine(SDNode *N,
@@ -4803,6 +4814,10 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
48034814
return PerformREMCombine(N, DCI, OptLevel);
48044815
case ISD::SETCC:
48054816
return PerformSETCCCombine(N, DCI);
4817+
case NVPTXISD::StoreRetval:
4818+
case NVPTXISD::StoreRetvalV2:
4819+
case NVPTXISD::StoreRetvalV4:
4820+
return PerformStoreRetvalCombine(N);
48064821
}
48074822
return SDValue();
48084823
}
Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,74 @@
1+
; RUN: llc < %s --mtriple=nvptx-unknown-unknown | FileCheck %s
2+
;
3+
; This is IR generated with clang using -O3 optimization level
4+
; and nvptx-unknown-unknown target from the following C code.
5+
;
6+
; struct StNoalign { unsigned int field[5]; };
7+
; struct StAlign8 { _Alignas(8) unsigned int field[5]; };
8+
; struct StAlign16 { _Alignas(16) unsigned int field[5]; };
9+
;
10+
; #define DECLARE_FUNC(StName) \
11+
; struct StName func_##StName(struct StName in) { \
12+
; struct StName ret; \
13+
; ret.field[4] = in.field[0]; \
14+
; return ret; \
15+
; } \
16+
;
17+
; DECLARE_FUNC(StNoalign)
18+
; DECLARE_FUNC(StAlign8)
19+
; DECLARE_FUNC(StAlign16)
20+
21+
%struct.StNoalign = type { [5 x i32] }
22+
23+
; Returns a struct in which only array element 4 is defined (loaded from %in);
; elements 0-3 are undef. A single st.param.b32 to offset 16 is expected, and
; none to offsets 0, 4, 8 or 12.
define %struct.StNoalign @func_StNoalign(%struct.StNoalign* nocapture noundef readonly byval(%struct.StNoalign) align 4 %in) {
24+
; CHECK-LABEL: .func{{.*}}func_StNoalign
25+
; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [func_StNoalign_param_0];
26+
; CHECK-NOT: st.param.b32 [func_retval0+0], %r{{[0-9]+}};
27+
; CHECK-NOT: st.param.b32 [func_retval0+4], %r{{[0-9]+}};
28+
; CHECK-NOT: st.param.b32 [func_retval0+8], %r{{[0-9]+}};
29+
; CHECK-NOT: st.param.b32 [func_retval0+12], %r{{[0-9]+}};
30+
; CHECK: st.param.b32 [func_retval0+16], [[R1]];
31+
; CHECK-NEXT: ret;
32+
%arrayidx = getelementptr inbounds %struct.StNoalign, %struct.StNoalign* %in, i32 0, i32 0, i32 0
33+
%1 = load i32, i32* %arrayidx, align 4
34+
%.fca.0.4.insert = insertvalue %struct.StNoalign { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison] }, i32 %1, 0, 4
35+
ret %struct.StNoalign %.fca.0.4.insert
36+
}
37+
38+
%struct.StAlign8 = type { [5 x i32], [4 x i8] }
39+
40+
; Returns a struct in which only array element 4 is defined (loaded from %in);
; elements 0-3 are undef and the trailing [4 x i8] member is poison. A single
; st.param.b32 to offset 16 is expected, with no stores to offsets 0-12 and no
; st.param.v4.b8 to offset 20.
define %struct.StAlign8 @func_StAlign8(%struct.StAlign8* nocapture noundef readonly byval(%struct.StAlign8) align 8 %in) {
41+
; CHECK-LABEL: .func{{.*}}func_StAlign8
42+
; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [func_StAlign8_param_0];
43+
; CHECK-NOT: st.param.b32 [func_retval0+0], %r{{[0-9]+}};
44+
; CHECK-NOT: st.param.b32 [func_retval0+4], %r{{[0-9]+}};
45+
; CHECK-NOT: st.param.b32 [func_retval0+8], %r{{[0-9]+}};
46+
; CHECK-NOT: st.param.b32 [func_retval0+12], %r{{[0-9]+}};
47+
; CHECK: st.param.b32 [func_retval0+16], [[R1]];
48+
; CHECK-NOT: st.param.v4.b8 [func_retval0+20], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
49+
; CHECK-NEXT: ret;
50+
%arrayidx = getelementptr inbounds %struct.StAlign8, %struct.StAlign8* %in, i32 0, i32 0, i32 0
51+
%1 = load i32, i32* %arrayidx, align 8
52+
%.fca.0.4.insert = insertvalue %struct.StAlign8 { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison], [4 x i8] poison }, i32 %1, 0, 4
53+
ret %struct.StAlign8 %.fca.0.4.insert
54+
}
55+
56+
%struct.StAlign16 = type { [5 x i32], [12 x i8] }
57+
58+
; Returns a struct in which only array element 4 is defined (loaded from %in);
; elements 0-3 are undef and the trailing [12 x i8] member is poison. A single
; st.param.b32 to offset 16 is expected, with no stores to offsets 0-12 and no
; st.param.v4.b8 to offsets 20, 24 or 28.
define %struct.StAlign16 @func_StAlign16(%struct.StAlign16* nocapture noundef readonly byval(%struct.StAlign16) align 16 %in) {
59+
; CHECK-LABEL: .func{{.*}}func_StAlign16
60+
; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [func_StAlign16_param_0];
61+
; CHECK-NOT: st.param.b32 [func_retval0+0], %r{{[0-9]+}};
62+
; CHECK-NOT: st.param.b32 [func_retval0+4], %r{{[0-9]+}};
63+
; CHECK-NOT: st.param.b32 [func_retval0+8], %r{{[0-9]+}};
64+
; CHECK-NOT: st.param.b32 [func_retval0+12], %r{{[0-9]+}};
65+
; CHECK: st.param.b32 [func_retval0+16], [[R1]];
66+
; CHECK-NOT: st.param.v4.b8 [func_retval0+20], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
67+
; CHECK-NOT: st.param.v4.b8 [func_retval0+24], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
68+
; CHECK-NOT: st.param.v4.b8 [func_retval0+28], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
69+
; CHECK-NEXT: ret;
70+
%arrayidx = getelementptr inbounds %struct.StAlign16, %struct.StAlign16* %in, i32 0, i32 0, i32 0
71+
%1 = load i32, i32* %arrayidx, align 16
72+
%.fca.0.4.insert = insertvalue %struct.StAlign16 { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison], [12 x i8] poison }, i32 %1, 0, 4
73+
ret %struct.StAlign16 %.fca.0.4.insert
74+
}

0 commit comments

Comments
 (0)