Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 4cc00e8

Browse files
committed
[SystemZ] Increase the number of VLREPs
If a loaded value is replicated, it is best to combine the two operations into a VLREP (load and replicate), but isel will not produce this if the load has other users as well. This patch handles that case by making the other users of the load use element 0 of the REPLICATE instead of the load itself. That way the load has only the REPLICATE node as its user, and we get a VLREP. Review: Ulrich Weigand. https://reviews.llvm.org/D54264 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346746 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 01a109e commit 4cc00e8

File tree

4 files changed

+114
-0
lines changed

4 files changed

+114
-0
lines changed

lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
523523
setTargetDAGCombine(ISD::ZERO_EXTEND);
524524
setTargetDAGCombine(ISD::SIGN_EXTEND);
525525
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
526+
setTargetDAGCombine(ISD::LOAD);
526527
setTargetDAGCombine(ISD::STORE);
527528
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
528529
setTargetDAGCombine(ISD::FP_ROUND);
@@ -5368,6 +5369,46 @@ SDValue SystemZTargetLowering::combineMERGE(
53685369
return SDValue();
53695370
}
53705371

5372+
// Target DAG combine for load nodes (dispatched for ISD::LOAD from
// PerformDAGCombine). If the loaded value of N (result 0) is used by exactly
// one SystemZISD::REPLICATE node and by at least one other node, the other
// users are rewritten in place to read element 0 of that REPLICATE instead.
//
// N   - the load node being combined.
// DCI - combiner state; only its SelectionDAG is used here.
//
// Returns SDValue(N, 0) after users have been rewritten, or an empty SDValue
// when the load is a vector or integer load, has no REPLICATE user, has more
// than one REPLICATE user, or has no other users of its value result.
SDValue SystemZTargetLowering::combineLOAD(
5373+
SDNode *N, DAGCombinerInfo &DCI) const {
5374+
SelectionDAG &DAG = DCI.DAG;
5375+
EVT LdVT = N->getValueType(0);
// Vector and integer loads are rejected up front (see the rationale below).
5376+
if (LdVT.isVector() || LdVT.isInteger())
5377+
return SDValue();
5378+
// Transform a scalar load that is REPLICATEd as well as having other
5379+
// use(s) to the form where the other use(s) use the first element of the
5380+
// REPLICATE instead of the load. Otherwise instruction selection will not
5381+
// produce a VLREP. Avoid extracting to a GPR, so only do this for floating
5382+
// point loads.
5383+
// Walk all users of N: remember the single REPLICATE user and collect every
// other user that references result 0 of N. Users that only reference a
// different result of N are not collected and therefore stay untouched.
5384+
SDValue Replicate;
5385+
SmallVector<SDNode*, 8> OtherUses;
5386+
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5387+
UI != UE; ++UI) {
5388+
if (UI->getOpcode() == SystemZISD::REPLICATE) {
// A second REPLICATE user makes the combine give up entirely.
5389+
if (Replicate)
5390+
return SDValue(); // Should never happen
5391+
Replicate = SDValue(*UI, 0);
5392+
}
5393+
else if (UI.getUse().getResNo() == 0)
5394+
OtherUses.push_back(*UI);
5395+
}
// Nothing to rewrite unless there is both a REPLICATE user and at least one
// other user of the loaded value.
5396+
if (!Replicate || OtherUses.empty())
5397+
return SDValue();
5398+
// Build the replacement value: element 0 of the REPLICATE, with the same
// type as the loaded value.
5399+
SDLoc DL(N);
5400+
SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
5401+
Replicate, DAG.getConstant(0, DL, MVT::i32));
5402+
// Update uses of the loaded Value while preserving old chains.
5403+
for (SDNode *U : OtherUses) {
// Rebuild U's operand list, substituting Extract0 only where the operand is
// result 0 of N; every other operand is copied unchanged.
5404+
SmallVector<SDValue, 8> Ops;
5405+
for (SDValue Op : U->ops())
5406+
Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
// NOTE(review): the node returned by UpdateNodeOperands is ignored here;
// confirm that is intended if the call can hand back a different node.
5407+
DAG.UpdateNodeOperands(U, Ops);
5408+
}
5409+
return SDValue(N, 0);
5410+
}
5411+
53715412
SDValue SystemZTargetLowering::combineSTORE(
53725413
SDNode *N, DAGCombinerInfo &DCI) const {
53735414
SelectionDAG &DAG = DCI.DAG;
@@ -5699,6 +5740,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
56995740
case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
57005741
case SystemZISD::MERGE_HIGH:
57015742
case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
5743+
case ISD::LOAD: return combineLOAD(N, DCI);
57025744
case ISD::STORE: return combineSTORE(N, DCI);
57035745
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
57045746
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);

lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,7 @@ class SystemZTargetLowering : public TargetLowering {
587587
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
588588
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
589589
SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
590+
SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
590591
SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
591592
SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
592593
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;

test/CodeGen/SystemZ/vec-move-21.ll

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
2+
3+
; Test that a replicate of a load gets folded to vlrep also in cases where
4+
; the load has multiple users.
5+
6+
; CHECK-NOT: vrep
7+
8+
9+
; Loads six consecutive doubles from %Vsrc (%Vld1..%Vld6) and mixes splatted
; and non-splatted uses of them:
;  - %V19 (holding %Vld1) is splatted in %V20 and also extended in %V25.
;  - %Vld2 is splatted via %V30/%V31 and also inserted into lane 1 of %V25.
;  - %Vld3 is splatted via %V36/%V37 and also used by the scalar fmul %Vmul37.
; All partial results are summed so that every value feeds the returned double.
define double @fun(double* %Vsrc, <2 x double> %T) {
10+
entry:
11+
%Vgep1 = getelementptr double, double* %Vsrc, i64 0
12+
%Vld1 = load double, double* %Vgep1
13+
%Vgep2 = getelementptr double, double* %Vsrc, i64 1
14+
%Vld2 = load double, double* %Vgep2
15+
%Vgep3 = getelementptr double, double* %Vsrc, i64 2
16+
%Vld3 = load double, double* %Vgep3
17+
%Vgep4 = getelementptr double, double* %Vsrc, i64 3
18+
%Vld4 = load double, double* %Vgep4
19+
%Vgep5 = getelementptr double, double* %Vsrc, i64 4
20+
%Vld5 = load double, double* %Vgep5
21+
%Vgep6 = getelementptr double, double* %Vsrc, i64 5
22+
%Vld6 = load double, double* %Vgep6
23+
; %V20 is a splat of %Vld1 (zeroinitializer shuffle mask); %V19 is reused by
; %V25 below.
24+
%V19 = insertelement <2 x double> undef, double %Vld1, i32 0
25+
%V20 = shufflevector <2 x double> %V19, <2 x double> undef, <2 x i32> zeroinitializer
26+
%V21 = insertelement <2 x double> undef, double %Vld4, i32 0
27+
%V22 = insertelement <2 x double> %V21, double %Vld5, i32 1
28+
%V23 = fmul <2 x double> %V20, %V22
29+
%V24 = fadd <2 x double> %T, %V23
30+
%V25 = insertelement <2 x double> %V19, double %Vld2, i32 1
; %V27 holds %Vld6 in both lanes, built with two insertelements.
31+
%V26 = insertelement <2 x double> undef, double %Vld6, i32 0
32+
%V27 = insertelement <2 x double> %V26, double %Vld6, i32 1
33+
%V28 = fmul <2 x double> %V25, %V27
34+
%V29 = fadd <2 x double> %T, %V28
; %V31 is a splat of %Vld2, which is also used by %V25 above.
35+
%V30 = insertelement <2 x double> undef, double %Vld2, i32 0
36+
%V31 = shufflevector <2 x double> %V30, <2 x double> undef, <2 x i32> zeroinitializer
37+
%V32 = insertelement <2 x double> undef, double %Vld5, i32 0
38+
%V33 = insertelement <2 x double> %V32, double %Vld6, i32 1
39+
%V34 = fmul <2 x double> %V31, %V33
40+
%V35 = fadd <2 x double> %T, %V34
; %V37 is a splat of %Vld3, which is also used by the scalar %Vmul37 below.
41+
%V36 = insertelement <2 x double> undef, double %Vld3, i32 0
42+
%V37 = shufflevector <2 x double> %V36, <2 x double> undef, <2 x i32> zeroinitializer
43+
%V38 = fmul <2 x double> %V37, %V22
44+
%V39 = fadd <2 x double> %T, %V38
45+
%Vmul37 = fmul double %Vld3, %Vld6
46+
%Vadd38 = fadd double %Vmul37, %Vmul37
47+
48+
%VA0 = fadd <2 x double> %V24, %V29
49+
%VA1 = fadd <2 x double> %VA0, %V35
50+
%VA2 = fadd <2 x double> %VA1, %V39
51+
52+
%VE0 = extractelement <2 x double> %VA2, i32 0
53+
%VS1 = fadd double %VE0, %Vadd38
54+
55+
ret double %VS1
56+
}

test/CodeGen/SystemZ/vec-move-22.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
2+
3+
; Test that a loaded value which is used both in a vector and scalar context
4+
; is not transformed to a vlrep + vlgvg.
5+
6+
; CHECK-NOT: vlrep
7+
8+
; %tmp10 is a pointer loaded from %Addr. It is used as the address of a scalar
; store of %arg and is also inserted into both lanes of the <2 x i64*> vector
; that is stored to %Dst.
define void @fun(i64 %arg, i64** %Addr, <2 x i64*>* %Dst) {
9+
%tmp10 = load i64*, i64** %Addr
; Scalar use of the loaded pointer.
10+
store i64 %arg, i64* %tmp10
; Vector use: the same loaded pointer goes into lane 0 and lane 1.
11+
%tmp12 = insertelement <2 x i64*> undef, i64* %tmp10, i32 0
12+
%tmp13 = insertelement <2 x i64*> %tmp12, i64* %tmp10, i32 1
13+
store <2 x i64*> %tmp13, <2 x i64*>* %Dst
14+
ret void
15+
}

0 commit comments

Comments
 (0)