Skip to content

Commit 65a759c

Browse files
authored
Merge pull request apple#15 from bryanpkc/swift-3.0-branch
[SystemZ] Support LRVH and STRVH opcodes
2 parents 49e3340 + feb49a7 commit 65a759c

File tree

8 files changed

+417
-5
lines changed

8 files changed

+417
-5
lines changed

lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
439439
setTargetDAGCombine(ISD::STORE);
440440
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
441441
setTargetDAGCombine(ISD::FP_ROUND);
442+
setTargetDAGCombine(ISD::BSWAP);
442443

443444
// Handle intrinsics.
444445
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -4601,6 +4602,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
46014602
OPCODE(ATOMIC_LOADW_UMIN);
46024603
OPCODE(ATOMIC_LOADW_UMAX);
46034604
OPCODE(ATOMIC_CMP_SWAPW);
4605+
OPCODE(LRV);
4606+
OPCODE(STRV);
46044607
OPCODE(PREFETCH);
46054608
}
46064609
return nullptr;
@@ -4897,6 +4900,74 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
48974900
}
48984901
}
48994902
}
4903+
4904+
// Combine BSWAP (LOAD) into LRVH/LRV/LRVG
4905+
// These loads are allowed to access memory multiple times, and so we must check
4906+
// that the loads are not volatile before performing the combine.
4907+
if (Opcode == ISD::BSWAP &&
4908+
ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
4909+
N->getOperand(0).hasOneUse() &&
4910+
(N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
4911+
N->getValueType(0) == MVT::i64) &&
4912+
!cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
4913+
SDValue Load = N->getOperand(0);
4914+
LoadSDNode *LD = cast<LoadSDNode>(Load);
4915+
4916+
// Create the byte-swapping load.
4917+
SDValue Ops[] = {
4918+
LD->getChain(), // Chain
4919+
LD->getBasePtr(), // Ptr
4920+
DAG.getValueType(N->getValueType(0)) // VT
4921+
};
4922+
SDValue BSLoad =
4923+
DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
4924+
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
4925+
MVT::i64 : MVT::i32, MVT::Other),
4926+
Ops, LD->getMemoryVT(), LD->getMemOperand());
4927+
4928+
// If this is an i16 load, insert the truncate.
4929+
SDValue ResVal = BSLoad;
4930+
if (N->getValueType(0) == MVT::i16)
4931+
ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
4932+
4933+
// First, combine the bswap away. This makes the value produced by the
4934+
// load dead.
4935+
DCI.CombineTo(N, ResVal);
4936+
4937+
// Next, combine the load away, we give it a bogus result value but a real
4938+
// chain result. The result value is dead because the bswap is dead.
4939+
DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
4940+
4941+
// Return N so it doesn't get rechecked!
4942+
return SDValue(N, 0);
4943+
}
4944+
4945+
// Combine STORE (BSWAP) into STRVH/STRV/STRVG
4946+
// See comment above about volatile accesses.
4947+
if (Opcode == ISD::STORE &&
4948+
!cast<StoreSDNode>(N)->isVolatile() &&
4949+
N->getOperand(1).getOpcode() == ISD::BSWAP &&
4950+
N->getOperand(1).getNode()->hasOneUse() &&
4951+
(N->getOperand(1).getValueType() == MVT::i16 ||
4952+
N->getOperand(1).getValueType() == MVT::i32 ||
4953+
N->getOperand(1).getValueType() == MVT::i64)) {
4954+
4955+
SDValue BSwapOp = N->getOperand(1).getOperand(0);
4956+
4957+
if (BSwapOp.getValueType() == MVT::i16)
4958+
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
4959+
4960+
SDValue Ops[] = {
4961+
N->getOperand(0), BSwapOp, N->getOperand(2),
4962+
DAG.getValueType(N->getOperand(1).getValueType())
4963+
};
4964+
4965+
return
4966+
DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
4967+
Ops, cast<StoreSDNode>(N)->getMemoryVT(),
4968+
cast<StoreSDNode>(N)->getMemOperand());
4969+
}
4970+
49004971
return SDValue();
49014972
}
49024973

lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,19 @@ enum NodeType : unsigned {
311311
// Operand 5: the width of the field in bits (8 or 16)
312312
ATOMIC_CMP_SWAPW,
313313

314+
// Byte swapping load.
315+
//
316+
// Operand 0: the address to load from
317+
// Operand 1: the type of load (i16, i32, i64)
318+
LRV,
319+
320+
// Byte swapping store.
321+
//
322+
// Operand 0: the value to store
323+
// Operand 1: the address to store to
324+
// Operand 2: the type of store (i16, i32, i64)
325+
STRV,
326+
314327
// Prefetch from the second operand using the 4-bit control code in
315328
// the first operand. The code is 1 for a load prefetch and 2 for
316329
// a store prefetch.

lib/Target/SystemZ/SystemZInstrInfo.td

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -563,13 +563,14 @@ let hasSideEffects = 0 in {
563563

564564
// Byte-swapping loads. Unlike normal loads, these instructions are
565565
// allowed to access storage more than once.
566-
def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>;
567-
def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>;
566+
def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>;
567+
def LRV : UnaryRXY<"lrv", 0xE31E, z_lrv, GR32, 4>;
568+
def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>;
568569

569570
// Likewise byte-swapping stores.
570-
def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>;
571-
def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>,
572-
GR64, 8>;
571+
def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>;
572+
def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>;
573+
def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;
573574

574575
//===----------------------------------------------------------------------===//
575576
// Load address instructions

lib/Target/SystemZ/SystemZOperators.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,14 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
7979
def SDT_ZPrefetch : SDTypeProfile<0, 2,
8080
[SDTCisVT<0, i32>,
8181
SDTCisPtrTy<1>]>;
82+
def SDT_ZLoadBSwap : SDTypeProfile<1, 2,
83+
[SDTCisInt<0>,
84+
SDTCisPtrTy<1>,
85+
SDTCisVT<2, OtherVT>]>;
86+
def SDT_ZStoreBSwap : SDTypeProfile<0, 3,
87+
[SDTCisInt<0>,
88+
SDTCisPtrTy<1>,
89+
SDTCisVT<2, OtherVT>]>;
8290
def SDT_ZTBegin : SDTypeProfile<0, 2,
8391
[SDTCisPtrTy<0>,
8492
SDTCisVT<1, i32>]>;
@@ -191,6 +199,11 @@ def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
191199
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
192200
[SDNPHasChain, SDNPSideEffect]>;
193201

202+
def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
203+
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
204+
def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
205+
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
206+
194207
// Defined because the index is an i32 rather than a pointer.
195208
def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
196209
SDT_ZInsertVectorElt>;
@@ -331,6 +344,17 @@ def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
331344
// Pattern fragments
332345
//===----------------------------------------------------------------------===//
333346

347+
def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
348+
def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
349+
def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;
350+
351+
def z_strvh : PatFrag<(ops node:$src, node:$addr),
352+
(z_storebswap node:$src, node:$addr, i16)>;
353+
def z_strv : PatFrag<(ops node:$src, node:$addr),
354+
(z_storebswap node:$src, node:$addr, i32)>;
355+
def z_strvg : PatFrag<(ops node:$src, node:$addr),
356+
(z_storebswap node:$src, node:$addr, i64)>;
357+
334358
// Signed and unsigned comparisons.
335359
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
336360
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();

test/CodeGen/SystemZ/bswap-06.ll

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; Test 16-bit byteswaps from memory to registers.
2+
;
3+
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
4+
5+
declare i16 @llvm.bswap.i16(i16 %a)
6+
7+
; Check LRVH with no displacement.
8+
define i16 @f1(i16 *%src) {
9+
; CHECK-LABEL: f1:
10+
; CHECK: lrvh %r2, 0(%r2)
11+
; CHECK: br %r14
12+
%a = load i16 , i16 *%src
13+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
14+
ret i16 %swapped
15+
}
16+
17+
; Check the high end of the aligned LRVH range.
18+
define i16 @f2(i16 *%src) {
19+
; CHECK-LABEL: f2:
20+
; CHECK: lrvh %r2, 524286(%r2)
21+
; CHECK: br %r14
22+
%ptr = getelementptr i16, i16 *%src, i64 262143
23+
%a = load i16 , i16 *%ptr
24+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
25+
ret i16 %swapped
26+
}
27+
28+
; Check the next halfword up, which needs separate address logic.
29+
; Other sequences besides this one would be OK.
30+
define i16 @f3(i16 *%src) {
31+
; CHECK-LABEL: f3:
32+
; CHECK: agfi %r2, 524288
33+
; CHECK: lrvh %r2, 0(%r2)
34+
; CHECK: br %r14
35+
%ptr = getelementptr i16, i16 *%src, i64 262144
36+
%a = load i16 , i16 *%ptr
37+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
38+
ret i16 %swapped
39+
}
40+
41+
; Check the high end of the negative aligned LRVH range.
42+
define i16 @f4(i16 *%src) {
43+
; CHECK-LABEL: f4:
44+
; CHECK: lrvh %r2, -2(%r2)
45+
; CHECK: br %r14
46+
%ptr = getelementptr i16, i16 *%src, i64 -1
47+
%a = load i16 , i16 *%ptr
48+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
49+
ret i16 %swapped
50+
}
51+
52+
; Check the low end of the LRVH range.
53+
define i16 @f5(i16 *%src) {
54+
; CHECK-LABEL: f5:
55+
; CHECK: lrvh %r2, -524288(%r2)
56+
; CHECK: br %r14
57+
%ptr = getelementptr i16, i16 *%src, i64 -262144
58+
%a = load i16 , i16 *%ptr
59+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
60+
ret i16 %swapped
61+
}
62+
63+
; Check the next halfword down, which needs separate address logic.
64+
; Other sequences besides this one would be OK.
65+
define i16 @f6(i16 *%src) {
66+
; CHECK-LABEL: f6:
67+
; CHECK: agfi %r2, -524290
68+
; CHECK: lrvh %r2, 0(%r2)
69+
; CHECK: br %r14
70+
%ptr = getelementptr i16, i16 *%src, i64 -262145
71+
%a = load i16 , i16 *%ptr
72+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
73+
ret i16 %swapped
74+
}
75+
76+
; Check that LRVH allows an index.
77+
define i16 @f7(i64 %src, i64 %index) {
78+
; CHECK-LABEL: f7:
79+
; CHECK: lrvh %r2, 524287({{%r3,%r2|%r2,%r3}})
80+
; CHECK: br %r14
81+
%add1 = add i64 %src, %index
82+
%add2 = add i64 %add1, 524287
83+
%ptr = inttoptr i64 %add2 to i16 *
84+
%a = load i16 , i16 *%ptr
85+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
86+
ret i16 %swapped
87+
}
88+
89+
; Check that volatile accesses do not use LRVH, which might access the
90+
; storage multiple times.
91+
define i16 @f8(i16 *%src) {
92+
; CHECK-LABEL: f8:
93+
; CHECK: lh [[REG:%r[0-5]]], 0(%r2)
94+
; CHECK: lrvr %r2, [[REG]]
95+
; CHECK: br %r14
96+
%a = load volatile i16 , i16 *%src
97+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
98+
ret i16 %swapped
99+
}

test/CodeGen/SystemZ/bswap-07.ll

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
; Test 16-bit byteswaps from registers to memory.
2+
;
3+
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
4+
5+
declare i16 @llvm.bswap.i16(i16 %a)
6+
7+
; Check STRVH with no displacement.
8+
define void @f1(i16 *%dst, i16 %a) {
9+
; CHECK-LABEL: f1:
10+
; CHECK: strvh %r3, 0(%r2)
11+
; CHECK: br %r14
12+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
13+
store i16 %swapped, i16 *%dst
14+
ret void
15+
}
16+
17+
; Check the high end of the aligned STRVH range.
18+
define void @f2(i16 *%dst, i16 %a) {
19+
; CHECK-LABEL: f2:
20+
; CHECK: strvh %r3, 524286(%r2)
21+
; CHECK: br %r14
22+
%ptr = getelementptr i16, i16 *%dst, i64 262143
23+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
24+
store i16 %swapped, i16 *%ptr
25+
ret void
26+
}
27+
28+
; Check the next halfword up, which needs separate address logic.
29+
; Other sequences besides this one would be OK.
30+
define void @f3(i16 *%dst, i16 %a) {
31+
; CHECK-LABEL: f3:
32+
; CHECK: agfi %r2, 524288
33+
; CHECK: strvh %r3, 0(%r2)
34+
; CHECK: br %r14
35+
%ptr = getelementptr i16, i16 *%dst, i64 262144
36+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
37+
store i16 %swapped, i16 *%ptr
38+
ret void
39+
}
40+
41+
; Check the high end of the negative aligned STRVH range.
42+
define void @f4(i16 *%dst, i16 %a) {
43+
; CHECK-LABEL: f4:
44+
; CHECK: strvh %r3, -2(%r2)
45+
; CHECK: br %r14
46+
%ptr = getelementptr i16, i16 *%dst, i64 -1
47+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
48+
store i16 %swapped, i16 *%ptr
49+
ret void
50+
}
51+
52+
; Check the low end of the STRVH range.
53+
define void @f5(i16 *%dst, i16 %a) {
54+
; CHECK-LABEL: f5:
55+
; CHECK: strvh %r3, -524288(%r2)
56+
; CHECK: br %r14
57+
%ptr = getelementptr i16, i16 *%dst, i64 -262144
58+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
59+
store i16 %swapped, i16 *%ptr
60+
ret void
61+
}
62+
63+
; Check the next halfword down, which needs separate address logic.
64+
; Other sequences besides this one would be OK.
65+
define void @f6(i16 *%dst, i16 %a) {
66+
; CHECK-LABEL: f6:
67+
; CHECK: agfi %r2, -524290
68+
; CHECK: strvh %r3, 0(%r2)
69+
; CHECK: br %r14
70+
%ptr = getelementptr i16, i16 *%dst, i64 -262145
71+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
72+
store i16 %swapped, i16 *%ptr
73+
ret void
74+
}
75+
76+
; Check that STRVH allows an index.
77+
define void @f7(i64 %src, i64 %index, i16 %a) {
78+
; CHECK-LABEL: f7:
79+
; CHECK: strvh %r4, 524287({{%r3,%r2|%r2,%r3}})
80+
; CHECK: br %r14
81+
%add1 = add i64 %src, %index
82+
%add2 = add i64 %add1, 524287
83+
%ptr = inttoptr i64 %add2 to i16 *
84+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
85+
store i16 %swapped, i16 *%ptr
86+
ret void
87+
}
88+
89+
; Check that volatile stores do not use STRVH, which might access the
90+
; storage multiple times.
91+
define void @f8(i16 *%dst, i16 %a) {
92+
; CHECK-LABEL: f8:
93+
; CHECK: lrvr [[REG:%r[0-5]]], %r3
94+
; CHECK: srl [[REG]], 16
95+
; CHECK: sth [[REG]], 0(%r2)
96+
; CHECK: br %r14
97+
%swapped = call i16 @llvm.bswap.i16(i16 %a)
98+
store volatile i16 %swapped, i16 *%dst
99+
ret void
100+
}

0 commit comments

Comments
 (0)