Skip to content

Commit ef7aad0

Browse files
committed
[SystemZ] Improve handling of ZERO_EXTEND_VECTOR_INREG.
Instead of doing multiple unpacks when zero extending vectors (e.g. v2i16 -> v2i64), benchmarks have shown that it is better to do a VPERM (vector permute), since that is only one sequential instruction on the critical path. This patch achieves this by: (1) expanding ZERO_EXTEND_VECTOR_INREG into a vector shuffle with a zero vector instead of (multiple) unpacks; and (2) improving SystemZ::GeneralShuffle to perform a single unpack as the last operation if Bytes matches it. Review: Ulrich Weigand. Differential Revision: https://reviews.llvm.org/D78486
1 parent 2c663aa commit ef7aad0

File tree

6 files changed

+242
-38
lines changed

6 files changed

+242
-38
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 160 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4467,12 +4467,22 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
44674467
}
44684468

44694469
// Return true if N is recognizably an all-zeros vector: a SPLAT_VECTOR whose
// operand is the constant 0, or a BUILD_VECTOR of all zeros. One enclosing
// BITCAST is looked through before checking.
static bool isZeroVector(SDValue N) {
  // Strip a single level of bitcast, if present.
  SDValue Vec = N;
  if (Vec->getOpcode() == ISD::BITCAST)
    Vec = Vec->getOperand(0);

  // A splat of a constant is decided by the constant alone. A splat of a
  // non-constant operand falls through to the BUILD_VECTOR check below.
  const bool IsSplat = Vec->getOpcode() == ISD::SPLAT_VECTOR;
  auto *SplatConst =
      IsSplat ? dyn_cast<ConstantSDNode>(Vec->getOperand(0)) : nullptr;
  if (SplatConst)
    return SplatConst->getZExtValue() == 0;

  return ISD::isBuildVectorAllZeros(Vec.getNode());
}
44754477

4478+
// Return the index of the zero/undef vector, or UINT32_MAX if not found.
4479+
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
4480+
for (unsigned I = 0; I < Num ; I++)
4481+
if (isZeroVector(Ops[I]))
4482+
return I;
4483+
return UINT32_MAX;
4484+
}
4485+
44764486
// Bytes is a VPERM-like permute vector, except that -1 is used for
44774487
// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
44784488
// VSLDB or VPERM.
@@ -4491,9 +4501,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
44914501

44924502
// Fall back on VPERM. Construct an SDNode for the permute vector. Try to
44934503
// eliminate a zero vector by reusing any zero index in the permute vector.
4494-
unsigned ZeroVecIdx =
4495-
isZeroVector(Ops[0]) ? 0 : (isZeroVector(Ops[1]) ? 1 : UINT_MAX);
4496-
if (ZeroVecIdx != UINT_MAX) {
4504+
unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
4505+
if (ZeroVecIdx != UINT32_MAX) {
44974506
bool MaskFirst = true;
44984507
int ZeroIdx = -1;
44994508
for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
@@ -4551,10 +4560,13 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
45514560
namespace {
45524561
// Describes a general N-operand vector shuffle.
45534562
struct GeneralShuffle {
4554-
GeneralShuffle(EVT vt) : VT(vt) {}
4563+
GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
45554564
void addUndef();
45564565
bool add(SDValue, unsigned);
45574566
SDValue getNode(SelectionDAG &, const SDLoc &);
4567+
void tryPrepareForUnpack();
4568+
bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
4569+
SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
45584570

45594571
// The operands of the shuffle.
45604572
SmallVector<SDValue, SystemZ::VectorBytes> Ops;
@@ -4566,6 +4578,9 @@ struct GeneralShuffle {
45664578

45674579
// The type of the shuffle result.
45684580
EVT VT;
4581+
4582+
// Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
4583+
unsigned UnpackFromEltSize;
45694584
};
45704585
}
45714586

@@ -4648,6 +4663,9 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
46484663
if (Ops.size() == 0)
46494664
return DAG.getUNDEF(VT);
46504665

4666+
// Use a single unpack if possible as the last operation.
4667+
tryPrepareForUnpack();
4668+
46514669
// Make sure that there are at least two shuffle operands.
46524670
if (Ops.size() == 1)
46534671
Ops.push_back(DAG.getUNDEF(MVT::v16i8));
@@ -4713,13 +4731,117 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
47134731
// to VPERM.
47144732
unsigned OpNo0, OpNo1;
47154733
SDValue Op;
4716-
if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
4734+
if (unpackWasPrepared() && Ops[1].isUndef())
4735+
Op = Ops[0];
4736+
else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
47174737
Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
47184738
else
47194739
Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
4740+
4741+
Op = insertUnpackIfPrepared(DAG, DL, Op);
4742+
47204743
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
47214744
}
47224745

4746+
#ifndef NDEBUG
4747+
static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
4748+
dbgs() << Msg.c_str() << " { ";
4749+
for (unsigned i = 0; i < Bytes.size(); i++)
4750+
dbgs() << Bytes[i] << " ";
4751+
dbgs() << "}\n";
4752+
}
4753+
#endif
4754+
4755+
// If the Bytes vector matches an unpack operation, prepare to do the unpack
4756+
// after all else by removing the zero vector and the effect of the unpack on
4757+
// Bytes.
4758+
void GeneralShuffle::tryPrepareForUnpack() {
4759+
uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
4760+
if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
4761+
return;
4762+
4763+
// Only do this if removing the zero vector reduces the depth, otherwise
4764+
// the critical path will increase with the final unpack.
4765+
if (Ops.size() > 2 &&
4766+
Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
4767+
return;
4768+
4769+
// Find an unpack that would allow removing the zero vector from Ops.
4770+
UnpackFromEltSize = 1;
4771+
for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
4772+
bool MatchUnpack = true;
4773+
SmallVector<int, SystemZ::VectorBytes> SrcBytes;
4774+
for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
4775+
unsigned ToEltSize = UnpackFromEltSize * 2;
4776+
bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
4777+
if (!IsZextByte)
4778+
SrcBytes.push_back(Bytes[Elt]);
4779+
if (Bytes[Elt] != -1) {
4780+
unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
4781+
if (IsZextByte != (OpNo == ZeroVecOpNo)) {
4782+
MatchUnpack = false;
4783+
break;
4784+
}
4785+
}
4786+
}
4787+
if (MatchUnpack) {
4788+
if (Ops.size() == 2) {
4789+
// Don't use unpack if a single source operand needs rearrangement.
4790+
for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
4791+
if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
4792+
UnpackFromEltSize = UINT_MAX;
4793+
return;
4794+
}
4795+
}
4796+
break;
4797+
}
4798+
}
4799+
if (UnpackFromEltSize > 4)
4800+
return;
4801+
4802+
LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
4803+
<< UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
4804+
<< ".\n";
4805+
dumpBytes(Bytes, "Original Bytes vector:"););
4806+
4807+
// Apply the unpack in reverse to the Bytes array.
4808+
unsigned B = 0;
4809+
for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
4810+
Elt += UnpackFromEltSize;
4811+
for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
4812+
Bytes[B] = Bytes[Elt];
4813+
}
4814+
while (B < SystemZ::VectorBytes)
4815+
Bytes[B++] = -1;
4816+
4817+
// Remove the zero vector from Ops
4818+
Ops.erase(&Ops[ZeroVecOpNo]);
4819+
for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4820+
if (Bytes[I] >= 0) {
4821+
unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4822+
if (OpNo > ZeroVecOpNo)
4823+
Bytes[I] -= SystemZ::VectorBytes;
4824+
}
4825+
4826+
LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
4827+
dbgs() << "\n";);
4828+
}
4829+
4830+
// If a final unpack was prepared, reinterpret Op as a full vector of
// UnpackFromEltSize-byte integers and wrap it in an UNPACKL_HIGH node whose
// result has elements twice as wide. Otherwise return Op unchanged.
SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
                                               const SDLoc &DL,
                                               SDValue Op) {
  if (!unpackWasPrepared())
    return Op;

  // Full-width integer vector type whose elements are EltBits bits wide.
  auto getIntVecVT = [](unsigned EltBits) -> EVT {
    return MVT::getVectorVT(MVT::getIntegerVT(EltBits),
                            SystemZ::VectorBits / EltBits);
  };

  const unsigned FromBits = UnpackFromEltSize * 8;
  SDValue Packed = DAG.getNode(ISD::BITCAST, DL, getIntVecVT(FromBits), Op);
  return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, getIntVecVT(FromBits * 2),
                     Packed);
}
4844+
47234845
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
47244846
static bool isScalarToVector(SDValue Op) {
47254847
for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
@@ -5114,9 +5236,8 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
51145236
return DAG.getNode(ISD::BITCAST, DL, VT, Res);
51155237
}
51165238

5117-
SDValue
5118-
SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
5119-
unsigned UnpackHigh) const {
5239+
SDValue SystemZTargetLowering::
5240+
lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
51205241
SDValue PackedOp = Op.getOperand(0);
51215242
EVT OutVT = Op.getValueType();
51225243
EVT InVT = PackedOp.getValueType();
@@ -5126,11 +5247,39 @@ SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
51265247
FromBits *= 2;
51275248
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
51285249
SystemZ::VectorBits / FromBits);
5129-
PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
5250+
PackedOp =
5251+
DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
51305252
} while (FromBits != ToBits);
51315253
return PackedOp;
51325254
}
51335255

5256+
// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5257+
SDValue SystemZTargetLowering::
5258+
lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5259+
SDValue PackedOp = Op.getOperand(0);
5260+
SDLoc DL(Op);
5261+
EVT OutVT = Op.getValueType();
5262+
EVT InVT = PackedOp.getValueType();
5263+
unsigned InNumElts = InVT.getVectorNumElements();
5264+
unsigned OutNumElts = OutVT.getVectorNumElements();
5265+
unsigned NumInPerOut = InNumElts / OutNumElts;
5266+
5267+
SDValue ZeroVec =
5268+
DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5269+
5270+
SmallVector<int, 16> Mask(InNumElts);
5271+
unsigned ZeroVecElt = InNumElts;
5272+
for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5273+
unsigned MaskElt = PackedElt * NumInPerOut;
5274+
unsigned End = MaskElt + NumInPerOut - 1;
5275+
for (; MaskElt < End; MaskElt++)
5276+
Mask[MaskElt] = ZeroVecElt++;
5277+
Mask[MaskElt] = PackedElt;
5278+
}
5279+
SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
5280+
return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
5281+
}
5282+
51345283
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
51355284
unsigned ByScalar) const {
51365285
// Look for cases where a vector shift can use the *_BY_SCALAR form.
@@ -5296,9 +5445,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
52965445
case ISD::EXTRACT_VECTOR_ELT:
52975446
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
52985447
case ISD::SIGN_EXTEND_VECTOR_INREG:
5299-
return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
5448+
return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
53005449
case ISD::ZERO_EXTEND_VECTOR_INREG:
5301-
return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
5450+
return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
53025451
case ISD::SHL:
53035452
return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
53045453
case ISD::SRL:

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -627,8 +627,8 @@ class SystemZTargetLowering : public TargetLowering {
627627
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
628628
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
629629
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
630-
SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
631-
unsigned UnpackHigh) const;
630+
SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
631+
SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
632632
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
633633

634634
bool canTreatAsByteVector(EVT VT) const;

llvm/test/CodeGen/SystemZ/vec-move-16.ll

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ define <4 x i32> @f4(<4 x i1> *%ptr) {
4040
; Test a v4i8->v4i32 extension.
4141
define <4 x i32> @f5(<4 x i8> *%ptr) {
4242
; CHECK-LABEL: f5:
43+
; CHECK: larl %r1, .LCPI4_0
4344
; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
44-
; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
45-
; CHECK: vuplhh %v24, [[REG2]]
45+
; CHECK: vl %v1, 0(%r1), 3
46+
; CHECK: vperm %v24, %v1, [[REG1]], %v1
4647
; CHECK: br %r14
4748
%val = load <4 x i8>, <4 x i8> *%ptr
4849
%ret = zext <4 x i8> %val to <4 x i32>
@@ -71,10 +72,10 @@ define <2 x i64> @f7(<2 x i1> *%ptr) {
7172
; Test a v2i8->v2i64 extension.
7273
define <2 x i64> @f8(<2 x i8> *%ptr) {
7374
; CHECK-LABEL: f8:
74-
; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
75-
; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
76-
; CHECK: vuplhh [[REG3:%v[0-9]+]], [[REG2]]
77-
; CHECK: vuplhf %v24, [[REG3]]
75+
; CHECK: larl %r1, .LCPI7_0
76+
; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
77+
; CHECK: vl %v1, 0(%r1), 3
78+
; CHECK: vperm %v24, %v1, [[REG1]], %v1
7879
; CHECK: br %r14
7980
%val = load <2 x i8>, <2 x i8> *%ptr
8081
%ret = zext <2 x i8> %val to <2 x i64>
@@ -84,9 +85,10 @@ define <2 x i64> @f8(<2 x i8> *%ptr) {
8485
; Test a v2i16->v2i64 extension.
8586
define <2 x i64> @f9(<2 x i16> *%ptr) {
8687
; CHECK-LABEL: f9:
87-
; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
88-
; CHECK: vuplhh [[REG2:%v[0-9]+]], [[REG1]]
89-
; CHECK: vuplhf %v24, [[REG2]]
88+
; CHECK: larl %r1, .LCPI8_0
89+
; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
90+
; CHECK: vl %v1, 0(%r1), 3
91+
; CHECK: vperm %v24, %v1, [[REG1]], %v1
9092
; CHECK: br %r14
9193
%val = load <2 x i16>, <2 x i16> *%ptr
9294
%ret = zext <2 x i16> %val to <2 x i64>

llvm/test/CodeGen/SystemZ/vec-move-23.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,9 @@ define void @fun3(<4 x i16> %Src, <4 x float>* %Dst) {
6868

6969
define void @fun4(<2 x i8> %Src, <2 x double>* %Dst) {
7070
; CHECK-LABEL: fun4:
71-
; CHECK: vuplhb %v0, %v24
72-
; CHECK-NEXT: vuplhh %v0, %v0
73-
; CHECK-NEXT: vuplhf %v0, %v0
71+
; CHECK: larl %r1, .LCPI4_0
72+
; CHECK-NEXT: vl %v0, 0(%r1), 3
73+
; CHECK-NEXT: vperm %v0, %v0, %v24, %v0
7474
; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
7575
; CHECK-NEXT: vst %v0, 0(%r2), 3
7676
; CHECK-NEXT: br %r14
@@ -81,8 +81,9 @@ define void @fun4(<2 x i8> %Src, <2 x double>* %Dst) {
8181

8282
define void @fun5(<2 x i16> %Src, <2 x double>* %Dst) {
8383
; CHECK-LABEL: fun5:
84-
; CHECK: vuplhh %v0, %v24
85-
; CHECK-NEXT: vuplhf %v0, %v0
84+
; CHECK: larl %r1, .LCPI5_0
85+
; CHECK-NEXT: vl %v0, 0(%r1), 3
86+
; CHECK-NEXT: vperm %v0, %v0, %v24, %v0
8687
; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
8788
; CHECK-NEXT: vst %v0, 0(%r2), 3
8889
; CHECK-NEXT: br %r14
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
2+
;
3+
; Test that vperm is not used if a single unpack is enough.
4+
5+
define <4 x i32> @fun0(<4 x i32>* %Src) nounwind {
6+
; CHECK-LABEL: fun0:
7+
; CHECK-NOT: vperm
8+
%tmp = load <4 x i32>, <4 x i32>* %Src
9+
%tmp2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp, <4 x i32> <i32 0, i32 4, i32 2, i32 5>
10+
ret <4 x i32> %tmp2
11+
}
12+
13+
define void @fun1(i8 %Src, <32 x i8>* %Dst) nounwind {
14+
; CHECK-LABEL: fun1:
15+
; CHECK-NOT: vperm
16+
%I0 = insertelement <16 x i8> undef, i8 %Src, i32 0
17+
%I1 = insertelement <16 x i8> %I0, i8 %Src, i32 1
18+
%I2 = insertelement <16 x i8> %I1, i8 %Src, i32 2
19+
%I3 = insertelement <16 x i8> %I2, i8 %Src, i32 3
20+
%I4 = insertelement <16 x i8> %I3, i8 %Src, i32 4
21+
%I5 = insertelement <16 x i8> %I4, i8 %Src, i32 5
22+
%I6 = insertelement <16 x i8> %I5, i8 %Src, i32 6
23+
%I7 = insertelement <16 x i8> %I6, i8 %Src, i32 7
24+
%I8 = insertelement <16 x i8> %I7, i8 %Src, i32 8
25+
%I9 = insertelement <16 x i8> %I8, i8 %Src, i32 9
26+
%I10 = insertelement <16 x i8> %I9, i8 %Src, i32 10
27+
%I11 = insertelement <16 x i8> %I10, i8 %Src, i32 11
28+
%I12 = insertelement <16 x i8> %I11, i8 %Src, i32 12
29+
%I13 = insertelement <16 x i8> %I12, i8 %Src, i32 13
30+
%I14 = insertelement <16 x i8> %I13, i8 %Src, i32 14
31+
%I15 = insertelement <16 x i8> %I14, i8 %Src, i32 15
32+
33+
%tmp = shufflevector <16 x i8> zeroinitializer,
34+
<16 x i8> %I15,
35+
<32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
36+
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
37+
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
38+
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
39+
%tmp9 = shufflevector <32 x i8> undef,
40+
<32 x i8> %tmp,
41+
<32 x i32> <i32 33, i32 32, i32 48, i32 49, i32 1, i32 17, i32 50, i32 51,
42+
i32 2, i32 18, i32 52, i32 53, i32 3, i32 19, i32 54, i32 55,
43+
i32 4, i32 20, i32 56, i32 57, i32 5, i32 21, i32 58, i32 59,
44+
i32 6, i32 22, i32 60, i32 61, i32 7, i32 62, i32 55, i32 63>
45+
46+
store <32 x i8> %tmp9, <32 x i8>* %Dst
47+
ret void
48+
}
49+

0 commit comments

Comments
 (0)