Skip to content

Commit f65866d

Browse files
bwlodarczsys-ce-bb
authored and committed
Support for llvm.vector.reduce.* intrinsics (#2198)
A set of llvm.vector.reduce.* intrinsics has no straightforward equivalent operation on the SPIR-V side. The easiest solution to this problem is to apply the scalar operation to each pair of vector elements and repeat until only one value remains. Original commit: KhronosGroup/SPIRV-LLVM-Translator@fe088cd
1 parent 84d92de commit f65866d

File tree

16 files changed

+8412
-0
lines changed

16 files changed

+8412
-0
lines changed

llvm-spirv/lib/SPIRV/SPIRVWriter.cpp

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4077,6 +4077,122 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II,
40774077
transValue(II->getArgOperand(0), BB),
40784078
transValue(II->getArgOperand(1), BB), BB);
40794079
}
4080+
case Intrinsic::vector_reduce_add:
4081+
case Intrinsic::vector_reduce_mul:
4082+
case Intrinsic::vector_reduce_and:
4083+
case Intrinsic::vector_reduce_or:
4084+
case Intrinsic::vector_reduce_xor: {
4085+
Op Op;
4086+
if (IID == Intrinsic::vector_reduce_add) {
4087+
Op = OpIAdd;
4088+
} else if (IID == Intrinsic::vector_reduce_mul) {
4089+
Op = OpIMul;
4090+
} else if (IID == Intrinsic::vector_reduce_and) {
4091+
Op = OpBitwiseAnd;
4092+
} else if (IID == Intrinsic::vector_reduce_or) {
4093+
Op = OpBitwiseOr;
4094+
} else {
4095+
Op = OpBitwiseXor;
4096+
}
4097+
VectorType *VecTy = cast<VectorType>(II->getArgOperand(0)->getType());
4098+
SPIRVValue *VecSVal = transValue(II->getArgOperand(0), BB);
4099+
SPIRVTypeInt *ResultSType =
4100+
BM->addIntegerType(VecTy->getElementType()->getIntegerBitWidth());
4101+
SPIRVTypeInt *I32STy = BM->addIntegerType(32);
4102+
unsigned VecSize = VecTy->getElementCount().getFixedValue();
4103+
SmallVector<SPIRVValue *, 16> Extracts(VecSize);
4104+
for (unsigned Idx = 0; Idx < VecSize; ++Idx) {
4105+
Extracts[Idx] = BM->addVectorExtractDynamicInst(
4106+
VecSVal, BM->addIntegerConstant(I32STy, Idx), BB);
4107+
}
4108+
unsigned Counter = VecSize >> 1;
4109+
while (Counter != 0) {
4110+
for (unsigned Idx = 0; Idx < Counter; ++Idx) {
4111+
Extracts[Idx] = BM->addBinaryInst(Op, ResultSType, Extracts[Idx << 1],
4112+
Extracts[(Idx << 1) + 1], BB);
4113+
}
4114+
Counter >>= 1;
4115+
}
4116+
if ((VecSize & 1) != 0) {
4117+
Extracts[0] = BM->addBinaryInst(Op, ResultSType, Extracts[0],
4118+
Extracts[VecSize - 1], BB);
4119+
}
4120+
return Extracts[0];
4121+
}
4122+
case Intrinsic::vector_reduce_fadd:
4123+
case Intrinsic::vector_reduce_fmul: {
4124+
Op Op = IID == Intrinsic::vector_reduce_fadd ? OpFAdd : OpFMul;
4125+
VectorType *VecTy = cast<VectorType>(II->getArgOperand(1)->getType());
4126+
SPIRVValue *VecSVal = transValue(II->getArgOperand(1), BB);
4127+
SPIRVValue *StartingSVal = transValue(II->getArgOperand(0), BB);
4128+
SPIRVTypeInt *I32STy = BM->addIntegerType(32);
4129+
unsigned VecSize = VecTy->getElementCount().getFixedValue();
4130+
SmallVector<SPIRVValue *, 16> Extracts(VecSize);
4131+
for (unsigned Idx = 0; Idx < VecSize; ++Idx) {
4132+
Extracts[Idx] = BM->addVectorExtractDynamicInst(
4133+
VecSVal, BM->addIntegerConstant(I32STy, Idx), BB);
4134+
}
4135+
SPIRVValue *V = BM->addBinaryInst(Op, StartingSVal->getType(), StartingSVal,
4136+
Extracts[0], BB);
4137+
for (unsigned Idx = 1; Idx < VecSize; ++Idx) {
4138+
V = BM->addBinaryInst(Op, StartingSVal->getType(), V, Extracts[Idx], BB);
4139+
}
4140+
return V;
4141+
}
4142+
case Intrinsic::vector_reduce_smax:
4143+
case Intrinsic::vector_reduce_smin:
4144+
case Intrinsic::vector_reduce_umax:
4145+
case Intrinsic::vector_reduce_umin:
4146+
case Intrinsic::vector_reduce_fmax:
4147+
case Intrinsic::vector_reduce_fmin:
4148+
case Intrinsic::vector_reduce_fmaximum:
4149+
case Intrinsic::vector_reduce_fminimum: {
4150+
Op Op;
4151+
if (IID == Intrinsic::vector_reduce_smax) {
4152+
Op = OpSGreaterThan;
4153+
} else if (IID == Intrinsic::vector_reduce_smin) {
4154+
Op = OpSLessThan;
4155+
} else if (IID == Intrinsic::vector_reduce_umax) {
4156+
Op = OpUGreaterThan;
4157+
} else if (IID == Intrinsic::vector_reduce_umin) {
4158+
Op = OpULessThan;
4159+
} else if (IID == Intrinsic::vector_reduce_fmax) {
4160+
Op = OpFOrdGreaterThan;
4161+
} else if (IID == Intrinsic::vector_reduce_fmin) {
4162+
Op = OpFOrdLessThan;
4163+
} else if (IID == Intrinsic::vector_reduce_fmaximum) {
4164+
Op = OpFUnordGreaterThan;
4165+
} else {
4166+
Op = OpFUnordLessThan;
4167+
}
4168+
VectorType *VecTy = cast<VectorType>(II->getArgOperand(0)->getType());
4169+
SPIRVValue *VecSVal = transValue(II->getArgOperand(0), BB);
4170+
SPIRVType *BoolSTy = transType(Type::getInt1Ty(II->getContext()));
4171+
SPIRVTypeInt *I32STy = BM->addIntegerType(32);
4172+
unsigned VecSize = VecTy->getElementCount().getFixedValue();
4173+
SmallVector<SPIRVValue *, 16> Extracts(VecSize);
4174+
for (unsigned Idx = 0; Idx < VecSize; ++Idx) {
4175+
Extracts[Idx] = BM->addVectorExtractDynamicInst(
4176+
VecSVal, BM->addIntegerConstant(I32STy, Idx), BB);
4177+
}
4178+
unsigned Counter = VecSize >> 1;
4179+
while (Counter != 0) {
4180+
for (unsigned Idx = 0; Idx < Counter; ++Idx) {
4181+
SPIRVValue *Cond = BM->addBinaryInst(Op, BoolSTy, Extracts[Idx << 1],
4182+
Extracts[(Idx << 1) + 1], BB);
4183+
Extracts[Idx] = BM->addSelectInst(Cond, Extracts[Idx << 1],
4184+
Extracts[(Idx << 1) + 1], BB);
4185+
}
4186+
Counter >>= 1;
4187+
}
4188+
if ((VecSize & 1) != 0) {
4189+
SPIRVValue *Cond = BM->addBinaryInst(Op, BoolSTy, Extracts[0],
4190+
Extracts[VecSize - 1], BB);
4191+
Extracts[0] =
4192+
BM->addSelectInst(Cond, Extracts[0], Extracts[VecSize - 1], BB);
4193+
}
4194+
return Extracts[0];
4195+
}
40804196
case Intrinsic::memset: {
40814197
// Generally there is no direct mapping of memset to SPIR-V. But it turns
40824198
// out that memset is emitted by Clang for initialization in default

0 commit comments

Comments
 (0)