@@ -54,9 +54,11 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
@@ -86,6 +88,7 @@
 #include <vector>

 using namespace llvm;
+using namespace llvm::PatternMatch;

 #define DEBUG_TYPE "aarch64-lower"

@@ -8270,6 +8273,110 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
   return true;
 }

+/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
+/// or upper half of the vector elements.
+static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
+  auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
+    auto *FullVT = cast<VectorType>(FullV->getType());
+    auto *HalfVT = cast<VectorType>(HalfV->getType());
+    return FullVT->getBitWidth() == 2 * HalfVT->getBitWidth();
+  };
+
+  auto extractHalf = [](Value *FullV, Value *HalfV) {
+    auto *FullVT = cast<VectorType>(FullV->getType());
+    auto *HalfVT = cast<VectorType>(HalfV->getType());
+    return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
+  };
+
+  Constant *M1, *M2;
+  Value *S1Op1, *S2Op1;
+  if (!match(Op1, m_ShuffleVector(m_Value(S1Op1), m_Undef(), m_Constant(M1))) ||
+      !match(Op2, m_ShuffleVector(m_Value(S2Op1), m_Undef(), m_Constant(M2))))
+    return false;
+
+  // Check that the operands are half as wide as the result and that we
+  // extract half of the elements of the input vectors.
+  if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
+      !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
+    return false;
+
+  // Check that the mask extracts either the lower or upper half of the
+  // vector elements.
+  int M1Start = -1;
+  int M2Start = -1;
+  int NumElements = cast<VectorType>(Op1->getType())->getNumElements() * 2;
+  if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
+      !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
+      M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
+    return false;
+
+  return true;
+}
+
+/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
+/// of the vector elements.
+static bool areExtractExts(Value *Ext1, Value *Ext2) {
+  auto areExtDoubled = [](Instruction *Ext) {
+    return Ext->getType()->getScalarSizeInBits() ==
+           2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
+  };
+
+  if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
+      !match(Ext2, m_ZExtOrSExt(m_Value())) ||
+      !areExtDoubled(cast<Instruction>(Ext1)) ||
+      !areExtDoubled(cast<Instruction>(Ext2)))
+    return false;
+
+  return true;
+}
+
+/// Check if sinking \p I's operands to I's basic block is profitable, because
+/// the operands can be folded into a target instruction, e.g.
+/// shufflevector extracts and/or sext/zext can be folded into (u,s)subl(2).
+bool AArch64TargetLowering::shouldSinkOperands(
+    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
+  if (!I->getType()->isVectorTy())
+    return false;
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::aarch64_neon_umull:
+      if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
+        return false;
+      Ops.push_back(&II->getOperandUse(0));
+      Ops.push_back(&II->getOperandUse(1));
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  switch (I->getOpcode()) {
+  case Instruction::Sub:
+  case Instruction::Add: {
+    if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
+      return false;
+
+    // If the exts' operands extract either the lower or upper elements, we
+    // can sink them too.
+    auto Ext1 = cast<Instruction>(I->getOperand(0));
+    auto Ext2 = cast<Instruction>(I->getOperand(1));
+    if (areExtractShuffleVectors(Ext1, Ext2)) {
+      Ops.push_back(&Ext1->getOperandUse(0));
+      Ops.push_back(&Ext2->getOperandUse(0));
+    }
+
+    Ops.push_back(&I->getOperandUse(0));
+    Ops.push_back(&I->getOperandUse(1));
+
+    return true;
+  }
+  default:
+    return false;
+  }
+  return false;
+}
+
 bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
                                           unsigned &RequiredAligment) const {
   if (!LoadedType.isSimple() ||
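
For context, here is a minimal, hypothetical IR sketch (not part of the patch; the function name, block names, and values are invented) of the pattern the Intrinsic::aarch64_neon_umull case is intended to catch: both operands are shufflevector extracts of the upper halves of 128-bit vectors, defined in a different block than the call. Reporting the operand uses from shouldSinkOperands lets CodeGenPrepare sink the shuffles next to the call, so instruction selection can fold everything into a single umull2.

; Hypothetical example, not taken from the patch or its tests.
define <8 x i16> @umull2_high(<16 x i8> %a, <16 x i8> %b, i1 %c) {
entry:
  ; Extract the upper halves (elements 8..15) of both 128-bit inputs.
  %ah = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bh = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  br i1 %c, label %then, label %exit

then:
  ; areExtractShuffleVectors(%ah, %bh) holds, so both operand uses are reported
  ; for sinking and the backend can select umull2 v0.8h, v1.16b, v2.16b.
  %m = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %ah, <8 x i8> %bh)
  ret <8 x i16> %m

exit:
  ret <8 x i16> zeroinitializer
}

declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)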
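Likewise, a hypothetical sketch of the Add/Sub path: areExtractExts requires both operands to be sext/zext instructions that double the element width, and when those extends themselves read from half extracts, the shuffles are reported for sinking as well. The dataflow is shown in a single block for brevity; in practice the extends and shuffles would be defined in a different block than the add, which is what makes sinking them worthwhile (e.g. so saddl2 can be selected).

; Hypothetical example, not taken from the patch or its tests.
define <8 x i16> @saddl2_high(<16 x i8> %a, <16 x i8> %b) {
  ; Upper-half extracts (elements 8..15) of both 128-bit inputs.
  %ah = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bh = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; The extends double the element width from i8 to i16, so areExtractExts holds.
  %ae = sext <8 x i8> %ah to <8 x i16>
  %be = sext <8 x i8> %bh to <8 x i16>
  ; With the shuffles and extends sunk next to the add, instruction selection
  ; can form saddl2 v0.8h, v1.16b, v2.16b instead of separate extracts and adds.
  %s = add <8 x i16> %ae, %be
  ret <8 x i16> %s
}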