Skip to content

Commit ab976a1

Browse files
authored
PreISelIntrinsicLowering: Lower llvm.exp/llvm.exp2 to a loop if scalable vec arg (#117568)
1 parent 3861b9d commit ab976a1

File tree

9 files changed

+179
-0
lines changed

9 files changed

+179
-0
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2123,6 +2123,10 @@ class TargetLoweringBase {
21232123
/// Get the ISD node that corresponds to the Instruction class opcode.
21242124
int InstructionOpcodeToISD(unsigned Opcode) const;
21252125

2126+
/// Get the ISD node that corresponds to the Intrinsic ID. Returns
2127+
/// ISD::DELETED_NODE by default for an unsupported Intrinsic ID.
2128+
int IntrinsicIDToISD(Intrinsic::ID ID) const;
2129+
21262130
/// @}
21272131

21282132
//===--------------------------------------------------------------------===//
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
//===- llvm/Transforms/Utils/LowerVectorIntrinsics.h ------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Lower intrinsics with a scalable vector arg to loops.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_TRANSFORMS_UTILS_LOWERVECTORINTRINSICS_H
14+
#define LLVM_TRANSFORMS_UTILS_LOWERVECTORINTRINSICS_H
15+
16+
#include <cstdint>
17+
#include <optional>
18+
19+
namespace llvm {
20+
21+
class CallInst;
22+
class Module;
23+
24+
/// Lower \p CI as a loop. \p CI must be a call to a unary intrinsic with a
25+
/// vector argument; the call is erased and replaced with a per-element loop.
26+
bool lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI);
27+
28+
} // namespace llvm
29+
30+
#endif

llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "llvm/Target/TargetMachine.h"
3434
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
3535
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
36+
#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"
3637

3738
using namespace llvm;
3839

@@ -453,6 +454,19 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
453454
case Intrinsic::objc_sync_exit:
454455
Changed |= lowerObjCCall(F, "objc_sync_exit");
455456
break;
457+
case Intrinsic::exp:
458+
case Intrinsic::exp2:
459+
Changed |= forEachCall(F, [&](CallInst *CI) {
460+
Type *Ty = CI->getArgOperand(0)->getType();
461+
if (!isa<ScalableVectorType>(Ty))
462+
return false;
463+
const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();
464+
unsigned Op = TL->IntrinsicIDToISD(F.getIntrinsicID());
465+
if (!TL->isOperationExpand(Op, EVT::getEVT(Ty)))
466+
return false;
467+
return lowerUnaryVectorIntrinsicAsLoop(M, CI);
468+
});
469+
break;
456470
}
457471
}
458472
return Changed;

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1841,6 +1841,17 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
18411841
llvm_unreachable("Unknown instruction type encountered!");
18421842
}
18431843

1844+
/// Map an intrinsic ID onto the ISD opcode that implements it.
///
/// Only Intrinsic::exp (ISD::FEXP) and Intrinsic::exp2 (ISD::FEXP2) are
/// recognised; every other ID yields ISD::DELETED_NODE.
int TargetLoweringBase::IntrinsicIDToISD(Intrinsic::ID ID) const {
  if (ID == Intrinsic::exp)
    return ISD::FEXP;

  if (ID == Intrinsic::exp2)
    return ISD::FEXP2;

  // Any intrinsic not listed above has no mapping here.
  return ISD::DELETED_NODE;
}
1854+
18441855
Value *
18451856
TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
18461857
bool UseTLS) const {

llvm/lib/Transforms/Utils/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ add_llvm_component_library(LLVMTransformUtils
5656
LowerInvoke.cpp
5757
LowerMemIntrinsics.cpp
5858
LowerSwitch.cpp
59+
LowerVectorIntrinsics.cpp
5960
MatrixUtils.cpp
6061
MemoryOpRemark.cpp
6162
MemoryTaggingSupport.cpp
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
//===- LowerVectorIntrinsics.cpp ------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"
10+
#include "llvm/IR/IRBuilder.h"
11+
#include "llvm/IR/IntrinsicInst.h"
12+
#include "llvm/Support/Debug.h"
13+
14+
#define DEBUG_TYPE "lower-vector-intrinsics"
15+
16+
using namespace llvm;
17+
18+
/// Replace the unary vector intrinsic call \p CI with a loop that applies the
/// scalar overload of the same intrinsic to one element per iteration.
///
/// The block containing \p CI is split at the call and a single-block loop is
/// inserted between the two halves. The loop carries the vector in a PHI that
/// starts as the call's argument; each iteration extracts element `i`, calls
/// the scalar intrinsic on it, and inserts the result back at index `i`.
///
/// \param M  Module in which the scalar intrinsic declaration is looked up or
///           created.
/// \param CI Call to lower. Its first argument must have a vector type
///           (enforced by cast<VectorType>). \p CI is erased, so callers must
///           not use it afterwards.
/// \returns true; the IR is always modified.
bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
  Value *Arg = CI->getArgOperand(0);
  auto *VecTy = cast<VectorType>(Arg->getType());

  BasicBlock *PreLoopBB = CI->getParent();
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  Type *Int64Ty = Type::getInt64Ty(Ctx);

  // Everything from CI onwards moves to PostLoopBB; the branch that now ends
  // PreLoopBB is redirected into the new loop block.
  BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(CI);
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
  PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

  // Loop preheader: compute the trip count, i.e. the number of elements.
  IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
  Value *LoopEnd = nullptr;
  if (auto *ScalableVecTy = dyn_cast<ScalableVectorType>(VecTy)) {
    // The element count is only known at run time: vscale * min-elements.
    Value *VScale = PreLoopBuilder.CreateVScale(ConstantInt::get(Int64Ty, 1));
    Value *N = ConstantInt::get(Int64Ty, ScalableVecTy->getMinNumElements());
    LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
  } else {
    auto *FixedVecTy = cast<FixedVectorType>(VecTy);
    LoopEnd = ConstantInt::get(Int64Ty, FixedVecTy->getNumElements());
  }

  // Loop body
  IRBuilder<> LoopBuilder(LoopBB);
  // Give the instructions created in the loop the debug location of the call
  // they replace.
  LoopBuilder.SetCurrentDebugLocation(CI->getDebugLoc());

  PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
  LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0), PreLoopBB);
  PHINode *Vec = LoopBuilder.CreatePHI(VecTy, 2);
  Vec->addIncoming(Arg, PreLoopBB);

  Value *Elem = LoopBuilder.CreateExtractElement(Vec, LoopIndex);
  // Same intrinsic as CI, overloaded on the element type instead of the vector.
  Function *ScalarFn = Intrinsic::getOrInsertDeclaration(
      &M, CI->getIntrinsicID(), VecTy->getElementType());
  CallInst *Res = LoopBuilder.CreateCall(ScalarFn, Elem);
  // Keep the fast-math flags of the original call on its scalar replacement.
  if (isa<FPMathOperator>(CI) && isa<FPMathOperator>(Res))
    Res->copyFastMathFlags(CI);
  Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
  Vec->addIncoming(NewVec, LoopBB);

  Value *NextLoopIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(Int64Ty, 1));
  LoopIndex->addIncoming(NextLoopIndex, LoopBB);

  // The exit test runs after the body, so the body executes at least once.
  Value *ExitCond =
      LoopBuilder.CreateICmp(CmpInst::ICMP_EQ, NextLoopIndex, LoopEnd);
  LoopBuilder.CreateCondBr(ExitCond, PostLoopBB, LoopBB);

  // The vector produced by the last iteration stands in for the call result.
  CI->replaceAllUsesWith(NewVec);
  CI->eraseFromParent();
  return true;
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck %s
3+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
4+
target triple = "aarch64"
5+
6+
define <vscale x 4 x float> @scalable_vec_exp(<vscale x 4 x float> %input) {
7+
; CHECK-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
8+
; CHECK-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
9+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
10+
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
11+
; CHECK-NEXT: br label %[[BB3:.*]]
12+
; CHECK: [[BB3]]:
13+
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
14+
; CHECK-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
15+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
16+
; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.exp.f32(float [[TMP6]])
17+
; CHECK-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
18+
; CHECK-NEXT: [[TMP9]] = add i64 [[TMP4]], 1
19+
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
20+
; CHECK-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
21+
; CHECK: [[BB11]]:
22+
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP8]]
23+
;
24+
%output = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %input)
25+
ret <vscale x 4 x float> %output
26+
}
27+
28+
define <4 x float> @fixed_vec_exp(<4 x float> %input) {
29+
; CHECK-LABEL: define <4 x float> @fixed_vec_exp(
30+
; CHECK-SAME: <4 x float> [[INPUT:%.*]]) {
31+
; CHECK-NEXT: [[OUTPUT:%.*]] = call <4 x float> @llvm.exp.v4f32(<4 x float> [[INPUT]])
32+
; CHECK-NEXT: ret <4 x float> [[OUTPUT]]
33+
;
34+
%output = call <4 x float> @llvm.exp.v4f32(<4 x float> %input)
35+
ret <4 x float> %output
36+
}
37+
38+
declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0
39+
declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0
40+
41+
; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
42+
; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
43+
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Mark this test directory unsupported unless AArch64 is among the configured
# targets.
if "AArch64" not in config.root.targets:
    config.unsupported = True

llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ static_library("Utils") {
6464
"LowerInvoke.cpp",
6565
"LowerMemIntrinsics.cpp",
6666
"LowerSwitch.cpp",
67+
"LowerVectorIntrinsics.cpp",
6768
"MatrixUtils.cpp",
6869
"Mem2Reg.cpp",
6970
"MemoryOpRemark.cpp",

0 commit comments

Comments
 (0)