Skip to content

Commit 402b76f

Browse files
SC llvm teamSC llvm team
authored andcommitted
Merged main:4434253f0fa6 into amd-gfx:7f370063677b
Local branch amd-gfx 7f37006 Merged main:52db7e27458f into amd-gfx:b433a27d80cf Remote branch main 4434253 [Bazel] disable preload-library.mlir test
2 parents 7f37006 + 4434253 commit 402b76f

File tree

33 files changed

+697
-128
lines changed

33 files changed

+697
-128
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
#include "llvm/Transforms/Scalar/EarlyCSE.h"
7979
#include "llvm/Transforms/Scalar/GVN.h"
8080
#include "llvm/Transforms/Scalar/JumpThreading.h"
81+
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
8182
#include "llvm/Transforms/Utils/Debugify.h"
8283
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
8384
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1108,6 +1109,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
11081109
return;
11091110
}
11101111

1112+
if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
1113+
LangOpts.HIPStdParInterposeAlloc)
1114+
MPM.addPass(HipStdParAllocationInterpositionPass());
1115+
11111116
// Now that we have all of the passes ready, run them.
11121117
{
11131118
PrettyStackTraceString CrashInfo("Optimizer");

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2327,6 +2327,19 @@ static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
23272327
return nullptr;
23282328
}
23292329

2330+
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2331+
const FunctionDecl *FD) {
2332+
auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2333+
auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2334+
auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2335+
2336+
SmallVector<Value *, 16> Args;
2337+
for (auto &&FormalTy : FnTy->params())
2338+
Args.push_back(llvm::PoisonValue::get(FormalTy));
2339+
2340+
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2341+
}
2342+
23302343
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
23312344
const CallExpr *E,
23322345
ReturnValueSlot ReturnValue) {
@@ -5765,6 +5778,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
57655778
llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
57665779
}
57675780

5781+
if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
5782+
return EmitHipStdParUnsupportedBuiltin(this, FD);
5783+
57685784
ErrorUnsupported(E, "builtin function");
57695785

57705786
// Unknown builtin, for now just dump it out and return undef.
@@ -5775,6 +5791,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
57755791
unsigned BuiltinID, const CallExpr *E,
57765792
ReturnValueSlot ReturnValue,
57775793
llvm::Triple::ArchType Arch) {
5794+
// When compiling in HipStdPar mode we have to be conservative in rejecting
5795+
// target specific features in the FE, and defer the possible error to the
5796+
// AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
5797+
// referenced by an accelerator executable function, we emit an error.
5798+
// Returning nullptr here leads to the builtin being handled in
5799+
// EmitStdParUnsupportedBuiltin.
5800+
if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
5801+
Arch != CGF->getTarget().getTriple().getArch())
5802+
return nullptr;
5803+
57785804
switch (Arch) {
57795805
case llvm::Triple::arm:
57805806
case llvm::Triple::armeb:

clang/lib/CodeGen/CGStmt.cpp

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2420,6 +2420,24 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
24202420
}
24212421
}
24222422

2423+
static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF,
2424+
const AsmStmt &S) {
2425+
constexpr auto Name = "__ASM__hipstdpar_unsupported";
2426+
2427+
StringRef Asm;
2428+
if (auto GCCAsm = dyn_cast<GCCAsmStmt>(&S))
2429+
Asm = GCCAsm->getAsmString()->getString();
2430+
2431+
auto &Ctx = CGF->CGM.getLLVMContext();
2432+
2433+
auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm);
2434+
auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
2435+
{StrTy->getType()}, false);
2436+
auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2437+
2438+
CGF->Builder.CreateCall(UBF, {StrTy});
2439+
}
2440+
24232441
void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
24242442
// Pop all cleanup blocks at the end of the asm statement.
24252443
CodeGenFunction::RunCleanupsScope Cleanups(*this);
@@ -2431,27 +2449,38 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
24312449
SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
24322450
SmallVector<TargetInfo::ConstraintInfo, 4> InputConstraintInfos;
24332451

2434-
for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) {
2452+
bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
2453+
bool IsValidTargetAsm = true;
2454+
for (unsigned i = 0, e = S.getNumOutputs(); i != e && IsValidTargetAsm; i++) {
24352455
StringRef Name;
24362456
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
24372457
Name = GAS->getOutputName(i);
24382458
TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name);
24392459
bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid;
2440-
assert(IsValid && "Failed to parse output constraint");
2460+
if (IsHipStdPar && !IsValid)
2461+
IsValidTargetAsm = false;
2462+
else
2463+
assert(IsValid && "Failed to parse output constraint");
24412464
OutputConstraintInfos.push_back(Info);
24422465
}
24432466

2444-
for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
2467+
for (unsigned i = 0, e = S.getNumInputs(); i != e && IsValidTargetAsm; i++) {
24452468
StringRef Name;
24462469
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
24472470
Name = GAS->getInputName(i);
24482471
TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name);
24492472
bool IsValid =
24502473
getTarget().validateInputConstraint(OutputConstraintInfos, Info);
2451-
assert(IsValid && "Failed to parse input constraint"); (void)IsValid;
2474+
if (IsHipStdPar && !IsValid)
2475+
IsValidTargetAsm = false;
2476+
else
2477+
assert(IsValid && "Failed to parse input constraint");
24522478
InputConstraintInfos.push_back(Info);
24532479
}
24542480

2481+
if (!IsValidTargetAsm)
2482+
return EmitHipStdParUnsupportedAsm(this, S);
2483+
24552484
std::string Constraints;
24562485

24572486
std::vector<LValue> ResultRegDests;

clang/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS
1111
Extensions
1212
FrontendHLSL
1313
FrontendOpenMP
14+
HIPStdPar
1415
IPO
1516
IRPrinter
1617
IRReader

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2594,10 +2594,15 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
25942594
std::string MissingFeature;
25952595
llvm::StringMap<bool> CallerFeatureMap;
25962596
CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD);
2597+
// When compiling in HipStdPar mode we have to be conservative in rejecting
2598+
// target specific features in the FE, and defer the possible error to the
2599+
// AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
2600+
// referenced by an accelerator executable function, we emit an error.
2601+
bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
25972602
if (BuiltinID) {
25982603
StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
25992604
if (!Builtin::evaluateRequiredTargetFeatures(
2600-
FeatureList, CallerFeatureMap)) {
2605+
FeatureList, CallerFeatureMap) && !IsHipStdPar) {
26012606
CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature)
26022607
<< TargetDecl->getDeclName()
26032608
<< FeatureList;
@@ -2630,7 +2635,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
26302635
return false;
26312636
}
26322637
return true;
2633-
}))
2638+
}) && !IsHipStdPar)
26342639
CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
26352640
<< FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature;
26362641
} else if (!FD->isMultiVersion() && FD->hasAttr<TargetAttr>()) {
@@ -2639,7 +2644,8 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
26392644

26402645
for (const auto &F : CalleeFeatureMap) {
26412646
if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) ||
2642-
!CallerFeatureMap.find(F.getKey())->getValue()))
2647+
!CallerFeatureMap.find(F.getKey())->getValue()) &&
2648+
!IsHipStdPar)
26432649
CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
26442650
<< FD->getDeclName() << TargetDecl->getDeclName() << F.getKey();
26452651
}

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3526,7 +3526,7 @@ ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject(
35263526
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
35273527
Emitter.finalize(GV);
35283528

3529-
return ConstantAddress(GV, GV->getValueType(), Alignment);
3529+
return ConstantAddress(GV, GV->getValueType(), Alignment);
35303530
}
35313531

35323532
ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
@@ -3585,7 +3585,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
35853585
!Global->hasAttr<CUDAConstantAttr>() &&
35863586
!Global->hasAttr<CUDASharedAttr>() &&
35873587
!Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
3588-
!Global->getType()->isCUDADeviceBuiltinTextureType())
3588+
!Global->getType()->isCUDADeviceBuiltinTextureType() &&
3589+
!(LangOpts.HIPStdPar &&
3590+
isa<FunctionDecl>(Global) &&
3591+
!Global->hasAttr<CUDAHostAttr>()))
35893592
return;
35903593
} else {
35913594
// We need to emit host-side 'shadows' for all global
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \
2+
// RUN: -o - %s | FileCheck --check-prefix=NO-HIPSTDPAR-DEV %s
3+
4+
// RUN: %clang_cc1 --hipstdpar -emit-llvm -fcuda-is-device \
5+
// RUN: -o - %s | FileCheck --check-prefix=HIPSTDPAR-DEV %s
6+
7+
#define __device__ __attribute__((device))
8+
9+
// NO-HIPSTDPAR-DEV-NOT: define {{.*}} void @foo({{.*}})
10+
// HIPSTDPAR-DEV: define {{.*}} void @foo({{.*}})
11+
extern "C" void foo(float *a, float b) {
12+
*a = b;
13+
}
14+
15+
// NO-HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}})
16+
// HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}})
17+
extern "C" __device__ void bar(float *a, float b) {
18+
*a = b;
19+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \
2+
// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
3+
4+
#define __global__ __attribute__((global))
5+
6+
__global__ void foo(int i) {
7+
asm ("addl %2, %1; seto %b0" : "=q" (i), "+g" (i) : "r" (i));
8+
}
9+
10+
// CHECK: declare void @__ASM__hipstdpar_unsupported([{{.*}}])
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \
2+
// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
3+
4+
#define __global__ __attribute__((global))
5+
6+
__global__ void foo() { return __builtin_ia32_pause(); }
7+
8+
// CHECK: declare void @__builtin_ia32_pause__hipstdpar_unsupported()

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 477746
19+
#define LLVM_MAIN_REVISION 477759
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2978,7 +2978,10 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
29782978

29792979
// Don't fold the add if the result would be slower, unless optimising for
29802980
// size.
2981-
int64_t Shift = AddrI.getOperand(3).getImm();
2981+
unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
2982+
if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
2983+
return false;
2984+
Shift = AArch64_AM::getShiftValue(Shift);
29822985
if (!OptSize) {
29832986
if ((Shift != 2 && Shift != 3) || !Subtarget.hasAddrLSLFast())
29842987
return false;

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -923,8 +923,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
923923
return Remainder.isMinValue();
924924
}
925925

926-
static Instruction *foldIDivShl(BinaryOperator &I,
927-
InstCombiner::BuilderTy &Builder) {
926+
static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
928927
assert((I.getOpcode() == Instruction::SDiv ||
929928
I.getOpcode() == Instruction::UDiv) &&
930929
"Expected integer divide");
@@ -933,7 +932,6 @@ static Instruction *foldIDivShl(BinaryOperator &I,
933932
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
934933
Type *Ty = I.getType();
935934

936-
Instruction *Ret = nullptr;
937935
Value *X, *Y, *Z;
938936

939937
// With appropriate no-wrap constraints, remove a common factor in the
@@ -948,12 +946,12 @@ static Instruction *foldIDivShl(BinaryOperator &I,
948946

949947
// (X * Y) u/ (X << Z) --> Y u>> Z
950948
if (!IsSigned && HasNUW)
951-
Ret = BinaryOperator::CreateLShr(Y, Z);
949+
return Builder.CreateLShr(Y, Z, "", I.isExact());
952950

953951
// (X * Y) s/ (X << Z) --> Y s/ (1 << Z)
954952
if (IsSigned && HasNSW && (Op0->hasOneUse() || Op1->hasOneUse())) {
955953
Value *Shl = Builder.CreateShl(ConstantInt::get(Ty, 1), Z);
956-
Ret = BinaryOperator::CreateSDiv(Y, Shl);
954+
return Builder.CreateSDiv(Y, Shl, "", I.isExact());
957955
}
958956
}
959957

@@ -971,13 +969,13 @@ static Instruction *foldIDivShl(BinaryOperator &I,
971969
((Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) ||
972970
(Shl0->hasNoUnsignedWrap() && Shl0->hasNoSignedWrap() &&
973971
Shl1->hasNoSignedWrap())))
974-
Ret = BinaryOperator::CreateUDiv(X, Y);
972+
return Builder.CreateUDiv(X, Y, "", I.isExact());
975973

976974
// For signed div, we need 'nsw' on both shifts + 'nuw' on the divisor.
977975
// (X << Z) / (Y << Z) --> X / Y
978976
if (IsSigned && Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap() &&
979977
Shl1->hasNoUnsignedWrap())
980-
Ret = BinaryOperator::CreateSDiv(X, Y);
978+
return Builder.CreateSDiv(X, Y, "", I.isExact());
981979
}
982980

983981
// If X << Y and X << Z does not overflow, then:
@@ -998,15 +996,11 @@ static Instruction *foldIDivShl(BinaryOperator &I,
998996
/*HasNSW*/
999997
IsSigned ? (Shl0->hasNoUnsignedWrap() || Shl1->hasNoUnsignedWrap())
1000998
: Shl0->hasNoSignedWrap());
1001-
Ret = BinaryOperator::CreateLShr(Dividend, Z);
999+
return Builder.CreateLShr(Dividend, Z, "", I.isExact());
10021000
}
10031001
}
10041002

1005-
if (!Ret)
1006-
return nullptr;
1007-
1008-
Ret->setIsExact(I.isExact());
1009-
return Ret;
1003+
return nullptr;
10101004
}
10111005

10121006
/// This function implements the transforms common to both integer division
@@ -1183,8 +1177,8 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
11831177
return NewDiv;
11841178
}
11851179

1186-
if (Instruction *R = foldIDivShl(I, Builder))
1187-
return R;
1180+
if (Value *R = foldIDivShl(I, Builder))
1181+
return replaceInstUsesWith(I, R);
11881182

11891183
// With the appropriate no-wrap constraint, remove a multiply by the divisor
11901184
// after peeking through another divide:
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -global-isel --aarch64-enable-sink-fold=true < %s | FileCheck %s
3+
4+
target triple = "aarch64-linux"
5+
6+
; Test a non-LSL shift cannot be folded into the addressing mode.
7+
define void @f(ptr %p, i64 %i) optsize {
8+
; CHECK-LABEL: f:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: add x8, x0, x1, asr #32
11+
; CHECK-NEXT: strb wzr, [x8]
12+
; CHECK-NEXT: ret
13+
%d = ashr i64 %i, 32
14+
%a = getelementptr i8, ptr %p, i64 %d
15+
store i8 0, ptr %a
16+
ret void
17+
}

0 commit comments

Comments
 (0)