Skip to content

Commit 1f21e49

Browse files
authored
Revert "Revert "[AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (#71139)"" (#71669)

This reverts commit d1fb930 and fixes the lit test
clang/test/CodeGenHIP/dpp-const-fold.hip

---------

Authored-by: Pravin Jagtap <[email protected]>
1 parent 7ec86f4 commit 1f21e49

File tree

3 files changed

+80
-53
lines changed

3 files changed

+80
-53
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 31 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -5708,18 +5708,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
57085708
llvm::FunctionType *FTy = F->getFunctionType();
57095709

57105710
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
5711-
Value *ArgValue;
5712-
// If this is a normal argument, just emit it as a scalar.
5713-
if ((ICEArguments & (1 << i)) == 0) {
5714-
ArgValue = EmitScalarExpr(E->getArg(i));
5715-
} else {
5716-
// If this is required to be a constant, constant fold it so that we
5717-
// know that the generated intrinsic gets a ConstantInt.
5718-
ArgValue = llvm::ConstantInt::get(
5719-
getLLVMContext(),
5720-
*E->getArg(i)->getIntegerConstantExpr(getContext()));
5721-
}
5722-
5711+
Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
57235712
// If the intrinsic arg type is different from the builtin arg type
57245713
// we need to do a bit cast.
57255714
llvm::Type *PTy = FTy->getParamType(i);
@@ -8599,15 +8588,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
85998588
}
86008589
}
86018590

8602-
if ((ICEArguments & (1 << i)) == 0) {
8603-
Ops.push_back(EmitScalarExpr(E->getArg(i)));
8604-
} else {
8605-
// If this is required to be a constant, constant fold it so that we know
8606-
// that the generated intrinsic gets a ConstantInt.
8607-
Ops.push_back(llvm::ConstantInt::get(
8608-
getLLVMContext(),
8609-
*E->getArg(i)->getIntegerConstantExpr(getContext())));
8610-
}
8591+
Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
86118592
}
86128593

86138594
switch (BuiltinID) {
@@ -11094,15 +11075,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1109411075
continue;
1109511076
}
1109611077
}
11097-
if ((ICEArguments & (1 << i)) == 0) {
11098-
Ops.push_back(EmitScalarExpr(E->getArg(i)));
11099-
} else {
11100-
// If this is required to be a constant, constant fold it so that we know
11101-
// that the generated intrinsic gets a ConstantInt.
11102-
Ops.push_back(llvm::ConstantInt::get(
11103-
getLLVMContext(),
11104-
*E->getArg(i)->getIntegerConstantExpr(getContext())));
11105-
}
11078+
Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
1110611079
}
1110711080

1110811081
auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
@@ -13814,16 +13787,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1381413787
assert(Error == ASTContext::GE_None && "Should not codegen an error");
1381513788

1381613789
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
13817-
// If this is a normal argument, just emit it as a scalar.
13818-
if ((ICEArguments & (1 << i)) == 0) {
13819-
Ops.push_back(EmitScalarExpr(E->getArg(i)));
13820-
continue;
13821-
}
13822-
13823-
// If this is required to be a constant, constant fold it so that we know
13824-
// that the generated intrinsic gets a ConstantInt.
13825-
Ops.push_back(llvm::ConstantInt::get(
13826-
getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
13790+
Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
1382713791
}
1382813792

1382913793
// These exist so that the builtin that takes an immediate can be bounds
@@ -17588,6 +17552,23 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
1758817552
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
1758917553
}
1759017554

17555+
llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
17556+
unsigned Idx,
17557+
const CallExpr *E) {
17558+
llvm::Value *Arg = nullptr;
17559+
if ((ICEArguments & (1 << Idx)) == 0) {
17560+
Arg = EmitScalarExpr(E->getArg(Idx));
17561+
} else {
17562+
// If this is required to be a constant, constant fold it so that we
17563+
// know that the generated intrinsic gets a ConstantInt.
17564+
std::optional<llvm::APSInt> Result =
17565+
E->getArg(Idx)->getIntegerConstantExpr(getContext());
17566+
assert(Result && "Expected argument to be a constant");
17567+
Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
17568+
}
17569+
return Arg;
17570+
}
17571+
1759117572
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1759217573
const CallExpr *E) {
1759317574
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -17638,8 +17619,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1763817619
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
1763917620
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
1764017621
llvm::SmallVector<llvm::Value *, 6> Args;
17641-
for (unsigned I = 0; I != E->getNumArgs(); ++I)
17642-
Args.push_back(EmitScalarExpr(E->getArg(I)));
17622+
// Find out if any arguments are required to be integer constant
17623+
// expressions.
17624+
unsigned ICEArguments = 0;
17625+
ASTContext::GetBuiltinTypeError Error;
17626+
getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
17627+
assert(Error == ASTContext::GE_None && "Should not codegen an error");
17628+
for (unsigned I = 0; I != E->getNumArgs(); ++I) {
17629+
Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
17630+
}
1764317631
assert(Args.size() == 5 || Args.size() == 6);
1764417632
if (Args.size() == 5)
1764517633
Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
@@ -20615,17 +20603,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
2061520603
Ops.push_back(AggValue);
2061620604
continue;
2061720605
}
20618-
20619-
// If this is a normal argument, just emit it as a scalar.
20620-
if ((ICEArguments & (1 << i)) == 0) {
20621-
Ops.push_back(EmitScalarExpr(E->getArg(i)));
20622-
continue;
20623-
}
20624-
20625-
// If this is required to be a constant, constant fold it so that we know
20626-
// that the generated intrinsic gets a ConstantInt.
20627-
Ops.push_back(llvm::ConstantInt::get(
20628-
getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
20606+
Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
2062920607
}
2063020608

2063120609
Intrinsic::ID ID = Intrinsic::not_intrinsic;

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4327,6 +4327,8 @@ class CodeGenFunction : public CodeGenTypeCache {
43274327
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
43284328
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
43294329
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
4330+
llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
4331+
const CallExpr *E);
43304332
llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
43314333
llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
43324334
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
clang/test/CodeGenHIP/dpp-const-fold.hip

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// REQUIRES: amdgpu-registered-target
2+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device -emit-llvm %s \
3+
// RUN: -o - | FileCheck %s
4+
5+
constexpr static int OpCtrl()
6+
{
7+
return 15 + 1;
8+
}
9+
10+
constexpr static int RowMask()
11+
{
12+
return 3 + 1;
13+
}
14+
15+
constexpr static int BankMask()
16+
{
17+
return 2 + 1;
18+
}
19+
20+
constexpr static bool BountCtrl()
21+
{
22+
return true & false;
23+
}
24+
25+
// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
26+
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
27+
{
28+
*out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
29+
}
30+
31+
// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
32+
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
33+
{
34+
*out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
35+
}
36+
37+
// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
38+
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
39+
{
40+
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
41+
}
42+
43+
// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
44+
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
45+
{
46+
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
47+
}

0 commit comments

Comments
 (0)