Skip to content

[NVPTX][AMDGPU] Move annotation creation out of clang #14634

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 5, 2024
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include "llvm/SYCLLowerIR/RecordSYCLAspectNames.h"
#include "llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h"
#include "llvm/SYCLLowerIR/SYCLConditionalCallOnDevice.h"
#include "llvm/SYCLLowerIR/SYCLCreateNVVMAnnotations.h"
#include "llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h"
#include "llvm/SYCLLowerIR/SYCLPropagateJointMatrixUsage.h"
#include "llvm/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.h"
Expand Down Expand Up @@ -1151,6 +1152,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
// and before cleaning up metadata)
MPM.addPass(RecordSYCLAspectNamesPass());

if (TargetTriple.isNVPTX())
MPM.addPass(SYCLCreateNVVMAnnotationsPass());

// Remove SYCL metadata added by the frontend, like sycl_aspects
// Note, this pass should be at the end of the pipeline
MPM.addPass(CleanupSYCLMetadataPass());
Expand Down
74 changes: 0 additions & 74 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,80 +291,6 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
addNVVMMetadata(F, "grid_constant", GridConstantParamIdxs);
}
}
bool HasMaxWorkGroupSize = false;
bool HasMinWorkGroupPerCU = false;
if (const auto *MWGS = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
HasMaxWorkGroupSize = true;
// We must index-flip between SYCL's notation, X,Y,Z (aka dim0,dim1,dim2)
// with the fastest-moving dimension rightmost, to CUDA's, where X is the
// fastest-moving dimension.
addNVVMMetadata(F, "maxntidx", MWGS->getZDimVal());
addNVVMMetadata(F, "maxntidy", MWGS->getYDimVal());
addNVVMMetadata(F, "maxntidz", MWGS->getXDimVal());
}

if (const auto *RWGS = FD->getAttr<SYCLReqdWorkGroupSizeAttr>()) {
llvm::SmallVector<std::optional<int64_t>, 3> Ops;
// Index-flip and pad out any missing elements. Note the misleading
// nomenclature of the methods: getXDimVal doesn't return the X dimension;
// it returns the left-most dimension (dim0). This could correspond to
// CUDA's X, Y, or Z, depending on the number of operands provided.
if (auto Dim0 = RWGS->getXDimVal())
Ops.push_back(Dim0->getExtValue());
if (auto Dim1 = RWGS->getYDimVal())
Ops.push_back(Dim1->getExtValue());
if (auto Dim2 = RWGS->getZDimVal())
Ops.push_back(Dim2->getExtValue());
std::reverse(Ops.begin(), Ops.end());
Ops.append(3 - Ops.size(), std::nullopt);

// Work-group sizes (in NVVM annotations) must be positive and less than
// INT32_MAX, whereas SYCL can allow for larger work-group sizes (see
// -fno-sycl-id-queries-fit-in-int). If any dimension is too large for
// NVPTX, don't emit any annotation at all.
if (llvm::all_of(Ops, [](std::optional<int64_t> V) {
return !V || llvm::isUInt<31>(*V);
})) {
if (auto X = Ops[0])
addNVVMMetadata(F, "reqntidx", *X);
if (auto Y = Ops[1])
addNVVMMetadata(F, "reqntidy", *Y);
if (auto Z = Ops[2])
addNVVMMetadata(F, "reqntidz", *Z);
}
}

auto attrValue = [&](Expr *E) {
const auto *CE = cast<ConstantExpr>(E);
std::optional<llvm::APInt> Val = CE->getResultAsAPSInt();
return Val->getZExtValue();
};

if (const auto *MWGPCU =
FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
if (!HasMaxWorkGroupSize && FD->hasAttr<OpenCLKernelAttr>()) {
M.getDiags().Report(D->getLocation(),
diag::warn_launch_bounds_missing_attr)
<< MWGPCU << 0;
} else {
// The value is guaranteed to be > 0, pass it to the metadata.
addNVVMMetadata(F, "minctasm", attrValue(MWGPCU->getValue()));
HasMinWorkGroupPerCU = true;
}
}

if (const auto *MWGPMP =
FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
if ((!HasMaxWorkGroupSize || !HasMinWorkGroupPerCU) &&
FD->hasAttr<OpenCLKernelAttr>()) {
M.getDiags().Report(D->getLocation(),
diag::warn_launch_bounds_missing_attr)
<< MWGPMP << 1;
} else {
// The value is guaranteed to be > 0, pass it to the metadata.
addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue()));
}
}
}

// Perform special handling in CUDA mode.
Expand Down
22 changes: 17 additions & 5 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4089,11 +4089,17 @@ bool static check32BitInt(const Expr *E, Sema &S, llvm::APSInt &I,

void Sema::AddSYCLIntelMinWorkGroupsPerComputeUnitAttr(
Decl *D, const AttributeCommonInfo &CI, Expr *E) {
if (Context.getLangOpts().SYCLIsDevice &&
!Context.getTargetInfo().getTriple().isNVPTX()) {
Diag(E->getBeginLoc(), diag::warn_launch_bounds_is_cuda_specific)
<< CI << E->getSourceRange();
return;
if (Context.getLangOpts().SYCLIsDevice) {
if (!Context.getTargetInfo().getTriple().isNVPTX()) {
Diag(E->getBeginLoc(), diag::warn_launch_bounds_is_cuda_specific)
<< CI << E->getSourceRange();
return;
}

if (!D->hasAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
Diag(CI.getLoc(), diag::warn_launch_bounds_missing_attr) << CI << 0;
return;
}
}
if (!E->isValueDependent()) {
// Validate that we have an integer constant expression and then store the
Expand Down Expand Up @@ -4154,6 +4160,12 @@ void Sema::AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr(
<< CudaArchToString(SM) << CI << E->getSourceRange();
return;
}

if (!D->hasAttr<SYCLIntelMaxWorkGroupSizeAttr>() ||
!D->hasAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
Diag(CI.getLoc(), diag::warn_launch_bounds_missing_attr) << CI << 1;
return;
}
}
if (!E->isValueDependent()) {
// Validate that we have an integer constant expression and then store the
Expand Down
31 changes: 0 additions & 31 deletions clang/test/CodeGenSYCL/launch_bounds_nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,37 +48,6 @@ int main() {
// CHECK: define dso_local void @{{.*}}kernel_name2() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC:[0-9]+]] !max_work_groups_per_mp ![[MWGPM:[0-9]+]] !max_work_group_size ![[MWGS:[0-9]+]]
// CHECK: define dso_local void @{{.*}}kernel_name3() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC_MWGPM:[0-9]+]] !max_work_groups_per_mp ![[MWGPC_MWGPM]] !max_work_group_size ![[MWGS_2:[0-9]+]]

// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidz", i32 2}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidz", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"minctasm", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxclusterrank", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidx", i32 8}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidy", i32 4}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidz", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"minctasm", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxclusterrank", i32 6}

// CHECK: ![[MWGPC]] = !{i32 2}
// CHECK: ![[MWGPM]] = !{i32 4}
// CHECK: ![[MWGS]] = !{i32 8, i32 4, i32 2}
Expand Down
53 changes: 2 additions & 51 deletions clang/test/CodeGenSYCL/reqd-work-group-size.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NVPTX
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NVPTX
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

Expand Down Expand Up @@ -123,55 +123,6 @@ int main() {
// CHECK: define {{.*}} void @{{.*}}kernel_name22() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D22:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name24() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D2:[0-9]+]]

// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidx", i32 16}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidy", i32 16}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidz", i32 32}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidx", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidy", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidz", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidx", i32 2}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidy", i32 2}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidz", i32 2}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidx", i32 2}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidy", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidz", i32 1}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidx", i32 16}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidy", i32 16}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidz", i32 32}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidx", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidy", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidz", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidx", i32 2}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidy", i32 2}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidz", i32 2}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidx", i32 2}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidy", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidz", i32 1}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name13, !"reqntidx", i32 16}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name13, !"reqntidy", i32 32}
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name13, !"reqntidz"
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name15, !"reqntidx", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name15, !"reqntidy", i32 8}
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name15, !"reqntidz"
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name16, !"reqntidx", i32 2}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name16, !"reqntidy", i32 2}
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name16, !"reqntidz"
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name18, !"reqntidx", i32 8}
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name18, !"reqntidy", i32 1}
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name18, !"reqntidz"
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name19, !"reqntidx", i32 32}
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name19, !"reqntidy",
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name19, !"reqntidz",
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name21, !"reqntidx", i32 8}
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name21, !"reqntidy",
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name21, !"reqntidz",
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name22, !"reqntidx", i32 2}
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name22, !"reqntidy",
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name22, !"reqntidz",
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name24, !"reqntidx", i32 1}
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name24, !"reqntidy",
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name24, !"reqntidz",

// CHECK: ![[NDRWGS3D]] = !{i32 3}
// CHECK: ![[WGSIZE3D32]] = !{i32 16, i32 16, i32 32}
// CHECK: ![[WGSIZE3D88]] = !{i32 8, i32 8, i32 8}
Expand Down
9 changes: 7 additions & 2 deletions clang/test/SemaSYCL/lb_sm_70.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,14 @@ int main() {
intel::min_work_groups_per_cu(4),
intel::min_work_groups_per_cu(8)]] () { volatile int A = 42; });

// expected-error@+2 {{'min_work_groups_per_cu' attribute requires a non-negative integral compile time constant expression}}
// expected-warning@+2 {{'min_work_groups_per_cu' attribute ignored, as it requires: maximum work group size to be also specified}}
cgh.single_task<class T5>(
[=] [[intel::min_work_groups_per_cu(-8)]] () { volatile int A = 42; });
[=] [[intel::min_work_groups_per_cu(8)]] () { volatile int A = 42; });

// expected-error@+3 {{'min_work_groups_per_cu' attribute requires a non-negative integral compile time constant expression}}
cgh.single_task<class T5>(
[=] [[intel::max_work_group_size(1, 1, 256),
intel::min_work_groups_per_cu(-8)]] () { volatile int A = 42; });
});

Q.submit([&](sycl::handler &cgh) {
Expand Down
4 changes: 2 additions & 2 deletions clang/test/SemaSYCL/lb_sm_90.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ template <int N1, int N2, int N3> class Functor {
operator()() const {}
};

// expected-warning@+1 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
template <int N1, int N2> class Functor_2 {
public:
// expected-warning@+2 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
[[intel::max_work_group_size(1, 1, N1),
intel::max_work_groups_per_mp(N2)]] void
operator()() const {}
Expand All @@ -32,7 +32,7 @@ int main() {
intel::min_work_groups_per_cu(2),
intel::max_work_groups_per_mp(4)]] () { volatile int A = 42; });

// expected-warning@+2 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
// expected-warning@+3 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
cgh.single_task<class T2>(
[=] [[intel::max_work_group_size(1, 1, 256),
intel::max_work_groups_per_mp(4)]] () { volatile int A = 42; });
Expand Down
30 changes: 30 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/SYCLCreateNVVMAnnotations.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//===- SYCLCreateNVVMAnnotations.h - SYCLCreateNVVMAnnotationsPass --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers function metadata to NVVM annotations
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_SYCL_CREATE_NVVM_ANNOTATIONS_H
#define LLVM_SYCL_CREATE_NVVM_ANNOTATIONS_H

#include "llvm/IR/PassManager.h"

namespace llvm {

class SYCLCreateNVVMAnnotationsPass
: public PassInfoMixin<SYCLCreateNVVMAnnotationsPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);

static bool isRequired() { return true; }
};

} // namespace llvm

#endif // LLVM_SYCL_CREATE_NVVM_ANNOTATIONS_H
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
#include "llvm/SYCLLowerIR/RecordSYCLAspectNames.h"
#include "llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h"
#include "llvm/SYCLLowerIR/SYCLConditionalCallOnDevice.h"
#include "llvm/SYCLLowerIR/SYCLCreateNVVMAnnotations.h"
#include "llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h"
#include "llvm/SYCLLowerIR/SYCLPropagateJointMatrixUsage.h"
#include "llvm/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.h"
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ MODULE_PASS("sycl-propagate-joint-matrix-usage", SYCLPropagateJointMatrixUsagePa
MODULE_PASS("sycl-add-opt-level-attribute", SYCLAddOptLevelAttributePass())
MODULE_PASS("compile-time-properties", CompileTimePropertiesPass())
MODULE_PASS("cleanup-sycl-metadata", CleanupSYCLMetadataPass())
MODULE_PASS("sycl-create-nvvm-annotations", SYCLCreateNVVMAnnotationsPass())
MODULE_PASS("lower-slm-reservation-calls", ESIMDLowerSLMReservationCalls())
MODULE_PASS("record-sycl-aspect-names", RecordSYCLAspectNamesPass())
MODULE_PASS("sycl-virtual-functions-analysis",
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/SYCLLowerIR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
SpecConstants.cpp
SYCLAddOptLevelAttribute.cpp
SYCLConditionalCallOnDevice.cpp
SYCLCreateNVVMAnnotations.cpp
SYCLDeviceLibReqMask.cpp
SYCLDeviceRequirements.cpp
SYCLKernelParamOptInfo.cpp
Expand Down
20 changes: 18 additions & 2 deletions llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "llvm/SYCLLowerIR/DeviceGlobals.h"
#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h"
#include "llvm/SYCLLowerIR/HostPipes.h"
#include "llvm/SYCLLowerIR/TargetHelpers.h"

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringMap.h"
Expand Down Expand Up @@ -367,7 +368,8 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
SmallVector<StringRef, 3> ValStrs;
Attr.getValueAsString().split(ValStrs, ',');

assert(ValStrs.size() <= 3 &&
size_t NumDims = ValStrs.size();
assert(NumDims <= 3 &&
"sycl-work-group-size and sycl-work-group-size-hint currently only "
"support up to three values");

Expand All @@ -384,6 +386,16 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
for (StringRef ValStr : ValStrs)
MDVals.push_back(ConstantAsMetadata::get(
Constant::getIntegerValue(SizeTTy, APInt(SizeTBitSize, ValStr, 10))));
while (MDVals.size() < 3)
MDVals.push_back(ConstantAsMetadata::get(
Constant::getIntegerValue(SizeTTy, APInt(SizeTBitSize, 1, 10))));

if (NumDims < 3) {
if (!F.hasMetadata("work_group_num_dim"))
F.setMetadata("work_group_num_dim",
MDNode::get(Ctx, ConstantAsMetadata::get(ConstantInt::get(
Type::getInt32Ty(Ctx), NumDims))));
}

const char *MDName = (AttrKindStr == "sycl-work-group-size")
? "reqd_work_group_size"
Expand Down Expand Up @@ -588,9 +600,13 @@ PreservedAnalyses CompileTimePropertiesPass::run(Module &M,
}

// Process all properties on kernels.
TargetHelpers::KernelCache HIPCUDAKCache;
HIPCUDAKCache.populateKernels(M);

for (Function &F : M) {
// Only consider kernels.
if (F.getCallingConv() != CallingConv::SPIR_KERNEL)
if (F.getCallingConv() != CallingConv::SPIR_KERNEL &&
!HIPCUDAKCache.isKernel(F))
continue;

// Compile time properties on kernel arguments
Expand Down
Loading
Loading