Skip to content

Commit a1dfd82

Browse files
yxsamliuzhang2amd
authored andcommitted
[CUDA][HIP] Fix template argument deduction
nvcc allows using std::malloc and std::free in device code. When std::malloc or std::free is passed as a template function argument with template argument deduction, there is no diagnostics. e.g. __global__ void kern() { void *p = std::malloc(1); std::free(p); } int main() { std::shared_ptr<float> a; a = std::shared_ptr<float>( (float*)std::malloc(sizeof(float) * 100), std::free ); return 0; } However, the same code fails to compile with clang (https://godbolt.org/z/1roGvo6YY). The reason is that clang does not have logic to choose a function argument from an overloaded set of candidates based on host/device attributes for template argument deduction. Currently, clang does have a logic to choose a candidate based on the constraints of the candidates. This patch extends that logic to account for the CUDA host/device-based preference. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D154300 Fixes: SWDEV-446514 Change-Id: Ic5db052843848a7efb05be845178489c58a7c47a
1 parent ebda2d5 commit a1dfd82

File tree

2 files changed

+64
-4
lines changed

2 files changed

+64
-4
lines changed

clang/lib/Sema/SemaOverload.cpp

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12692,6 +12692,13 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
1269212692
DeclAccessPair DAP;
1269312693
SmallVector<FunctionDecl *, 2> AmbiguousDecls;
1269412694

12695+
// Return positive for better, negative for worse, 0 for equal preference.
12696+
auto CheckCUDAPreference = [&](FunctionDecl *FD1, FunctionDecl *FD2) {
12697+
FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
12698+
return static_cast<int>(IdentifyCUDAPreference(Caller, FD1)) -
12699+
static_cast<int>(IdentifyCUDAPreference(Caller, FD2));
12700+
};
12701+
1269512702
auto CheckMoreConstrained = [&](FunctionDecl *FD1,
1269612703
FunctionDecl *FD2) -> std::optional<bool> {
1269712704
if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction())
@@ -12722,9 +12729,31 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
1272212729
if (!checkAddressOfFunctionIsAvailable(FD))
1272312730
continue;
1272412731

12732+
// If we found a better result, update Result.
12733+
auto FoundBetter = [&]() {
12734+
IsResultAmbiguous = false;
12735+
DAP = I.getPair();
12736+
Result = FD;
12737+
};
12738+
1272512739
// We have more than one result - see if it is more constrained than the
1272612740
// previous one.
1272712741
if (Result) {
12742+
// Check CUDA preference first. If the candidates have differennt CUDA
12743+
// preference, choose the one with higher CUDA preference. Otherwise,
12744+
// choose the one with more constraints.
12745+
if (getLangOpts().CUDA) {
12746+
int PreferenceByCUDA = CheckCUDAPreference(FD, Result);
12747+
// FD has different preference than Result.
12748+
if (PreferenceByCUDA != 0) {
12749+
// FD is more preferable than Result.
12750+
if (PreferenceByCUDA > 0)
12751+
FoundBetter();
12752+
continue;
12753+
}
12754+
}
12755+
// FD has the same CUDA prefernece than Result. Continue check
12756+
// constraints.
1272812757
std::optional<bool> MoreConstrainedThanPrevious =
1272912758
CheckMoreConstrained(FD, Result);
1273012759
if (!MoreConstrainedThanPrevious) {
@@ -12736,9 +12765,7 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
1273612765
continue;
1273712766
// FD is more constrained - replace Result with it.
1273812767
}
12739-
IsResultAmbiguous = false;
12740-
DAP = I.getPair();
12741-
Result = FD;
12768+
FoundBetter();
1274212769
}
1274312770

1274412771
if (IsResultAmbiguous)
@@ -12748,9 +12775,15 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
1274812775
SmallVector<const Expr *, 1> ResultAC;
1274912776
// We skipped over some ambiguous declarations which might be ambiguous with
1275012777
// the selected result.
12751-
for (FunctionDecl *Skipped : AmbiguousDecls)
12778+
for (FunctionDecl *Skipped : AmbiguousDecls) {
12779+
// If skipped candidate has different CUDA preference than the result,
12780+
// there is no ambiguity. Otherwise check whether they have different
12781+
// constraints.
12782+
if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result) != 0)
12783+
continue;
1275212784
if (!CheckMoreConstrained(Skipped, Result))
1275312785
return nullptr;
12786+
}
1275412787
Pair = DAP;
1275512788
}
1275612789
return Result;
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
2+
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
3+
4+
// expected-no-diagnostics
5+
6+
#include "Inputs/cuda.h"
7+
8+
void foo();
9+
__device__ void foo();
10+
11+
template<class F>
12+
void host_temp(F f);
13+
14+
template<class F>
15+
__device__ void device_temp(F f);
16+
17+
void host_caller() {
18+
host_temp(foo);
19+
}
20+
21+
__global__ void kernel_caller() {
22+
device_temp(foo);
23+
}
24+
25+
__device__ void device_caller() {
26+
device_temp(foo);
27+
}

0 commit comments

Comments
 (0)