-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][cuda] Add c_devloc as intrinsic and inline it during lowering #120648
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesFull diff: https://github.com/llvm/llvm-project/pull/120648.diff 8 Files Affected:
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index b772c523caba49..ffaaa66a862a8a 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -744,6 +744,11 @@ mlir::Value genMaxWithZero(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value cPtr, mlir::Type ty);
+/// The type(C_DEVPTR) is defined as the derived type with only one
+/// component of C_PTR type. Get the C address from the C_PTR component.
+mlir::Value genCDevPtrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value cDevPtr, mlir::Type ty);
+
/// Get the C address value.
mlir::Value genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value cPtr);
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index bc0020e614db24..79c1aec9d56e25 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -214,6 +214,7 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
+ fir::ExtendedValue genCDevLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genErfcScaled(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index cdea572c147576..fb0207a7685353 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -2595,6 +2595,8 @@ class IntrinsicProcTable::Implementation {
ActualArguments &, FoldingContext &) const;
std::optional<SpecificCall> HandleC_Loc(
ActualArguments &, FoldingContext &) const;
+ std::optional<SpecificCall> HandleC_Devloc(
+ ActualArguments &, FoldingContext &) const;
const std::string &ResolveAlias(const std::string &name) const {
auto iter{aliases_.find(name)};
return iter == aliases_.end() ? name : iter->second;
@@ -2622,7 +2624,8 @@ bool IntrinsicProcTable::Implementation::IsIntrinsicFunction(
return true;
}
// special cases
- return name == "__builtin_c_loc" || name == "null";
+ return name == "__builtin_c_loc" || name == "__builtin_c_devloc" ||
+ name == "null";
}
bool IntrinsicProcTable::Implementation::IsIntrinsicSubroutine(
const std::string &name0) const {
@@ -3012,6 +3015,73 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Loc(
return std::nullopt;
}
+// CUDA Fortran C_DEVLOC(x)
+std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Devloc(
+ ActualArguments &arguments, FoldingContext &context) const {
+ static const char *const keywords[]{"cptr", nullptr};
+
+ if (CheckAndRearrangeArguments(arguments, context.messages(), keywords)) {
+ CHECK(arguments.size() == 1);
+ const auto *expr{arguments[0].value().UnwrapExpr()};
+ if (auto typeAndShape{characteristics::TypeAndShape::Characterize(
+ arguments[0], context)}) {
+ if (expr && !IsContiguous(*expr, context).value_or(true)) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument must be contiguous"_err_en_US);
+ }
+ if (auto constExtents{AsConstantExtents(context, typeAndShape->shape())};
+ constExtents && GetSize(*constExtents) == 0) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument may not be a zero-sized array"_err_en_US);
+ }
+ if (!(typeAndShape->type().category() != TypeCategory::Derived ||
+ typeAndShape->type().IsAssumedType() ||
+ (!typeAndShape->type().IsPolymorphic() &&
+ CountNonConstantLenParameters(
+ typeAndShape->type().GetDerivedTypeSpec()) == 0))) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument must have an intrinsic type, assumed type, or non-polymorphic derived type with no non-constant length parameter"_err_en_US);
+ } else if (typeAndShape->type().knownLength().value_or(1) == 0) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument may not be zero-length character"_err_en_US);
+ } else if (typeAndShape->type().category() != TypeCategory::Derived &&
+ !IsInteroperableIntrinsicType(typeAndShape->type()).value_or(true)) {
+ if (typeAndShape->type().category() == TypeCategory::Character &&
+ typeAndShape->type().kind() == 1) {
+ // Default character kind, but length is not known to be 1
+ if (context.languageFeatures().ShouldWarn(
+ common::UsageWarning::CharacterInteroperability)) {
+ context.messages().Say(
+ common::UsageWarning::CharacterInteroperability,
+ arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument has non-interoperable character length"_warn_en_US);
+ }
+ } else if (context.languageFeatures().ShouldWarn(
+ common::UsageWarning::Interoperability)) {
+ context.messages().Say(common::UsageWarning::Interoperability,
+ arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument has non-interoperable intrinsic type or kind"_warn_en_US);
+ }
+ }
+
+ characteristics::DummyDataObject ddo{std::move(*typeAndShape)};
+ ddo.intent = common::Intent::In;
+ return SpecificCall{
+ SpecificIntrinsic{"__builtin_c_devloc"s,
+ characteristics::Procedure{
+ characteristics::FunctionResult{
+ DynamicType{GetBuiltinDerivedType(
+ builtinsScope_, "__builtin_c_devptr")}},
+ characteristics::DummyArguments{
+ characteristics::DummyArgument{"cptr"s, std::move(ddo)}},
+ characteristics::Procedure::Attrs{
+ characteristics::Procedure::Attr::Pure}}},
+ std::move(arguments)};
+ }
+ }
+ return std::nullopt;
+}
+
static bool CheckForNonPositiveValues(FoldingContext &context,
const ActualArgument &arg, const std::string &procName,
const std::string &argName) {
@@ -3202,6 +3272,8 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::Probe(
} else { // function
if (call.name == "__builtin_c_loc") {
return HandleC_Loc(arguments, context);
+ } else if (call.name == "__builtin_c_devloc") {
+ return HandleC_Devloc(arguments, context);
} else if (call.name == "null") {
return HandleNull(arguments, context);
}
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 3a39c455015f9f..d01becfe800937 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1626,6 +1626,25 @@ mlir::Value fir::factory::genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder,
cPtr, addrFieldIndex);
}
+mlir::Value fir::factory::genCDevPtrAddr(fir::FirOpBuilder &builder,
+ mlir::Location loc,
+ mlir::Value cDevPtr, mlir::Type ty) {
+ auto recTy = mlir::cast<fir::RecordType>(ty);
+ assert(recTy.getTypeList().size() == 1);
+ auto cptrFieldName = recTy.getTypeList()[0].first;
+ mlir::Type cptrFieldTy = recTy.getTypeList()[0].second;
+ auto fieldIndexType = fir::FieldType::get(ty.getContext());
+ mlir::Value cptrFieldIndex = builder.create<fir::FieldIndexOp>(
+ loc, fieldIndexType, cptrFieldName, recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ auto cptrCoord = builder.create<fir::CoordinateOp>(
+ loc, builder.getRefType(cptrFieldTy), cDevPtr, cptrFieldIndex);
+ auto [addrFieldIndex, addrFieldTy] =
+ genCPtrOrCFunptrFieldIndex(builder, loc, cptrFieldTy);
+ return builder.create<fir::CoordinateOp>(loc, builder.getRefType(addrFieldTy),
+ cptrCoord, addrFieldIndex);
+}
+
mlir::Value fir::factory::genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
mlir::Location loc,
mlir::Value cPtr) {
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 547cebefd2df47..e7a3d40a5c2ff4 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -167,6 +167,7 @@ static constexpr IntrinsicHandler handlers[]{
&I::genCAssociatedCPtr,
{{{"c_ptr_1", asAddr}, {"c_ptr_2", asAddr, handleDynamicOptional}}},
/*isElemental=*/false},
+ {"c_devloc", &I::genCDevLoc, {{{"x", asBox}}}, /*isElemental=*/false},
{"c_f_pointer",
&I::genCFPointer,
{{{"cptr", asValue},
@@ -2828,11 +2829,14 @@ static mlir::Value getAddrFromBox(fir::FirOpBuilder &builder,
static fir::ExtendedValue
genCLocOrCFunLoc(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args,
- bool isFunc = false) {
+ bool isFunc = false, bool isDevLoc = false) {
assert(args.size() == 1);
mlir::Value res = builder.create<fir::AllocaOp>(loc, resultType);
- mlir::Value resAddr =
- fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
+ mlir::Value resAddr;
+ if (isDevLoc)
+ resAddr = fir::factory::genCDevPtrAddr(builder, loc, res, resultType);
+ else
+ resAddr = fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
assert(fir::isa_box_type(fir::getBase(args[0]).getType()) &&
"argument must have been lowered to box type");
mlir::Value argAddr = getAddrFromBox(builder, loc, args[0], isFunc);
@@ -2889,6 +2893,14 @@ IntrinsicLibrary::genCAssociatedCPtr(mlir::Type resultType,
return genCAssociated(builder, loc, resultType, args);
}
+// C_DEVLOC
+fir::ExtendedValue
+IntrinsicLibrary::genCDevLoc(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ return genCLocOrCFunLoc(builder, loc, resultType, args, /*isFunc=*/false,
+ /*isDevLoc=*/true);
+}
+
// C_F_POINTER
void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90
index ef206dfd943102..ab12d6c3089c59 100644
--- a/flang/module/__fortran_builtins.f90
+++ b/flang/module/__fortran_builtins.f90
@@ -22,6 +22,9 @@
intrinsic :: __builtin_c_loc
public :: __builtin_c_loc
+ intrinsic :: __builtin_c_devloc
+ public :: __builtin_c_devloc
+
intrinsic :: __builtin_c_f_pointer
public :: __builtin_c_f_pointer
@@ -144,6 +147,7 @@
type :: __force_derived_type_instantiations
type(__builtin_c_ptr) :: c_ptr
+ type(__builtin_c_devptr) :: c_devptr
type(__builtin_c_funptr) :: c_funptr
type(__builtin_event_type) :: event_type
type(__builtin_lock_type) :: lock_type
diff --git a/flang/module/__fortran_type_info.f90 b/flang/module/__fortran_type_info.f90
index 5f2273de1e3d1e..b30a6bf6975638 100644
--- a/flang/module/__fortran_type_info.f90
+++ b/flang/module/__fortran_type_info.f90
@@ -14,7 +14,7 @@
module __fortran_type_info
use, intrinsic :: __fortran_builtins, &
- only: __builtin_c_ptr, __builtin_c_funptr
+ only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr
implicit none
! Set PRIVATE by default to explicitly only export what is meant
diff --git a/flang/test/Lower/CUDA/cuda-cdevloc.cuf b/flang/test/Lower/CUDA/cuda-cdevloc.cuf
new file mode 100644
index 00000000000000..a71490207909a8
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-cdevloc.cuf
@@ -0,0 +1,21 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+attributes(global) subroutine testcdevloc(a)
+ use __fortran_builtins, only: c_devloc => __builtin_c_devloc
+ integer, device :: a(10)
+ print*, c_devloc(a(1))
+end
+
+! CHECK-LABEL: func.func @_QPtestcdevloc(
+! CHECK-SAME: %[[A_ARG:.*]]: !fir.ref<!fir.array<10xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestcdevlocEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: %[[A1:.*]] = hlfir.designate %[[A]]#0 (%c1{{.*}}) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[BOX:.*]] = fir.embox %[[A1]] : (!fir.ref<i32>) -> !fir.box<i32>
+! CHECK: %[[CDEVPTR:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
+! CHECK: %[[FIELD_CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
+! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], %[[FIELD_CPTR]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>, !fir.field) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
+! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<i32>) -> !fir.ref<i32>
+! CHECK: %[[ADDRESS_A1:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref<i32>) -> i64
+! CHECK: fir.store %[[ADDRESS_A1]] to %[[COORD_ADDRESS]] : !fir.ref<i64>
|
@llvm/pr-subscribers-flang-semantics Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesFull diff: https://github.com/llvm/llvm-project/pull/120648.diff 8 Files Affected:
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index b772c523caba49..ffaaa66a862a8a 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -744,6 +744,11 @@ mlir::Value genMaxWithZero(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value cPtr, mlir::Type ty);
+/// The type(C_DEVPTR) is defined as the derived type with only one
+/// component of C_PTR type. Get the C address from the C_PTR component.
+mlir::Value genCDevPtrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value cDevPtr, mlir::Type ty);
+
/// Get the C address value.
mlir::Value genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value cPtr);
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index bc0020e614db24..79c1aec9d56e25 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -214,6 +214,7 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
+ fir::ExtendedValue genCDevLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genErfcScaled(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index cdea572c147576..fb0207a7685353 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -2595,6 +2595,8 @@ class IntrinsicProcTable::Implementation {
ActualArguments &, FoldingContext &) const;
std::optional<SpecificCall> HandleC_Loc(
ActualArguments &, FoldingContext &) const;
+ std::optional<SpecificCall> HandleC_Devloc(
+ ActualArguments &, FoldingContext &) const;
const std::string &ResolveAlias(const std::string &name) const {
auto iter{aliases_.find(name)};
return iter == aliases_.end() ? name : iter->second;
@@ -2622,7 +2624,8 @@ bool IntrinsicProcTable::Implementation::IsIntrinsicFunction(
return true;
}
// special cases
- return name == "__builtin_c_loc" || name == "null";
+ return name == "__builtin_c_loc" || name == "__builtin_c_devloc" ||
+ name == "null";
}
bool IntrinsicProcTable::Implementation::IsIntrinsicSubroutine(
const std::string &name0) const {
@@ -3012,6 +3015,73 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Loc(
return std::nullopt;
}
+// CUDA Fortran C_DEVLOC(x)
+std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Devloc(
+ ActualArguments &arguments, FoldingContext &context) const {
+ static const char *const keywords[]{"cptr", nullptr};
+
+ if (CheckAndRearrangeArguments(arguments, context.messages(), keywords)) {
+ CHECK(arguments.size() == 1);
+ const auto *expr{arguments[0].value().UnwrapExpr()};
+ if (auto typeAndShape{characteristics::TypeAndShape::Characterize(
+ arguments[0], context)}) {
+ if (expr && !IsContiguous(*expr, context).value_or(true)) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument must be contiguous"_err_en_US);
+ }
+ if (auto constExtents{AsConstantExtents(context, typeAndShape->shape())};
+ constExtents && GetSize(*constExtents) == 0) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument may not be a zero-sized array"_err_en_US);
+ }
+ if (!(typeAndShape->type().category() != TypeCategory::Derived ||
+ typeAndShape->type().IsAssumedType() ||
+ (!typeAndShape->type().IsPolymorphic() &&
+ CountNonConstantLenParameters(
+ typeAndShape->type().GetDerivedTypeSpec()) == 0))) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument must have an intrinsic type, assumed type, or non-polymorphic derived type with no non-constant length parameter"_err_en_US);
+ } else if (typeAndShape->type().knownLength().value_or(1) == 0) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument may not be zero-length character"_err_en_US);
+ } else if (typeAndShape->type().category() != TypeCategory::Derived &&
+ !IsInteroperableIntrinsicType(typeAndShape->type()).value_or(true)) {
+ if (typeAndShape->type().category() == TypeCategory::Character &&
+ typeAndShape->type().kind() == 1) {
+ // Default character kind, but length is not known to be 1
+ if (context.languageFeatures().ShouldWarn(
+ common::UsageWarning::CharacterInteroperability)) {
+ context.messages().Say(
+ common::UsageWarning::CharacterInteroperability,
+ arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument has non-interoperable character length"_warn_en_US);
+ }
+ } else if (context.languageFeatures().ShouldWarn(
+ common::UsageWarning::Interoperability)) {
+ context.messages().Say(common::UsageWarning::Interoperability,
+ arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument has non-interoperable intrinsic type or kind"_warn_en_US);
+ }
+ }
+
+ characteristics::DummyDataObject ddo{std::move(*typeAndShape)};
+ ddo.intent = common::Intent::In;
+ return SpecificCall{
+ SpecificIntrinsic{"__builtin_c_devloc"s,
+ characteristics::Procedure{
+ characteristics::FunctionResult{
+ DynamicType{GetBuiltinDerivedType(
+ builtinsScope_, "__builtin_c_devptr")}},
+ characteristics::DummyArguments{
+ characteristics::DummyArgument{"cptr"s, std::move(ddo)}},
+ characteristics::Procedure::Attrs{
+ characteristics::Procedure::Attr::Pure}}},
+ std::move(arguments)};
+ }
+ }
+ return std::nullopt;
+}
+
static bool CheckForNonPositiveValues(FoldingContext &context,
const ActualArgument &arg, const std::string &procName,
const std::string &argName) {
@@ -3202,6 +3272,8 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::Probe(
} else { // function
if (call.name == "__builtin_c_loc") {
return HandleC_Loc(arguments, context);
+ } else if (call.name == "__builtin_c_devloc") {
+ return HandleC_Devloc(arguments, context);
} else if (call.name == "null") {
return HandleNull(arguments, context);
}
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 3a39c455015f9f..d01becfe800937 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1626,6 +1626,25 @@ mlir::Value fir::factory::genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder,
cPtr, addrFieldIndex);
}
+mlir::Value fir::factory::genCDevPtrAddr(fir::FirOpBuilder &builder,
+ mlir::Location loc,
+ mlir::Value cDevPtr, mlir::Type ty) {
+ auto recTy = mlir::cast<fir::RecordType>(ty);
+ assert(recTy.getTypeList().size() == 1);
+ auto cptrFieldName = recTy.getTypeList()[0].first;
+ mlir::Type cptrFieldTy = recTy.getTypeList()[0].second;
+ auto fieldIndexType = fir::FieldType::get(ty.getContext());
+ mlir::Value cptrFieldIndex = builder.create<fir::FieldIndexOp>(
+ loc, fieldIndexType, cptrFieldName, recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ auto cptrCoord = builder.create<fir::CoordinateOp>(
+ loc, builder.getRefType(cptrFieldTy), cDevPtr, cptrFieldIndex);
+ auto [addrFieldIndex, addrFieldTy] =
+ genCPtrOrCFunptrFieldIndex(builder, loc, cptrFieldTy);
+ return builder.create<fir::CoordinateOp>(loc, builder.getRefType(addrFieldTy),
+ cptrCoord, addrFieldIndex);
+}
+
mlir::Value fir::factory::genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
mlir::Location loc,
mlir::Value cPtr) {
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 547cebefd2df47..e7a3d40a5c2ff4 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -167,6 +167,7 @@ static constexpr IntrinsicHandler handlers[]{
&I::genCAssociatedCPtr,
{{{"c_ptr_1", asAddr}, {"c_ptr_2", asAddr, handleDynamicOptional}}},
/*isElemental=*/false},
+ {"c_devloc", &I::genCDevLoc, {{{"x", asBox}}}, /*isElemental=*/false},
{"c_f_pointer",
&I::genCFPointer,
{{{"cptr", asValue},
@@ -2828,11 +2829,14 @@ static mlir::Value getAddrFromBox(fir::FirOpBuilder &builder,
static fir::ExtendedValue
genCLocOrCFunLoc(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args,
- bool isFunc = false) {
+ bool isFunc = false, bool isDevLoc = false) {
assert(args.size() == 1);
mlir::Value res = builder.create<fir::AllocaOp>(loc, resultType);
- mlir::Value resAddr =
- fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
+ mlir::Value resAddr;
+ if (isDevLoc)
+ resAddr = fir::factory::genCDevPtrAddr(builder, loc, res, resultType);
+ else
+ resAddr = fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
assert(fir::isa_box_type(fir::getBase(args[0]).getType()) &&
"argument must have been lowered to box type");
mlir::Value argAddr = getAddrFromBox(builder, loc, args[0], isFunc);
@@ -2889,6 +2893,14 @@ IntrinsicLibrary::genCAssociatedCPtr(mlir::Type resultType,
return genCAssociated(builder, loc, resultType, args);
}
+// C_DEVLOC
+fir::ExtendedValue
+IntrinsicLibrary::genCDevLoc(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ return genCLocOrCFunLoc(builder, loc, resultType, args, /*isFunc=*/false,
+ /*isDevLoc=*/true);
+}
+
// C_F_POINTER
void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90
index ef206dfd943102..ab12d6c3089c59 100644
--- a/flang/module/__fortran_builtins.f90
+++ b/flang/module/__fortran_builtins.f90
@@ -22,6 +22,9 @@
intrinsic :: __builtin_c_loc
public :: __builtin_c_loc
+ intrinsic :: __builtin_c_devloc
+ public :: __builtin_c_devloc
+
intrinsic :: __builtin_c_f_pointer
public :: __builtin_c_f_pointer
@@ -144,6 +147,7 @@
type :: __force_derived_type_instantiations
type(__builtin_c_ptr) :: c_ptr
+ type(__builtin_c_devptr) :: c_devptr
type(__builtin_c_funptr) :: c_funptr
type(__builtin_event_type) :: event_type
type(__builtin_lock_type) :: lock_type
diff --git a/flang/module/__fortran_type_info.f90 b/flang/module/__fortran_type_info.f90
index 5f2273de1e3d1e..b30a6bf6975638 100644
--- a/flang/module/__fortran_type_info.f90
+++ b/flang/module/__fortran_type_info.f90
@@ -14,7 +14,7 @@
module __fortran_type_info
use, intrinsic :: __fortran_builtins, &
- only: __builtin_c_ptr, __builtin_c_funptr
+ only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr
implicit none
! Set PRIVATE by default to explicitly only export what is meant
diff --git a/flang/test/Lower/CUDA/cuda-cdevloc.cuf b/flang/test/Lower/CUDA/cuda-cdevloc.cuf
new file mode 100644
index 00000000000000..a71490207909a8
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-cdevloc.cuf
@@ -0,0 +1,21 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+attributes(global) subroutine testcdevloc(a)
+ use __fortran_builtins, only: c_devloc => __builtin_c_devloc
+ integer, device :: a(10)
+ print*, c_devloc(a(1))
+end
+
+! CHECK-LABEL: func.func @_QPtestcdevloc(
+! CHECK-SAME: %[[A_ARG:.*]]: !fir.ref<!fir.array<10xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestcdevlocEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: %[[A1:.*]] = hlfir.designate %[[A]]#0 (%c1{{.*}}) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[BOX:.*]] = fir.embox %[[A1]] : (!fir.ref<i32>) -> !fir.box<i32>
+! CHECK: %[[CDEVPTR:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
+! CHECK: %[[FIELD_CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
+! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], %[[FIELD_CPTR]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>, !fir.field) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
+! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<i32>) -> !fir.ref<i32>
+! CHECK: %[[ADDRESS_A1:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref<i32>) -> i64
+! CHECK: fir.store %[[ADDRESS_A1]] to %[[COORD_ADDRESS]] : !fir.ref<i64>
|
I am adding @jeanPerier on this review because I am not able to review it all by myself. |
|
Add
c_devloc
as intrinsic and inline it during lowering.c_devloc
is used in CUDA Fortran to get the address of device variables.For the moment, we borrow almost all semantic checks from
c_loc
except for the pointer or target restriction. The specifications ofc_devloc
are are pretty vague and we will relax/enforce the restrictions based on library and apps usage comparing them to the reference compiler.