Skip to content

[flang][cuda] Relax semantic check in cuf kernel and openacc compute constructs #125750

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main
Feb 4, 2025

Conversation

clementval
Copy link
Contributor

The previous patch was too restrictive: it did not treat CUF kernels and OpenACC compute constructs as device contexts.

@llvmbot added the "flang" (Flang issues not falling into any other category) and "flang:semantics" labels on Feb 4, 2025
@llvmbot
Copy link
Member

llvmbot commented Feb 4, 2025

@llvm/pr-subscribers-flang-semantics

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

The previous patch was too restrictive: it did not treat CUF kernels and OpenACC compute constructs as device contexts.


Full diff: https://github.com/llvm/llvm-project/pull/125750.diff

3 Files Affected:

  • (modified) flang/lib/Semantics/assignment.cpp (+42-1)
  • (modified) flang/lib/Semantics/assignment.h (+8)
  • (modified) flang/test/Semantics/cuf18.cuf (+58-1)
diff --git a/flang/lib/Semantics/assignment.cpp b/flang/lib/Semantics/assignment.cpp
index 2b562571a679ee..627983d19a8221 100644
--- a/flang/lib/Semantics/assignment.cpp
+++ b/flang/lib/Semantics/assignment.cpp
@@ -42,6 +42,7 @@ class AssignmentContext {
   void Analyze(const parser::AssignmentStmt &);
   void Analyze(const parser::PointerAssignmentStmt &);
   void Analyze(const parser::ConcurrentControl &);
+  int deviceConstructDepth_{0};
 
 private:
   bool CheckForPureContext(const SomeExpr &rhs, parser::CharBlock rhsSource);
@@ -94,7 +95,7 @@ void AssignmentContext::Analyze(const parser::AssignmentStmt &stmt) {
             common::LanguageFeature::CUDA)) {
       const auto &scope{context_.FindScope(lhsLoc)};
       const Scope &progUnit{GetProgramUnitContaining(scope)};
-      if (!IsCUDADeviceContext(&progUnit)) {
+      if (!IsCUDADeviceContext(&progUnit) && deviceConstructDepth_ == 0) {
         if (Fortran::evaluate::HasCUDADeviceAttrs(lhs) &&
             Fortran::evaluate::HasCUDAImplicitTransfer(rhs)) {
           context_.Say(lhsLoc, "Unsupported CUDA data transfer"_err_en_US);
@@ -228,6 +229,46 @@ void AssignmentChecker::Enter(const parser::MaskedElsewhereStmt &x) {
 void AssignmentChecker::Leave(const parser::MaskedElsewhereStmt &) {
   context_.value().PopWhereContext();
 }
+void AssignmentChecker::Enter(const parser::CUFKernelDoConstruct &x) {
+  ++context_.value().deviceConstructDepth_;
+}
+void AssignmentChecker::Leave(const parser::CUFKernelDoConstruct &) {
+  --context_.value().deviceConstructDepth_;
+}
+static bool IsOpenACCComputeConstruct(const parser::OpenACCBlockConstruct &x) {
+  const auto &beginBlockDirective =
+      std::get<Fortran::parser::AccBeginBlockDirective>(x.t);
+  const auto &blockDirective =
+      std::get<Fortran::parser::AccBlockDirective>(beginBlockDirective.t);
+  if (blockDirective.v == llvm::acc::ACCD_parallel ||
+      blockDirective.v == llvm::acc::ACCD_serial ||
+      blockDirective.v == llvm::acc::ACCD_kernels) {
+    return true;
+  }
+  return false;
+}
+void AssignmentChecker::Enter(const parser::OpenACCBlockConstruct &x) {
+  if (IsOpenACCComputeConstruct(x)) {
+    ++context_.value().deviceConstructDepth_;
+  }
+}
+void AssignmentChecker::Leave(const parser::OpenACCBlockConstruct &x) {
+  if (IsOpenACCComputeConstruct(x)) {
+    --context_.value().deviceConstructDepth_;
+  }
+}
+void AssignmentChecker::Enter(const parser::OpenACCCombinedConstruct &) {
+  ++context_.value().deviceConstructDepth_;
+}
+void AssignmentChecker::Leave(const parser::OpenACCCombinedConstruct &) {
+  --context_.value().deviceConstructDepth_;
+}
+void AssignmentChecker::Enter(const parser::OpenACCLoopConstruct &) {
+  ++context_.value().deviceConstructDepth_;
+}
+void AssignmentChecker::Leave(const parser::OpenACCLoopConstruct &) {
+  --context_.value().deviceConstructDepth_;
+}
 
 } // namespace Fortran::semantics
 template class Fortran::common::Indirection<
diff --git a/flang/lib/Semantics/assignment.h b/flang/lib/Semantics/assignment.h
index 95d7b3cf91b17a..a67bee4a03dfc0 100644
--- a/flang/lib/Semantics/assignment.h
+++ b/flang/lib/Semantics/assignment.h
@@ -45,6 +45,14 @@ class AssignmentChecker : public virtual BaseChecker {
   void Leave(const parser::EndWhereStmt &);
   void Enter(const parser::MaskedElsewhereStmt &);
   void Leave(const parser::MaskedElsewhereStmt &);
+  void Enter(const parser::CUFKernelDoConstruct &);
+  void Leave(const parser::CUFKernelDoConstruct &);
+  void Enter(const parser::OpenACCBlockConstruct &);
+  void Leave(const parser::OpenACCBlockConstruct &);
+  void Enter(const parser::OpenACCCombinedConstruct &);
+  void Leave(const parser::OpenACCCombinedConstruct &);
+  void Enter(const parser::OpenACCLoopConstruct &);
+  void Leave(const parser::OpenACCLoopConstruct &);
 
 private:
   common::Indirection<AssignmentContext> context_;
diff --git a/flang/test/Semantics/cuf18.cuf b/flang/test/Semantics/cuf18.cuf
index ce9a2a31ca0d15..e51e5c9f97e03f 100644
--- a/flang/test/Semantics/cuf18.cuf
+++ b/flang/test/Semantics/cuf18.cuf
@@ -1,10 +1,67 @@
-! RUN: %python %S/test_errors.py %s %flang_fc1
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenacc
 
 subroutine sub1()
   real, allocatable, device :: a(:)
+  integer :: i
 
 !ERROR: Unsupported CUDA data transfer
   a = a + 10 ! Illegal expression according to 3.4.2
+
+  !$cuf kernel do
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in cuf kernel do
+  end do
+
+  !$acc parallel loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in openacc combined construct
+  end do
+
+  !$acc serial loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in openacc combined construct
+  end do
+
+  !$acc kernels loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in openacc combined construct
+  end do
+
+  !$acc parallel
+  !$acc loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in nested openacc construct
+  end do
+  !$acc end parallel
+
+  !$acc kernels
+  !$acc loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in nested openacc construct
+  end do
+  !$acc end kernels
+
+  !$acc serial
+  !$acc loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in nested openacc construct
+  end do
+  !$acc end serial
+
+  !$acc loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok acc loop
+  end do
+
+  !$acc data
+
+  do i = 1, 10
+!ERROR: Unsupported CUDA data transfer
+    a(i) = a(i) + 10
+  end do
+
+  !$acc end data
+
 end subroutine
 
 

@clementval clementval merged commit bbc90f8 into llvm:main Feb 4, 2025
9 of 10 checks passed
Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Feb 11, 2025
… constructs (llvm#125750)

The previous patch was too restrictive: it did not treat CUF kernels
and OpenACC compute constructs as device contexts.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
flang:semantics flang Flang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants