Skip to content

Commit 7ffeaf0

Browse files
harishch4DominikAdamskitblah
authored
[MLIR][Flang][OpenMP] Implement lowering simd aligned to MLIR (#95198)
Rebased @DominikAdamski patch: https://reviews.llvm.org/D142722 --------- Co-authored-by: Dominik Adamski <[email protected]> Co-authored-by: Tom Eccles <[email protected]>
1 parent 85e8d62 commit 7ffeaf0

File tree

6 files changed

+170
-5
lines changed

6 files changed

+170
-5
lines changed

flang/lib/Lower/OpenMP/ClauseProcessor.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "flang/Lower/PFTBuilder.h"
1717
#include "flang/Parser/tools.h"
1818
#include "flang/Semantics/tools.h"
19+
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
1920

2021
namespace Fortran {
2122
namespace lower {
@@ -514,6 +515,65 @@ bool ClauseProcessor::processUntied(mlir::omp::UntiedClauseOps &result) const {
514515
//===----------------------------------------------------------------------===//
515516
// ClauseProcessor repeatable clauses
516517
//===----------------------------------------------------------------------===//
518+
/// Build a map from target-feature name to its enabled state for \p module.
///
/// The feature strings are read from the attribute returned by
/// fir::getTargetFeatures. The first character of each string is treated as a
/// prefix: a leading '+' maps the remainder of the string to true, and any
/// other leading character maps it to false. The returned map is empty when
/// fir::getTargetFeatures yields a null attribute.
static llvm::StringMap<bool> getTargetFeatures(mlir::ModuleOp module) {
  llvm::StringMap<bool> featuresMap;
  if (mlir::LLVM::TargetFeaturesAttr features =
          fir::getTargetFeatures(module)) {
    llvm::ArrayRef<mlir::StringAttr> featureAttrs = features.getFeatures();
    for (const mlir::StringAttr &featureAttr : featureAttrs) {
      llvm::StringRef featureKeyString = featureAttr.strref();
      // An empty entry has no prefix character to inspect; skip it instead of
      // indexing past the end of the string.
      if (featureKeyString.empty())
        continue;
      featuresMap[featureKeyString.substr(1)] = (featureKeyString[0] == '+');
    }
  }
  return featuresMap;
}
531+
532+
static void
533+
addAlignedClause(lower::AbstractConverter &converter,
534+
const omp::clause::Aligned &clause,
535+
llvm::SmallVectorImpl<mlir::Value> &alignedVars,
536+
llvm::SmallVectorImpl<mlir::Attribute> &alignmentAttrs) {
537+
using Aligned = omp::clause::Aligned;
538+
lower::StatementContext stmtCtx;
539+
mlir::IntegerAttr alignmentValueAttr;
540+
int64_t alignment = 0;
541+
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
542+
543+
if (auto &alignmentValueParserExpr =
544+
std::get<std::optional<Aligned::Alignment>>(clause.t)) {
545+
mlir::Value operand = fir::getBase(
546+
converter.genExprValue(*alignmentValueParserExpr, stmtCtx));
547+
alignment = *fir::getIntIfConstant(operand);
548+
} else {
549+
llvm::StringMap<bool> featuresMap = getTargetFeatures(builder.getModule());
550+
llvm::Triple triple = fir::getTargetTriple(builder.getModule());
551+
alignment =
552+
llvm::OpenMPIRBuilder::getOpenMPDefaultSimdAlign(triple, featuresMap);
553+
}
554+
555+
// The default alignment for some targets is equal to 0.
556+
// Do not generate alignment assumption if alignment is less than or equal to
557+
// 0.
558+
if (alignment > 0) {
559+
auto &objects = std::get<omp::ObjectList>(clause.t);
560+
if (!objects.empty())
561+
genObjectList(objects, converter, alignedVars);
562+
alignmentValueAttr = builder.getI64IntegerAttr(alignment);
563+
// All the list items in a aligned clause will have same alignment
564+
for (std::size_t i = 0; i < objects.size(); i++)
565+
alignmentAttrs.push_back(alignmentValueAttr);
566+
}
567+
}
568+
569+
/// Process `aligned` clauses: findRepeatableClause is given a callback that
/// forwards each matched clause to addAlignedClause, which fills
/// \p result.alignedVars and \p result.alignmentAttrs. The value returned by
/// findRepeatableClause is returned unchanged.
bool ClauseProcessor::processAligned(
    mlir::omp::AlignedClauseOps &result) const {
  // The source position passed to the callback is not needed here.
  auto lowerAligned = [&](const omp::clause::Aligned &alignedClause,
                          const parser::CharBlock &) {
    addAlignedClause(converter, alignedClause, result.alignedVars,
                     result.alignmentAttrs);
  };
  return findRepeatableClause<omp::clause::Aligned>(lowerAligned);
}
517577

518578
bool ClauseProcessor::processAllocate(
519579
mlir::omp::AllocateClauseOps &result) const {

flang/lib/Lower/OpenMP/ClauseProcessor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ class ClauseProcessor {
9090
bool processUntied(mlir::omp::UntiedClauseOps &result) const;
9191

9292
// 'Repeatable' clauses: They can appear multiple times in the clause list.
93+
bool processAligned(mlir::omp::AlignedClauseOps &result) const;
9394
bool processAllocate(mlir::omp::AllocateClauseOps &result) const;
9495
bool processCopyin() const;
9596
bool processCopyprivate(mlir::Location currentLocation,
@@ -140,7 +141,6 @@ class ClauseProcessor {
140141
template <typename T>
141142
bool processMotionClauses(lower::StatementContext &stmtCtx,
142143
mlir::omp::MapClauseOps &result);
143-
144144
// Call this method for these clauses that should be supported but are not
145145
// implemented yet. It triggers a compilation error if any of the given
146146
// clauses is found.

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,15 +1070,15 @@ static void genSimdClauses(lower::AbstractConverter &converter,
10701070
const List<Clause> &clauses, mlir::Location loc,
10711071
mlir::omp::SimdClauseOps &clauseOps) {
10721072
ClauseProcessor cp(converter, semaCtx, clauses);
1073+
cp.processAligned(clauseOps);
10731074
cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps);
10741075
cp.processReduction(loc, clauseOps);
10751076
cp.processSafelen(clauseOps);
10761077
cp.processSimdlen(clauseOps);
1077-
// TODO Support delayed privatization.
10781078

1079-
cp.processTODO<clause::Aligned, clause::Allocate, clause::Linear,
1080-
clause::Nontemporal, clause::Order>(
1081-
loc, llvm::omp::Directive::OMPD_simd);
1079+
// TODO Support delayed privatization.
1080+
cp.processTODO<clause::Allocate, clause::Linear, clause::Nontemporal,
1081+
clause::Order>(loc, llvm::omp::Directive::OMPD_simd);
10821082
}
10831083

10841084
static void genSingleClauses(lower::AbstractConverter &converter,

flang/test/Lower/OpenMP/simd.f90

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,44 @@ subroutine simd_with_collapse_clause(n)
182182
end do
183183
!$OMP END SIMD
184184
end subroutine
185+
186+
187+
!CHECK: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref
188+
!CHECK-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr
189+
!CHECK-SAME: {__address:i64}>> {fir.bindc_name = "a"}) {
190+
!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0
191+
!CHECK-SAME: {uniq_name = "_QFsimdloop_aligned_cptrEa"} :
192+
!CHECK-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.dscope) ->
193+
!CHECK-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>,
194+
!CHECK-SAME: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
195+
subroutine simdloop_aligned_cptr( A)
196+
use iso_c_binding
197+
integer :: i
198+
type (c_ptr) :: A
199+
!CHECK: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref
200+
!CHECK-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
201+
!CHECK-SAME: -> 256 : i64)
202+
!$OMP SIMD ALIGNED(A:256)
203+
do i = 1, 10
204+
call c_test_call(A)
205+
end do
206+
!$OMP END SIMD
207+
end subroutine
208+
209+
!CHECK-LABEL: func @_QPsimdloop_aligned_allocatable
210+
subroutine simdloop_aligned_allocatable()
211+
integer :: i
212+
integer, allocatable :: A(:)
213+
allocate(A(10))
214+
!CHECK: %[[A_PTR:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "a",
215+
!CHECK-SAME: uniq_name = "_QFsimdloop_aligned_allocatableEa"}
216+
!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_PTR]] {fortran_attrs = #fir.var_attrs<allocatable>,
217+
!CHECK-SAME: uniq_name = "_QFsimdloop_aligned_allocatableEa"} :
218+
!CHECK-SAME: (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) ->
219+
!CHECK-SAME: (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
220+
!CHECK: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -> 256 : i64)
221+
!$OMP SIMD ALIGNED(A:256)
222+
do i = 1, 10
223+
A(i) = i
224+
end do
225+
end subroutine
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
! Tests for 2.9.3.1 Simd and target-dependent default alignment for AArch64
2+
! The default alignment for AArch64 is 0, so we do not emit the aligned clause
3+
! REQUIRES: aarch64-registered-target
4+
! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s
5+
subroutine simdloop_aligned_cptr(A)
6+
use iso_c_binding
7+
integer :: i
8+
type (c_ptr) :: A
9+
!CHECK: omp.simd
10+
!CHECK-NOT: aligned(
11+
!$OMP SIMD ALIGNED(A)
12+
do i = 1, 10
13+
call c_test_call(A)
14+
end do
15+
!$OMP END SIMD
16+
end subroutine
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
! Tests for 2.9.3.1 Simd and target-dependent default alignment for x86
2+
! REQUIRES: x86-registered-target
3+
! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-hlfir -fopenmp -target-cpu x86-64 %s -o - | FileCheck --check-prefixes=DEFAULT %s
4+
! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-hlfir -fopenmp -target-cpu x86-64 -target-feature +avx %s -o - | FileCheck --check-prefixes=AVX %s
5+
! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-hlfir -fopenmp -target-cpu x86-64 -target-feature +avx512f %s -o - | FileCheck --check-prefixes=AVX512F %s
6+
!DEFAULT: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref
7+
!DEFAULT-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr
8+
!DEFAULT-SAME: {__address:i64}>> {fir.bindc_name = "a"}) {
9+
!DEFAULT: %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0
10+
!DEFAULT-SAME: {uniq_name = "_QFsimdloop_aligned_cptrEa"} :
11+
!DEFAULT-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.dscope) ->
12+
!DEFAULT-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>,
13+
!DEFAULT-SAME: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
14+
!AVX: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref
15+
!AVX-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr
16+
!AVX-SAME: {__address:i64}>> {fir.bindc_name = "a"}) {
17+
!AVX: %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0
18+
!AVX-SAME: {uniq_name = "_QFsimdloop_aligned_cptrEa"} :
19+
!AVX-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.dscope) ->
20+
!AVX-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>,
21+
!AVX-SAME: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
22+
!AVX512F: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref
23+
!AVX512F-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr
24+
!AVX512F-SAME: {__address:i64}>> {fir.bindc_name = "a"}) {
25+
!AVX512F: %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0
26+
!AVX512F-SAME: {uniq_name = "_QFsimdloop_aligned_cptrEa"} :
27+
!AVX512F-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.dscope) ->
28+
!AVX512F-SAME: (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>,
29+
!AVX512F-SAME: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
30+
subroutine simdloop_aligned_cptr(A)
31+
use iso_c_binding
32+
integer :: i
33+
type (c_ptr) :: A
34+
!DEFAULT: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref
35+
!DEFAULT-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
36+
!DEFAULT-SAME: -> 128 : i64)
37+
!AVX: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref
38+
!AVX-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
39+
!AVX-SAME: -> 256 : i64)
40+
!AVX512F: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref
41+
!AVX512F-SAME: <!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
42+
!AVX512F-SAME: -> 512 : i64)
43+
!$OMP SIMD ALIGNED(A)
44+
do i = 1, 10
45+
call c_test_call(A)
46+
end do
47+
!$OMP END SIMD
48+
end subroutine

0 commit comments

Comments
 (0)