Skip to content

Commit 0f5dcae

Browse files
SC llvm teamSC llvm team
authored andcommitted
Merged main:86bb713142c3 into amd-gfx:fd6ee38e1eb7
Local branch amd-gfx fd6ee38 Merged main:a24418375a70 into amd-gfx:923f1fe3b764 Remote branch main 86bb713 [OpenMP][FIX] Enlarge thread state array, improve test and add second
2 parents fd6ee38 + 86bb713 commit 0f5dcae

File tree

11 files changed

+193
-30
lines changed

11 files changed

+193
-30
lines changed

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 478308
19+
#define LLVM_MAIN_REVISION 478311
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/tools/llvm-profgen/ProfiledBinary.cpp

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -480,12 +480,6 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
480480
if (ShowDisassembly)
481481
outs() << '<' << SymbolName << ">:\n";
482482

483-
auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
484-
WithColor::warning() << "Invalid instructions at "
485-
<< format("%8" PRIx64, Start) << " - "
486-
<< format("%8" PRIx64, End) << "\n";
487-
};
488-
489483
uint64_t Address = StartAddress;
490484
// Size of a consecutive invalid instruction range starting from Address -1
491485
// backwards.
@@ -578,7 +572,8 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
578572
}
579573

580574
if (InvalidInstLength) {
581-
WarnInvalidInsts(Address - InvalidInstLength, Address - 1);
575+
AddrsWithInvalidInstruction.insert(
576+
{Address - InvalidInstLength, Address - 1});
582577
InvalidInstLength = 0;
583578
}
584579
} else {
@@ -589,7 +584,8 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
589584
}
590585

591586
if (InvalidInstLength)
592-
WarnInvalidInsts(Address - InvalidInstLength, Address - 1);
587+
AddrsWithInvalidInstruction.insert(
588+
{Address - InvalidInstLength, Address - 1});
593589

594590
if (ShowDisassembly)
595591
outs() << "\n";
@@ -708,6 +704,19 @@ void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
708704
}
709705
}
710706

707+
if (!AddrsWithInvalidInstruction.empty()) {
708+
if (ShowDetailedWarning) {
709+
for (auto &Addr : AddrsWithInvalidInstruction) {
710+
WithColor::warning()
711+
<< "Invalid instructions at " << format("%8" PRIx64, Addr.first)
712+
<< " - " << format("%8" PRIx64, Addr.second) << "\n";
713+
}
714+
}
715+
WithColor::warning() << "Found " << AddrsWithInvalidInstruction.size()
716+
<< " invalid instructions\n";
717+
AddrsWithInvalidInstruction.clear();
718+
}
719+
711720
// Dissassemble rodata section to check if FS discriminator symbol exists.
712721
checkUseFSDiscriminator(Obj, AllSymbols);
713722
}
@@ -792,10 +801,12 @@ void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
792801
FRange.StartAddress = StartAddress;
793802
FRange.EndAddress = EndAddress;
794803
} else {
795-
WithColor::warning()
796-
<< "Duplicated symbol start address at "
797-
<< format("%8" PRIx64, StartAddress) << " "
798-
<< R.first->second.getFuncName() << " and " << Name << "\n";
804+
AddrsWithMultipleSymbols.insert(StartAddress);
805+
if (ShowDetailedWarning)
806+
WithColor::warning()
807+
<< "Duplicated symbol start address at "
808+
<< format("%8" PRIx64, StartAddress) << " "
809+
<< R.first->second.getFuncName() << " and " << Name << "\n";
799810
}
800811
}
801812
}
@@ -839,14 +850,18 @@ void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
839850
if (BinaryFunctions.empty())
840851
WithColor::warning() << "Loading of DWARF info completed, but no binary "
841852
"functions have been retrieved.\n";
842-
843-
844853
// Populate the hash binary function map for MD5 function name lookup. This
845854
// is done after BinaryFunctions are finalized.
846855
for (auto &BinaryFunction : BinaryFunctions) {
847856
HashBinaryFunctions[MD5Hash(StringRef(BinaryFunction.first))] =
848857
&BinaryFunction.second;
849858
}
859+
860+
if (!AddrsWithMultipleSymbols.empty()) {
861+
WithColor::warning() << "Found " << AddrsWithMultipleSymbols.size()
862+
<< " start addresses with multiple symbols\n";
863+
AddrsWithMultipleSymbols.clear();
864+
}
850865
}
851866

852867
void ProfiledBinary::populateSymbolListFromDWARF(
@@ -881,7 +896,8 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
881896
SampleContextFrameVector CallStack;
882897
for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
883898
const auto &CallerFrame = InlineStack.getFrame(I);
884-
if (CallerFrame.FunctionName.empty() || (CallerFrame.FunctionName == "<invalid>"))
899+
if (CallerFrame.FunctionName.empty() ||
900+
(CallerFrame.FunctionName == "<invalid>"))
885901
break;
886902

887903
StringRef FunctionName(CallerFrame.FunctionName);

llvm/tools/llvm-profgen/ProfiledBinary.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,10 @@ class ProfiledBinary {
230230
// GUID to Elf symbol start address map
231231
DenseMap<uint64_t, uint64_t> SymbolStartAddrs;
232232

233+
// These maps are for temporary use of warning diagnosis.
234+
DenseSet<int64_t> AddrsWithMultipleSymbols;
235+
DenseSet<std::pair<uint64_t, uint64_t>> AddrsWithInvalidInstruction;
236+
233237
// Start address to Elf symbol GUID map
234238
std::unordered_multimap<uint64_t, uint64_t> StartAddrToSymMap;
235239

@@ -529,7 +533,7 @@ class ProfiledBinary {
529533

530534
void flushSymbolizer() { Symbolizer.reset(); }
531535

532-
MissingFrameInferrer* getMissingContextInferrer() {
536+
MissingFrameInferrer *getMissingContextInferrer() {
533537
return MissingContextInferrer.get();
534538
}
535539

mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,17 @@ def Bufferization_Dialect : Dialect {
6060
/// arguments during One-Shot Module Bufferize.
6161
constexpr const static ::llvm::StringLiteral
6262
kBufferLayoutAttrName = "bufferization.buffer_layout";
63+
64+
/// An attribute that can be attached to ops with an allocation and/or
65+
/// deallocation side effect. It indicates that the op is under a "manual
66+
/// deallocation" scheme. In the case of an allocation op, the returned
67+
/// value is *not* an automatically managed allocation and assigned an
68+
/// ownership of "false". Furthermore, only deallocation ops that are
69+
/// guaranteed to deallocate a buffer under "manual deallocation" are
70+
/// allowed to have this attribute. (Deallocation ops without this
71+
/// attribute are rejected by the ownership-based buffer deallocation pass.)
72+
constexpr const static ::llvm::StringLiteral
73+
kManualDeallocation = "bufferization.manual_deallocation";
6374
}];
6475
let hasOperationAttrVerify = 1;
6576
}

mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ constexpr const ::llvm::StringLiteral BufferizationDialect::kWritableAttrName;
2828
constexpr const ::llvm::StringLiteral
2929
BufferizationDialect::kBufferLayoutAttrName;
3030

31+
/// An attribute that can be attached to ops with an allocation and/or
32+
/// deallocation side effect. It indicates that the op is under a "manual
33+
/// deallocation" scheme. In the case of an allocation op, the returned
34+
/// value is *not* an automatically managed allocation and assigned an
35+
/// ownership of "false". Furthermore, only deallocation ops that are
36+
/// guaranteed to deallocate a buffer under "manual deallocation" are
37+
/// allowed to have this attribute. (Deallocation ops without this
38+
/// attribute are rejected by the ownership-based buffer deallocation pass.)
39+
constexpr const ::llvm::StringLiteral BufferizationDialect::kManualDeallocation;
40+
3141
//===----------------------------------------------------------------------===//
3242
// Bufferization Dialect Interfaces
3343
//===----------------------------------------------------------------------===//
@@ -105,6 +115,16 @@ BufferizationDialect::verifyOperationAttribute(Operation *op,
105115
NamedAttribute attr) {
106116
using bufferization::BufferizableOpInterface;
107117

118+
if (attr.getName() == kManualDeallocation) {
119+
if (!mlir::hasEffect<MemoryEffects::Allocate>(op) &&
120+
!mlir::hasEffect<MemoryEffects::Free>(op))
121+
return op->emitOpError("attribute '")
122+
<< kManualDeallocation
123+
<< "' can be used only on ops that have an allocation and/or free "
124+
"side effect";
125+
return success();
126+
}
127+
108128
return op->emitError()
109129
<< "attribute '" << attr.getName()
110130
<< "' not supported as an op attribute by the bufferization dialect";

mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
2222
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
2323
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
24+
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
2425
#include "mlir/Dialect/Func/IR/FuncOps.h"
2526
#include "mlir/Dialect/MemRef/IR/MemRef.h"
2627
#include "mlir/Dialect/SCF/IR/SCF.h"
@@ -856,13 +857,32 @@ FailureOr<Operation *> BufferDeallocation::handleInterface(CallOpInterface op) {
856857
FailureOr<Operation *>
857858
BufferDeallocation::handleInterface(MemoryEffectOpInterface op) {
858859
auto *block = op->getBlock();
860+
OpBuilder builder = OpBuilder::atBlockBegin(block);
859861

860-
for (auto operand : llvm::make_filter_range(op->getOperands(), isMemref))
861-
if (op.getEffectOnValue<MemoryEffects::Free>(operand).has_value())
862-
return op->emitError(
863-
"memory free side-effect on MemRef value not supported!");
862+
for (auto operand : llvm::make_filter_range(op->getOperands(), isMemref)) {
863+
if (op.getEffectOnValue<MemoryEffects::Free>(operand).has_value()) {
864+
if (!op->hasAttr(BufferizationDialect::kManualDeallocation))
865+
return op->emitError(
866+
"memory free side-effect on MemRef value not supported!");
867+
868+
// Buffers that were allocated under "manual deallocation" may be
869+
// manually deallocated. We insert a runtime assertion to cover certain
870+
// cases of invalid IR where an automatically managed buffer allocation
871+
// is manually deallocated. This is not a bulletproof check!
872+
OpBuilder::InsertionGuard g(builder);
873+
builder.setInsertionPoint(op);
874+
Ownership ownership = state.getOwnership(operand, block);
875+
if (ownership.isUnique()) {
876+
Value ownershipInverted = builder.create<arith::XOrIOp>(
877+
op.getLoc(), ownership.getIndicator(),
878+
buildBoolValue(builder, op.getLoc(), true));
879+
builder.create<cf::AssertOp>(
880+
op.getLoc(), ownershipInverted,
881+
"expected that the block does not have ownership");
882+
}
883+
}
884+
}
864885

865-
OpBuilder builder = OpBuilder::atBlockBegin(block);
866886
for (auto res : llvm::make_filter_range(op->getResults(), isMemref)) {
867887
auto allocEffect = op.getEffectOnValue<MemoryEffects::Allocate>(res);
868888
if (allocEffect.has_value()) {
@@ -880,6 +900,15 @@ BufferDeallocation::handleInterface(MemoryEffectOpInterface op) {
880900
continue;
881901
}
882902

903+
if (op->hasAttr(BufferizationDialect::kManualDeallocation)) {
904+
// This allocation will be deallocated manually. Assign an ownership of
905+
// "false", so that it will never be deallocated by the buffer
906+
// deallocation pass.
907+
state.resetOwnerships(res, block);
908+
state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
909+
continue;
910+
}
911+
883912
state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), true));
884913
state.addMemrefToDeallocate(res, block);
885914
}

mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,35 @@ func.func @op_without_aliasing_and_allocation() -> memref<4xf32> {
124124
// CHECK: [[CLONE:%.+]] = bufferization.clone [[GLOBAL]]
125125
// CHECK: scf.yield [[CLONE]] :
126126
// CHECK: return [[RES]] :
127+
128+
// -----
129+
130+
// Allocations with "bufferization.manual_deallocation" are assigned an
131+
// ownership of "false".
132+
133+
func.func @manual_deallocation(%c: i1, %f: f32, %idx: index) -> f32 {
134+
%0 = memref.alloc() {bufferization.manual_deallocation} : memref<5xf32>
135+
linalg.fill ins(%f : f32) outs(%0 : memref<5xf32>)
136+
%1 = memref.alloc() : memref<5xf32>
137+
linalg.fill ins(%f : f32) outs(%1 : memref<5xf32>)
138+
%2 = arith.select %c, %0, %1 : memref<5xf32>
139+
%3 = memref.load %2[%idx] : memref<5xf32>
140+
141+
// Only buffers that are under "manual deallocation" are allowed to be
142+
// deallocated with memref.dealloc. For consistency reasons, the
143+
// manual_deallocation attribute must also be specified. A runtime insertion
144+
// is inserted to ensure that we do not have ownership. (This is not a
145+
// bulletproof check, but covers some cases of invalid IR.)
146+
memref.dealloc %0 {bufferization.manual_deallocation} : memref<5xf32>
147+
148+
return %3 : f32
149+
}
150+
151+
// CHECK-LABEL: func @manual_deallocation(
152+
// CHECK: %[[true:.*]] = arith.constant true
153+
// CHECK: %[[manual_alloc:.*]] = memref.alloc() {bufferization.manual_deallocation} : memref<5xf32>
154+
// CHECK: %[[managed_alloc:.*]] = memref.alloc() : memref<5xf32>
155+
// CHECK: %[[selected:.*]] = arith.select
156+
// CHECK: cf.assert %[[true]], "expected that the block does not have ownership"
157+
// CHECK: memref.dealloc %[[manual_alloc]]
158+
// CHECK: bufferization.dealloc (%[[managed_alloc]] : memref<5xf32>) if (%[[true]])

mlir/test/Dialect/Bufferization/invalid.mlir

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,10 @@ func.func @invalid_dealloc_wrong_number_of_results(%arg0: memref<2xf32>, %arg1:
136136
%0:3 = "bufferization.dealloc"(%arg0, %arg1, %arg2, %arg2, %arg1) <{operandSegmentSizes = array<i32: 2, 2, 1>}> : (memref<2xf32>, memref<4xi32>, i1, i1, memref<4xi32>) -> (i1, i1, i1)
137137
return %0#0 : i1
138138
}
139+
140+
// -----
141+
142+
func.func @invalid_manual_deallocation() {
143+
// expected-error @below{{op attribute 'bufferization.manual_deallocation' can be used only on ops that have an allocation and/or free side effect}}
144+
arith.constant {bufferization.manual_deallocation} 0 : index
145+
}

openmp/libomptarget/DeviceRTL/src/State.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,8 @@ void state::enterDataEnvironment(IdentTy *Ident) {
262262
memory::allocGlobal(sizeof(ThreadStateTy), "ThreadStates alloc"));
263263
uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(&ThreadStates);
264264
if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
265-
uint32_t Bytes = sizeof(ThreadStates[0]) * mapping::getMaxTeamThreads();
265+
uint32_t Bytes =
266+
sizeof(ThreadStates[0]) * mapping::getNumberOfThreadsInBlock();
266267
void *ThreadStatesPtr =
267268
memory::allocGlobal(Bytes, "Thread state array allocation");
268269
memset(ThreadStatesPtr, 0, Bytes);

openmp/libomptarget/test/offloading/thread_state_1.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// Still broken "without optimizations"
2-
// XUN: %libomptarget-compile-run-and-check-generic
1+
// RUN: %libomptarget-compile-run-and-check-generic
32
// RUN: %libomptarget-compileopt-run-and-check-generic
43

54
#include <omp.h>
@@ -10,10 +9,10 @@ int main() {
109
int o_lvl = 111, i_lvl = 222, o_tid = 333, i_tid = 333, o_nt = 444,
1110
i_nt = 555;
1211
#pragma omp target teams map(tofrom : o_lvl, i_lvl, o_tid, i_tid, o_nt, i_nt) \
13-
num_teams(2) thread_limit(2)
12+
num_teams(2) thread_limit(64)
1413
{
1514
if (omp_get_team_num() == 0) {
16-
#pragma omp parallel num_threads(128)
15+
#pragma omp parallel num_threads(64)
1716
if (omp_get_thread_num() == omp_get_num_threads() - 1) {
1817
o_lvl = omp_get_level();
1918
o_tid = omp_get_thread_num();
@@ -27,9 +26,13 @@ int main() {
2726
}
2827
}
2928
}
30-
// CHECK: outer: lvl: 1, tid: 1, nt: 2
31-
// CHECK: inner: lvl: 2, tid: 0, nt: 1
29+
if (o_lvl == 1 && o_tid == o_nt - 1 && o_nt > 1 && i_lvl == 2 && i_tid == 0 &&
30+
i_nt == 1) {
31+
// CHECK: Success
32+
printf("Success\n");
33+
return 0;
34+
}
3235
printf("outer: lvl: %i, tid: %i, nt: %i\n", o_lvl, o_tid, o_nt);
3336
printf("inner: lvl: %i, tid: %i, nt: %i\n", i_lvl, i_tid, i_nt);
34-
return 0;
37+
return 1;
3538
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// This fails when optimized for now.
2+
// RUN: %libomptarget-compile-run-and-check-generic
3+
// XUN: %libomptarget-compileopt-run-and-check-generic
4+
5+
#include <omp.h>
6+
#include <stdio.h>
7+
8+
int main() {
9+
// TODO: Test all ICVs on all levels
10+
int o_lvl = 111, i_lvl = 222, o_tid = 333, i_tid = 333, o_nt = 444,
11+
i_nt = 555;
12+
#pragma omp target teams map(tofrom : o_lvl, i_lvl, o_tid, i_tid, o_nt, i_nt) \
13+
num_teams(2) thread_limit(64)
14+
{
15+
omp_set_max_active_levels(1);
16+
if (omp_get_team_num() == 0) {
17+
#pragma omp parallel num_threads(64)
18+
if (omp_get_thread_num() == omp_get_num_threads() - 1) {
19+
o_lvl = omp_get_level();
20+
o_tid = omp_get_thread_num();
21+
o_nt = omp_get_num_threads();
22+
#pragma omp parallel num_threads(64)
23+
if (omp_get_thread_num() == omp_get_num_threads() - 1) {
24+
i_lvl = omp_get_level();
25+
i_tid = omp_get_thread_num();
26+
i_nt = omp_get_num_threads();
27+
}
28+
}
29+
}
30+
}
31+
if (o_lvl == 1 && o_tid == o_nt - 1 && o_nt > 1 && i_lvl == 2 && i_tid == 0 &&
32+
i_nt == 1) {
33+
// CHECK: Success
34+
printf("Success\n");
35+
return 0;
36+
}
37+
printf("outer: lvl: %i, tid: %i, nt: %i\n", o_lvl, o_tid, o_nt);
38+
printf("inner: lvl: %i, tid: %i, nt: %i\n", i_lvl, i_tid, i_nt);
39+
return 1;
40+
}

0 commit comments

Comments
 (0)