Skip to content

Commit 827559e

Browse files
committed
merge main into amd-stg-open
locally : reverts d3921e4 [OpenMP] Basic BumpAllocator for (AMD)GPUs (llvm#69806) Change-Id: Id512e729870279855744ce65bfc69e2155fb68ee
2 parents 7770344 + d3921e4 commit 827559e

File tree

5 files changed

+206
-0
lines changed

5 files changed

+206
-0
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//===-------- Allocator.h - OpenMP memory allocator interface ---- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
#ifndef OMPTARGET_ALLOCATOR_H
13+
#define OMPTARGET_ALLOCATOR_H
14+
15+
#include "Types.h"
16+
17+
// Forward declaration.
18+
struct KernelEnvironmentTy;
19+
20+
#pragma omp begin declare target device_type(nohost)
21+
22+
namespace ompx {
23+
24+
namespace allocator {
25+
26+
static uint64_t constexpr ALIGNMENT = 16;
27+
28+
/// Initialize the allocator according to \p KernelEnvironment
29+
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
30+
31+
/// Allocate \p Size bytes.
32+
[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void *
33+
alloc(uint64_t Size);
34+
35+
/// Free the allocation pointed to by \p Ptr.
36+
void free(void *Ptr);
37+
38+
} // namespace allocator
39+
40+
} // namespace ompx
41+
42+
#pragma omp end declare target
43+
44+
#endif
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
//===------ State.cpp - OpenMP State & ICV interface ------------- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include "Allocator.h"
12+
#include "Configuration.h"
13+
#include "Environment.h"
14+
#include "Mapping.h"
15+
#include "Synchronization.h"
16+
#include "Types.h"
17+
#include "Utils.h"
18+
19+
using namespace ompx;
20+
21+
#pragma omp begin declare target device_type(nohost)
22+
23+
[[gnu::used, gnu::retain, gnu::weak,
24+
gnu::visibility(
25+
"protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool;
26+
[[gnu::used, gnu::retain, gnu::weak,
27+
gnu::visibility("protected")]] DeviceMemoryPoolTrackingTy
28+
__omp_rtl_device_memory_pool_tracker;
29+
30+
/// Stateless bump allocator that uses the __omp_rtl_device_memory_pool
31+
/// directly.
32+
struct BumpAllocatorTy final {
33+
34+
void *alloc(uint64_t Size) {
35+
Size = utils::roundUp(Size, uint64_t(allocator::ALIGNMENT));
36+
37+
if (config::isDebugMode(DeviceDebugKind::AllocationTracker)) {
38+
atomic::add(&__omp_rtl_device_memory_pool_tracker.NumAllocations, 1,
39+
atomic::seq_cst);
40+
atomic::add(&__omp_rtl_device_memory_pool_tracker.AllocationTotal, Size,
41+
atomic::seq_cst);
42+
atomic::min(&__omp_rtl_device_memory_pool_tracker.AllocationMin, Size,
43+
atomic::seq_cst);
44+
atomic::max(&__omp_rtl_device_memory_pool_tracker.AllocationMax, Size,
45+
atomic::seq_cst);
46+
}
47+
48+
uint64_t *Data =
49+
reinterpret_cast<uint64_t *>(&__omp_rtl_device_memory_pool.Ptr);
50+
uint64_t End =
51+
reinterpret_cast<uint64_t>(Data) + __omp_rtl_device_memory_pool.Size;
52+
53+
uint64_t OldData = atomic::add(Data, Size, atomic::seq_cst);
54+
if (OldData + Size > End)
55+
__builtin_trap();
56+
57+
return reinterpret_cast<void *>(OldData);
58+
}
59+
60+
void free(void *) {}
61+
};
62+
63+
BumpAllocatorTy BumpAllocator;
64+
65+
/// allocator namespace implementation
66+
///
67+
///{
68+
69+
void allocator::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) {
70+
// TODO: Check KernelEnvironment for an allocator choice as soon as we have
71+
// more than one.
72+
}
73+
74+
void *allocator::alloc(uint64_t Size) { return BumpAllocator.alloc(Size); }
75+
76+
void allocator::free(void *Ptr) { BumpAllocator.free(Ptr); }
77+
78+
///}
79+
80+
#pragma omp end declare target
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// RUN: %libomptarget-compile-generic && %libomptarget-run-generic
2+
// RUN: %libomptarget-compileopt-generic && %libomptarget-run-generic
3+
4+
#include <stdio.h>
5+
#include <stdlib.h>
6+
7+
int main() {
8+
long unsigned *DP = 0;
9+
int N = 128;
10+
int Threads = 128;
11+
int Teams = 440;
12+
13+
// Allocate ~55MB on the device.
14+
#pragma omp target map(from : DP)
15+
DP = (long unsigned *)malloc(sizeof(long unsigned) * N * Threads * Teams);
16+
17+
#pragma omp target teams distribute parallel for num_teams(Teams) \
18+
thread_limit(Threads) is_device_ptr(DP)
19+
for (int i = 0; i < Threads * Teams; ++i) {
20+
for (int j = 0; j < N; ++j) {
21+
DP[i * N + j] = i + j;
22+
}
23+
}
24+
25+
long unsigned s = 0;
26+
#pragma omp target teams distribute parallel for num_teams(Teams) \
27+
thread_limit(Threads) reduction(+ : s)
28+
for (int i = 0; i < Threads * Teams; ++i) {
29+
for (int j = 0; j < N; ++j) {
30+
s += DP[i * N + j];
31+
}
32+
}
33+
34+
// CHECK: Sum: 203458478080
35+
printf("Sum: %li\n", s);
36+
return 0;
37+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// RUN: %libomptarget-compile-generic && %libomptarget-run-generic
2+
// RUN: %libomptarget-compileopt-generic && %libomptarget-run-generic
3+
4+
#include <omp.h>
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
8+
int main() {
9+
long unsigned **DP = 0;
10+
int N = 128;
11+
int Threads = 128;
12+
int Teams = 440;
13+
14+
#pragma omp target map(from : DP)
15+
DP = (long unsigned **)malloc(sizeof(long unsigned *) * Threads * Teams);
16+
17+
#pragma omp target teams distribute parallel for num_teams(Teams) \
18+
thread_limit(Threads)
19+
for (int i = 0; i < Threads * Teams; ++i)
20+
DP[i] = (long unsigned *)malloc(sizeof(long unsigned) * N);
21+
22+
#pragma omp target teams distribute parallel for num_teams(Teams) \
23+
thread_limit(Threads)
24+
for (int i = 0; i < Threads * Teams; ++i) {
25+
for (int j = 0; j < N; ++j) {
26+
DP[i][j] = i + j;
27+
}
28+
}
29+
30+
long unsigned s = 0;
31+
#pragma omp target teams distribute parallel for num_teams(Teams) \
32+
thread_limit(Threads) reduction(+ : s)
33+
for (int i = 0; i < Threads * Teams; ++i) {
34+
for (int j = 0; j < N; ++j) {
35+
s += DP[i][j];
36+
}
37+
}
38+
39+
// CHECK: Sum: 203458478080
40+
printf("Sum: %li\n", s);
41+
return 0;
42+
}

revert_patches.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ Johannes: breakage
55
Revert "[OpenMP] Disable early vectorization of loads/stores in the runtime "
66
asserts in snap and MI-Teams
77

8+
Revert "[OpenMP] Basic BumpAllocator for (AMD)GPUs (#69806) "
9+
breaks flang-272343 flang-272343-3 flang-305553
10+
811
Nicole and Saiyed:
912
Revert " [OpenMP][DeviceRTL][AMDGPU] Support code object version 5"
1013

0 commit comments

Comments
 (0)