Skip to content

Commit 40ff737

Browse files
author
Hugh Delaney
committed
Add pass to add global to module using new pass manager
1 parent 6e3be4e commit 40ff737

File tree

5 files changed

+102
-6
lines changed

5 files changed

+102
-6
lines changed

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
enum MemorySemanticsMask semantics, TYPE val) { \
1919
int atomic_scope = 0, memory_order = 0; \
2020
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
21-
return BUILTIN(p, val, memory_order); \
21+
return BUILTIN(p, -val, memory_order); \
2222
}
2323

2424
#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \
@@ -28,11 +28,11 @@
2828
BUILTIN) \
2929
AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN)
3030

31-
AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, int, i, __atomic_fetch_sub)
32-
AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned int, j, __atomic_fetch_sub)
33-
AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, long, l, __atomic_fetch_sub)
34-
AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned long, m, __atomic_fetch_sub)
35-
AMDGPU_ATOMIC_SUB(_Z21__spirv_AtomicFSubEXT, float, f, __atomic_fetch_sub)
31+
AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, int, i, __atomic_fetch_add)
32+
AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned int, j, __atomic_fetch_add)
33+
AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, long, l, __atomic_fetch_add)
34+
AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned long, m, __atomic_fetch_add)
35+
AMDGPU_ATOMIC_SUB(_Z21__spirv_AtomicFSubEXT, float, f, __atomic_fetch_add)
3636

3737
#undef AMDGPU_ATOMIC
3838
#undef AMDGPU_ATOMIC_IMPL
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//===- AMDGPUAddGlobalForAtomicXor.cpp ------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Some AMDGPU atomic instructions require a prefetch in order for them to work
10+
// properly when using hipMallocManaged. This pass scans a module for atomicrmw
11+
// xor instructions and creates the global HipAtomicXorModuleNeedsPrefetch if
12+
// one is present. This allows the prefetch to happen at runtime only if the
13+
// problematic instruction is used.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#include "AMDGPUAddGlobalForAtomicXor.h"
18+
#include "llvm/IR/InstIterator.h"
19+
#include "llvm/IR/Instructions.h"
20+
21+
using namespace llvm;
22+
23+
#define NEW_GLOBAL_NAME "HipAtomicXorModuleNeedsPrefetch"
24+
25+
namespace {
26+
27+
/// Returns true if any function in \p M contains an atomicrmw instruction
/// whose operation is xor; returns false otherwise.
bool moduleHasAtomicXor(Module &M) {
  for (Function &Fn : M) {
    for (Instruction &Inst : instructions(Fn)) {
      // Only atomicrmw instructions are of interest; everything else yields
      // a null pointer here and is skipped.
      const auto *RMW = dyn_cast<AtomicRMWInst>(&Inst);
      if (RMW && RMW->getOperation() == AtomicRMWInst::Xor)
        return true;
    }
  }
  return false;
}
39+
40+
/// Adds the marker global NEW_GLOBAL_NAME to \p M when the module contains an
/// atomicrmw xor instruction.
///
/// The marker is a constant i1 with internal linkage, initialized to true.
/// If a global with that name already exists (for example because this pass
/// has already run over \p M), nothing is added, so repeated runs never create
/// auto-renamed duplicates of the marker.
///
/// \returns true if a new global was added to \p M, false otherwise.
bool runImpl(Module &M) {
  if (!moduleHasAtomicXor(M))
    return false;

  // getNamedGlobal also finds globals with local linkage, which is what this
  // pass creates.
  if (M.getNamedGlobal(NEW_GLOBAL_NAME))
    return false;

  Type *Int1Ty = Type::getInt1Ty(M.getContext());
  // Passing the module to the constructor inserts the new global into M's
  // global list, so the returned pointer does not need to be kept.
  // NOTE(review): an unreferenced internal global may be dropped by later
  // passes such as GlobalDCE -- confirm the marker survives to the final
  // binary.
  new GlobalVariable(M, Int1Ty, /*isConstant=*/true,
                     GlobalValue::InternalLinkage,
                     Constant::getAllOnesValue(Int1Ty), NEW_GLOBAL_NAME);
  return true;
}
53+
} // end anonymous namespace
54+
55+
/// New pass manager entry point. Delegates to runImpl and reports every
/// analysis as preserved when the module was left untouched, or none as
/// preserved when runImpl changed the module. \p AM is not used.
PreservedAnalyses
AMDGPUAddGlobalForAtomicXorPass::run(Module &M, ModuleAnalysisManager &AM) {
  if (!runImpl(M))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
//===- AMDGPUAddGlobalForAtomicXor.h --------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Some AMDGPU atomic instructions require a prefetch in order for them to work
10+
// properly when using hipMallocManaged. This pass scans a module for atomicrmw
11+
// xor instructions and creates the global HipAtomicXorModuleNeedsPrefetch if
12+
// one is present. This allows the prefetch to happen at runtime only if the
13+
// problematic instruction is used.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#ifndef LLVM_LIB_TARGET_AMDGPU_ADDGLOBALFORATOMICXOR_H
18+
#define LLVM_LIB_TARGET_AMDGPU_ADDGLOBALFORATOMICXOR_H
19+
20+
#include "llvm/IR/PassManager.h"
21+
22+
namespace llvm {
23+
24+
class AMDGPUAddGlobalForAtomicXorPass
25+
: public PassInfoMixin<AMDGPUAddGlobalForAtomicXorPass> {
26+
public:
27+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
28+
};
29+
30+
} // namespace llvm
31+
32+
#endif // LLVM_LIB_TARGET_AMDGPU_ADDGLOBALFORATOMICXOR_H

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "AMDGPUTargetMachine.h"
1616
#include "AMDGPU.h"
17+
#include "AMDGPUAddGlobalForAtomicXor.h"
1718
#include "AMDGPUAliasAnalysis.h"
1819
#include "AMDGPUCtorDtorLowering.h"
1920
#include "AMDGPUExportClustering.h"
@@ -618,6 +619,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
618619
PM.addPass(AMDGPUAlwaysInlinePass());
619620
return true;
620621
}
622+
if (PassName == "amdgpu-add-global-for-atomic-xor") {
623+
PM.addPass(AMDGPUAddGlobalForAtomicXorPass());
624+
return true;
625+
}
621626
if (PassName == "amdgpu-lower-module-lds") {
622627
PM.addPass(AMDGPULowerModuleLDSPass(*this));
623628
return true;

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ tablegen(LLVM InstCombineTables.inc -gen-searchable-tables)
4141
add_public_tablegen_target(InstCombineTableGen)
4242

4343
add_llvm_target(AMDGPUCodeGen
44+
AMDGPUAddGlobalForAtomicXor.cpp
4445
AMDGPUAliasAnalysis.cpp
4546
AMDGPUAlwaysInlinePass.cpp
4647
AMDGPUAnnotateKernelFeatures.cpp

0 commit comments

Comments
 (0)