Skip to content

Commit c09fbbd

Browse files
committed
Reapply "[GlobalOpt][FIX] Do not embed initializers into AS!=0 globals""
This reapplies commit 7dbba33, or, put differently, this reverts commit d9a8d20. The test now requires the amdgpu and nvptx backend explicitly as it won't work without properly.
1 parent b053228 commit c09fbbd

File tree

9 files changed

+109
-15
lines changed

9 files changed

+109
-15
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,10 @@ class TargetTransformInfo {
383383

384384
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
385385

386+
/// Return true if globals in this address space can have initializers other
387+
/// than `undef`.
388+
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
389+
386390
unsigned getAssumedAddrSpace(const Value *V) const;
387391

388392
/// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
@@ -1457,6 +1461,8 @@ class TargetTransformInfo::Concept {
14571461
virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
14581462
Intrinsic::ID IID) const = 0;
14591463
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1464+
virtual bool
1465+
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
14601466
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
14611467
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
14621468
Value *OldV,
@@ -1782,6 +1788,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
17821788
return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
17831789
}
17841790

1791+
bool
1792+
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
1793+
return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
1794+
}
1795+
17851796
unsigned getAssumedAddrSpace(const Value *V) const override {
17861797
return Impl.getAssumedAddrSpace(V);
17871798
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ class TargetTransformInfoImplBase {
105105
}
106106

107107
bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
108+
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
109+
return AS == 0;
110+
};
108111

109112
unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
110113

llvm/include/llvm/Transforms/Utils/GlobalStatus.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H
1010
#define LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H
1111

12+
#include "llvm/IR/Instructions.h"
1213
#include "llvm/Support/AtomicOrdering.h"
1314

1415
namespace llvm {
@@ -45,7 +46,7 @@ struct GlobalStatus {
4546

4647
/// This global is stored to, but only its initializer and one other value
4748
/// is ever stored to it. If this global isStoredOnce, we track the value
48-
/// stored to it in StoredOnceValue below. This is only tracked for scalar
49+
/// stored to it via StoredOnceStore below. This is only tracked for scalar
4950
/// globals.
5051
StoredOnce,
5152

@@ -55,8 +56,16 @@ struct GlobalStatus {
5556
} StoredType = NotStored;
5657

5758
/// If only one value (besides the initializer constant) is ever stored to
58-
/// this global, keep track of what value it is.
59-
Value *StoredOnceValue = nullptr;
59+
/// this global, keep track of what value it is via the store instruction.
60+
const StoreInst *StoredOnceStore = nullptr;
61+
62+
/// If only one value (besides the initializer constant) is ever stored to
63+
/// this global return the stored value.
64+
Value *getStoredOnceValue() const {
65+
return (StoredType == StoredOnce && StoredOnceStore)
66+
? StoredOnceStore->getOperand(0)
67+
: nullptr;
68+
}
6069

6170
/// These start out null/false. When the first accessing function is noticed,
6271
/// it is recorded. When a second different accessing function is noticed,

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
259259
return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
260260
}
261261

262+
bool TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace(
263+
unsigned AS) const {
264+
return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS);
265+
}
266+
262267
unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
263268
return TTIImpl->getAssumedAddrSpace(V);
264269
}

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,12 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
182182

183183
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
184184
Intrinsic::ID IID) const;
185+
186+
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
187+
return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
188+
AS != AMDGPUAS::PRIVATE_ADDRESS;
189+
}
190+
185191
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
186192
Value *NewV) const;
187193

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
4848
return AddressSpace::ADDRESS_SPACE_GENERIC;
4949
}
5050

51+
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
52+
return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
53+
AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
54+
}
55+
5156
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
5257
IntrinsicInst &II) const;
5358

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1507,6 +1507,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
15071507
/// it if possible. If we make a change, return true.
15081508
static bool
15091509
processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
1510+
function_ref<TargetTransformInfo &(Function &)> GetTTI,
15101511
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
15111512
function_ref<DominatorTree &(Function &)> LookupDomTree) {
15121513
auto &DL = GV->getParent()->getDataLayout();
@@ -1605,18 +1606,26 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
16051606
if (SRAGlobal(GV, DL))
16061607
return true;
16071608
}
1608-
if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
1609+
Value *StoredOnceValue = GS.getStoredOnceValue();
1610+
if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
16091611
// Avoid speculating constant expressions that might trap (div/rem).
1610-
auto *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue);
1612+
auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
16111613
if (SOVConstant && SOVConstant->canTrap())
16121614
return Changed;
16131615

1616+
Function &StoreFn =
1617+
const_cast<Function &>(*GS.StoredOnceStore->getFunction());
16141618
// If the initial value for the global was an undef value, and if only
16151619
// one other value was stored into it, we can just change the
16161620
// initializer to be the stored value, then delete all stores to the
16171621
// global. This allows us to mark it constant.
1622+
// This is restricted to address spaces that allow globals to have
1623+
// initializers. NVPTX, for example, does not support initializers for
1624+
// shared memory (AS 3).
16181625
if (SOVConstant && SOVConstant->getType() == GV->getValueType() &&
1619-
isa<UndefValue>(GV->getInitializer())) {
1626+
isa<UndefValue>(GV->getInitializer()) &&
1627+
GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
1628+
GV->getType()->getAddressSpace())) {
16201629
// Change the initial value here.
16211630
GV->setInitializer(SOVConstant);
16221631

@@ -1635,8 +1644,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
16351644

16361645
// Try to optimize globals based on the knowledge that only one value
16371646
// (besides its initializer) is ever stored to the global.
1638-
if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
1639-
GetTLI))
1647+
if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI))
16401648
return true;
16411649

16421650
// Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -1656,6 +1664,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
16561664
/// make a change, return true.
16571665
static bool
16581666
processGlobal(GlobalValue &GV,
1667+
function_ref<TargetTransformInfo &(Function &)> GetTTI,
16591668
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
16601669
function_ref<DominatorTree &(Function &)> LookupDomTree) {
16611670
if (GV.getName().startswith("llvm."))
@@ -1688,7 +1697,8 @@ processGlobal(GlobalValue &GV,
16881697
if (GVar->isConstant() || !GVar->hasInitializer())
16891698
return Changed;
16901699

1691-
return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
1700+
return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
1701+
Changed;
16921702
}
16931703

16941704
/// Walk all of the direct calls of the specified function, changing them to
@@ -1991,7 +2001,7 @@ OptimizeFunctions(Module &M,
19912001
}
19922002
}
19932003

1994-
Changed |= processGlobal(*F, GetTLI, LookupDomTree);
2004+
Changed |= processGlobal(*F, GetTTI, GetTLI, LookupDomTree);
19952005

19962006
if (!F->hasLocalLinkage())
19972007
continue;
@@ -2060,6 +2070,7 @@ OptimizeFunctions(Module &M,
20602070

20612071
static bool
20622072
OptimizeGlobalVars(Module &M,
2073+
function_ref<TargetTransformInfo &(Function &)> GetTTI,
20632074
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
20642075
function_ref<DominatorTree &(Function &)> LookupDomTree,
20652076
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
@@ -2088,7 +2099,7 @@ OptimizeGlobalVars(Module &M,
20882099
continue;
20892100
}
20902101

2091-
Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
2102+
Changed |= processGlobal(*GV, GetTTI, GetTLI, LookupDomTree);
20922103
}
20932104
return Changed;
20942105
}
@@ -2666,8 +2677,8 @@ static bool optimizeGlobalsInModule(
26662677
});
26672678

26682679
// Optimize non-address-taken globals.
2669-
LocalChange |=
2670-
OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);
2680+
LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
2681+
NotDiscardableComdats);
26712682

26722683
// Resolve aliases, when possible.
26732684
LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);

llvm/lib/Transforms/Utils/GlobalStatus.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,9 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
127127
GS.StoredType = GlobalStatus::InitializerStored;
128128
} else if (GS.StoredType < GlobalStatus::StoredOnce) {
129129
GS.StoredType = GlobalStatus::StoredOnce;
130-
GS.StoredOnceValue = StoredVal;
130+
GS.StoredOnceStore = SI;
131131
} else if (GS.StoredType == GlobalStatus::StoredOnce &&
132-
GS.StoredOnceValue == StoredVal) {
132+
GS.getStoredOnceValue() == StoredVal) {
133133
// noop.
134134
} else {
135135
GS.StoredType = GlobalStatus::Stored;
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; RUN: opt -passes=globalopt < %s -S | FileCheck %s
2+
; RUN: opt -passes=globalopt --mtriple=nvptx64 < %s -S | FileCheck %s --check-prefix=GPU
3+
; RUN: opt -passes=globalopt --mtriple=amdgcn < %s -S | FileCheck %s --check-prefix=GPU
4+
; REQUIRES: amdgpu-registered-target, nvptx-registered-target
5+
6+
; Check that we don't try to set a global initializer for non AS(0) globals.
7+
8+
@g0 = internal global i16 undef
9+
@g1 = internal addrspace(3) global i16 undef
10+
@g2 = internal addrspace(1) global i16 undef
11+
; CHECK: internal unnamed_addr constant i16 3
12+
; CHECK: internal unnamed_addr addrspace(3) global i16 undef
13+
; CHECK: internal unnamed_addr addrspace(1) global i16 undef
14+
; GPU: internal unnamed_addr constant i16 3
15+
; GPU: internal unnamed_addr addrspace(3) global i16 undef
16+
; GPU: internal unnamed_addr addrspace(1) constant i16 7
17+
18+
define void @a() {
19+
store i16 3, i16* @g0, align 8
20+
store i16 5, i16* addrspacecast (i16 addrspace(3)* @g1 to i16*), align 8
21+
store i16 7, i16* addrspacecast (i16 addrspace(1)* @g2 to i16*), align 8
22+
ret void
23+
}
24+
25+
define i8 @get0() {
26+
%bc = bitcast i16* @g0 to i8*
27+
%gep = getelementptr i8, i8* %bc, i64 1
28+
%r = load i8, i8* %gep
29+
ret i8 %r
30+
}
31+
define i8 @get1() {
32+
%ac = addrspacecast i16 addrspace(3)* @g1 to i16*
33+
%bc = bitcast i16* %ac to i8*
34+
%gep = getelementptr i8, i8* %bc, i64 1
35+
%r = load i8, i8* %gep
36+
ret i8 %r
37+
}
38+
define i8 @get2() {
39+
%ac = addrspacecast i16 addrspace(1)* @g2 to i16*
40+
%bc = bitcast i16* %ac to i8*
41+
%gep = getelementptr i8, i8* %bc, i64 1
42+
%r = load i8, i8* %gep
43+
ret i8 %r
44+
}

0 commit comments

Comments
 (0)