Skip to content

Commit d7d4aa5

Browse files
committed
AMDGPU: Move AMDGPUAttributor run earlier
Move it up with the other module passes. It's a higher-level optimization that should probably be done before hacking up the IR for codegen. It should really be done earlier than this. We could possibly move this with the other IPO passes, but we'd have to stop inferring the lack of llvm.amdgcn.lds.kernel.id calls and have the LDS module pass mark functions which don't need the ID. The one test change occurs because that pass relies on the backend run of SROA (which we ideally wouldn't have).
1 parent dc9f6f4 commit d7d4aa5

File tree

3 files changed

+20
-18
lines changed

3 files changed

+20
-18
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,11 @@ void AMDGPUPassConfig::addIRPasses() {
990990
addPass(createAMDGPULowerModuleLDSPass());
991991
}
992992

993+
// AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run
994+
// after their introduction
995+
if (TM.getOptLevel() > CodeGenOpt::None)
996+
addPass(createAMDGPUAttributorPass());
997+
993998
if (TM.getOptLevel() > CodeGenOpt::None)
994999
addPass(createInferAddressSpacesPass());
9951000

@@ -1046,9 +1051,6 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
10461051
if (RemoveIncompatibleFunctions)
10471052
addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM));
10481053

1049-
if (TM->getOptLevel() > CodeGenOpt::None)
1050-
addPass(createAMDGPUAttributorPass());
1051-
10521054
// FIXME: This pass adds 2 hacky attributes that can be replaced with an
10531055
// analysis, and should be removed.
10541056
addPass(createAMDGPUAnnotateKernelFeaturesPass());

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,9 @@
185185
; GCN-O1-NEXT: Function Alias Analysis Results
186186
; GCN-O1-NEXT: Lower OpenCL enqueued blocks
187187
; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions
188+
; GCN-O1-NEXT: AMDGPU Attributor
189+
; GCN-O1-NEXT: FunctionPass Manager
190+
; GCN-O1-NEXT: Cycle Info Analysis
188191
; GCN-O1-NEXT: FunctionPass Manager
189192
; GCN-O1-NEXT: Infer address spaces
190193
; GCN-O1-NEXT: Expand Atomic instructions
@@ -224,9 +227,6 @@
224227
; GCN-O1-NEXT: Natural Loop Information
225228
; GCN-O1-NEXT: TLS Variable Hoist
226229
; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions
227-
; GCN-O1-NEXT: AMDGPU Attributor
228-
; GCN-O1-NEXT: FunctionPass Manager
229-
; GCN-O1-NEXT: Cycle Info Analysis
230230
; GCN-O1-NEXT: CallGraph Construction
231231
; GCN-O1-NEXT: Call Graph SCC Pass Manager
232232
; GCN-O1-NEXT: AMDGPU Annotate Kernel Features
@@ -461,6 +461,9 @@
461461
; GCN-O1-OPTS-NEXT: Function Alias Analysis Results
462462
; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks
463463
; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions
464+
; GCN-O1-OPTS-NEXT: AMDGPU Attributor
465+
; GCN-O1-OPTS-NEXT: FunctionPass Manager
466+
; GCN-O1-OPTS-NEXT: Cycle Info Analysis
464467
; GCN-O1-OPTS-NEXT: FunctionPass Manager
465468
; GCN-O1-OPTS-NEXT: Infer address spaces
466469
; GCN-O1-OPTS-NEXT: Expand Atomic instructions
@@ -508,9 +511,6 @@
508511
; GCN-O1-OPTS-NEXT: TLS Variable Hoist
509512
; GCN-O1-OPTS-NEXT: Early CSE
510513
; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions
511-
; GCN-O1-OPTS-NEXT: AMDGPU Attributor
512-
; GCN-O1-OPTS-NEXT: FunctionPass Manager
513-
; GCN-O1-OPTS-NEXT: Cycle Info Analysis
514514
; GCN-O1-OPTS-NEXT: CallGraph Construction
515515
; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager
516516
; GCN-O1-OPTS-NEXT: AMDGPU Annotate Kernel Features
@@ -759,6 +759,9 @@
759759
; GCN-O2-NEXT: Function Alias Analysis Results
760760
; GCN-O2-NEXT: Lower OpenCL enqueued blocks
761761
; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions
762+
; GCN-O2-NEXT: AMDGPU Attributor
763+
; GCN-O2-NEXT: FunctionPass Manager
764+
; GCN-O2-NEXT: Cycle Info Analysis
762765
; GCN-O2-NEXT: FunctionPass Manager
763766
; GCN-O2-NEXT: Infer address spaces
764767
; GCN-O2-NEXT: Expand Atomic instructions
@@ -814,9 +817,6 @@
814817
; GCN-O2-NEXT: TLS Variable Hoist
815818
; GCN-O2-NEXT: Early CSE
816819
; GCN-O2-NEXT: AMDGPU Remove Incompatible Functions
817-
; GCN-O2-NEXT: AMDGPU Attributor
818-
; GCN-O2-NEXT: FunctionPass Manager
819-
; GCN-O2-NEXT: Cycle Info Analysis
820820
; GCN-O2-NEXT: CallGraph Construction
821821
; GCN-O2-NEXT: Call Graph SCC Pass Manager
822822
; GCN-O2-NEXT: AMDGPU Annotate Kernel Features
@@ -1067,6 +1067,9 @@
10671067
; GCN-O3-NEXT: Function Alias Analysis Results
10681068
; GCN-O3-NEXT: Lower OpenCL enqueued blocks
10691069
; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions
1070+
; GCN-O3-NEXT: AMDGPU Attributor
1071+
; GCN-O3-NEXT: FunctionPass Manager
1072+
; GCN-O3-NEXT: Cycle Info Analysis
10701073
; GCN-O3-NEXT: FunctionPass Manager
10711074
; GCN-O3-NEXT: Infer address spaces
10721075
; GCN-O3-NEXT: Expand Atomic instructions
@@ -1134,9 +1137,6 @@
11341137
; GCN-O3-NEXT: Optimization Remark Emitter
11351138
; GCN-O3-NEXT: Global Value Numbering
11361139
; GCN-O3-NEXT: AMDGPU Remove Incompatible Functions
1137-
; GCN-O3-NEXT: AMDGPU Attributor
1138-
; GCN-O3-NEXT: FunctionPass Manager
1139-
; GCN-O3-NEXT: Cycle Info Analysis
11401140
; GCN-O3-NEXT: CallGraph Construction
11411141
; GCN-O3-NEXT: Call Graph SCC Pass Manager
11421142
; GCN-O3-NEXT: AMDGPU Annotate Kernel Features

llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ define amdgpu_kernel void @test_simple_indirect_call() {
4343
; GFX9-LABEL: test_simple_indirect_call:
4444
; GFX9: ; %bb.0:
4545
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x4
46-
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s6, s9
47-
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
48-
; GFX9-NEXT: s_add_u32 s0, s0, s9
46+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
47+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
48+
; GFX9-NEXT: s_add_u32 s0, s0, s17
4949
; GFX9-NEXT: s_addc_u32 s1, s1, 0
5050
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
5151
; GFX9-NEXT: s_lshr_b32 s4, s4, 16

0 commit comments

Comments
 (0)