Skip to content

Commit c6c3bc2

Browse files
aratajewigcbot
authored and committed
Avoid using %sp and %fp predefined variables
If a kernel uses VLA but doesn't use stack calls, then we can initialize stack-related variables in a limited scope. Only the stack and frame pointers need to be initialized. All other stack-related variables like `ARGV`, `RETV`, etc. don't need to be initialized as they are not needed for VLA handling. Also, `SP` and `FP` don't need to be initialized as the predefined `%sp` and `%fp` VISA variables. Using predefined variables is only necessary if stack calls are present in a kernel. Not using the predefined `%sp` and `%fp` VISA variables allows any register to be allocated for the stack and frame pointers.
1 parent c057be0 commit c6c3bc2

File tree

4 files changed

+116
-1
lines changed

4 files changed

+116
-1
lines changed

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,25 @@ void CShader::InitializeStackVariables()
320320
}
321321
}
322322

323+
// This function initializes the stack in a limited scope, only for the purpose
324+
// of handling VLA. It is intended to be called when VLA is used but stack calls
325+
// are not present. Only SP and FP variables are initialized. Note that these
326+
// variables are not initialized to the predefined %sp and %fp VISA variables
327+
// because it's not necessary. All other stack-related variables like ARGV,
328+
// RETV, etc. are also not initialized as they are not needed for VLA handling.
329+
void CShader::InitializeSPFPForVLA()
330+
{
331+
IGC_ASSERT_MESSAGE(!HasStackCalls(), "InitializeSPFPForVLA should only be called if stack calls are not present!");
332+
333+
// Set the SP/FP variable types to match the private pointer size defined in the data layout
334+
bool isA64Private = (GetContext()->getRegisterPointerSizeInBits(ADDRESS_SPACE_PRIVATE) == 64);
335+
336+
// create stack-pointer register
337+
m_SP = GetNewVariable(1, (isA64Private ? ISA_TYPE_UQ : ISA_TYPE_UD), (isA64Private ? EALIGN_QWORD : EALIGN_DWORD), true, 1, "SP");
338+
// create frame-pointer register
339+
m_FP = GetNewVariable(1, (isA64Private ? ISA_TYPE_UQ : ISA_TYPE_UD), (isA64Private ? EALIGN_QWORD : EALIGN_DWORD), true, 1, "FP");
340+
}
341+
323342
/// save FP of previous frame when entering a stack-call function
324343
void CShader::SaveStackState()
325344
{

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11598,7 +11598,21 @@ void EmitPass::emitReturn(llvm::ReturnInst* inst)
1159811598
/// Initializes the kernel for stack call by initializing the SP and FP
1159911599
void EmitPass::InitializeKernelStack(Function* pKernel, CVariable* stackBufferBase)
1160011600
{
11601-
m_currShader->InitializeStackVariables();
11601+
if (m_currShader->HasStackCalls())
11602+
{
11603+
m_currShader->InitializeStackVariables();
11604+
}
11605+
else
11606+
{
11607+
// If there are no stack calls in pKernel, but it uses VLA,
11608+
// stack can be initialized in a limited scope, meaning that
11609+
// only SP and FP need to be initialized.
11610+
bool hasVLA = (m_FGA && m_FGA->getGroup(pKernel) &&
11611+
m_FGA->getGroup(pKernel)->hasVariableLengthAlloca()) || pKernel->hasFnAttribute("hasVLA");
11612+
IGC_ASSERT_MESSAGE(hasVLA, "Stack initialization, without presence of stack calls, is only allowed when VLA is used.");
11613+
m_currShader->InitializeSPFPForVLA();
11614+
}
11615+
1160211616
auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
1160311617
auto pModMD = pCtx->getModuleMetaData();
1160411618

IGC/Compiler/CISACodeGen/ShaderCodeGen.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ class CShader
270270
bool hasFP() const { return m_FP != nullptr; }
271271

272272
void InitializeStackVariables();
273+
void InitializeSPFPForVLA();
273274
void SaveStackState();
274275
void RestoreStackState();
275276

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; REQUIRES: llvm-14-plus, regkeys
9+
;
10+
; RUN: igc_opt --opaque-pointers -platformbmg -igc-emit-visa %s -regkey DumpVISAASMToConsole | FileCheck %s
11+
; ------------------------------------------------
12+
; EmitVISAPass
13+
; ------------------------------------------------
14+
15+
; This test verifies stack-related variables initialization in case a kernel
16+
; uses VLA, but it doesn't use stack calls. In such scenario, stack-related
17+
; variables can be initialized in a very limited scope, meaning that only stack
18+
; pointer and frame pointer need to be initialized. Additionally, stack and
19+
; frame pointer don't need to be initialized as predefined %sp and %fp VISA
20+
; variables; they can just be regular VISA variables.
21+
22+
; CHECK-NOT: .decl SP{{.*}} alias=<%sp, 0>
23+
; CHECK-NOT: .decl FP{{.*}} alias=<%fp, 0>
24+
; CHECK-NOT: .decl ARGV{{.*}} alias=<%arg, 0>
25+
; CHECK-NOT: .decl RETV{{.*}} alias=<%retval, 0>
26+
27+
; CHECK: add (M1_NM, 1) SP(0,0)<1> privateBase(0,0)<0;1,0> {{.*}}(0,0)<0;1,0>
28+
; CHECK: mov (M1_NM, 1) FP(0,0)<1> SP(0,0)<0;1,0>
29+
30+
define spir_kernel void @test(ptr addrspace(1) %in, ptr addrspace(1) %out, <8 x i32> %r0, <8 x i32> %payloadHeader, ptr %privateBase, i32 %bufferOffset, i16 %localIdX, i16 %localIdY, i16 %localIdZ) #0 {
31+
entry:
32+
%vlaSize = load i32, ptr addrspace(1) %in, align 4
33+
%vlaStackAlloca = call ptr @llvm.genx.GenISA.VLAStackAlloca(i32 0, i32 %vlaSize)
34+
%lidX = zext i16 %localIdX to i32
35+
store i32 %lidX, ptr %vlaStackAlloca
36+
; ... some other basic blocks here ...
37+
%val = load i32, ptr %vlaStackAlloca
38+
store i32 %val, ptr addrspace(1) %out
39+
ret void
40+
}
41+
42+
declare ptr @llvm.genx.GenISA.VLAStackAlloca(i32, i32)
43+
44+
attributes #0 = { "hasVLA" }
45+
46+
!IGCMetadata = !{!0}
47+
!igc.functions = !{!22}
48+
49+
!0 = !{!"ModuleMD", !1, !21}
50+
!1 = !{!"FuncMD", !2, !3}
51+
!2 = !{!"FuncMDMap[0]", ptr @test}
52+
!3 = !{!"FuncMDValue[0]", !4, !17}
53+
!4 = !{!"resAllocMD", !5}
54+
!5 = !{!"argAllocMDList", !6, !10, !11, !14, !15, !16, !31, !32}
55+
!6 = !{!"argAllocMDListVec[0]", !7, !8, !9}
56+
!7 = !{!"type", i32 0}
57+
!8 = !{!"extensionType", i32 -1}
58+
!9 = !{!"indexType", i32 -1}
59+
!10 = !{!"argAllocMDListVec[1]", !7, !8, !9}
60+
!11 = !{!"argAllocMDListVec[2]", !12, !8, !13}
61+
!12 = !{!"type", i32 1}
62+
!13 = !{!"indexType", i32 0}
63+
!14 = !{!"argAllocMDListVec[3]", !7, !8, !9}
64+
!15 = !{!"argAllocMDListVec[4]", !7, !8, !9}
65+
!16 = !{!"argAllocMDListVec[5]", !7, !8, !9}
66+
!17 = !{!"m_OpenCLArgTypeQualifiers", !18, !19, !20}
67+
!18 = !{!"m_OpenCLArgTypeQualifiersVec[0]", !""}
68+
!19 = !{!"m_OpenCLArgTypeQualifiersVec[1]", !""}
69+
!20 = !{!"m_OpenCLArgTypeQualifiersVec[2]", !""}
70+
!21 = !{!"isHDCFastClearShader", i1 false}
71+
!22 = !{ptr @test, !23}
72+
!23 = !{!24, !25}
73+
!24 = !{!"function_type", i32 0}
74+
!25 = !{!"implicit_arg_desc", !26, !27, !28, !29}
75+
!26 = !{i32 0}
76+
!27 = !{i32 1}
77+
!28 = !{i32 12}
78+
!29 = !{i32 14, !29}
79+
!30 = !{!"explicit_arg_num", i32 2}
80+
!31 = !{!"argAllocMDListVec[6]", !7, !8, !9}
81+
!32 = !{!"argAllocMDListVec[7]", !7, !8, !9}

0 commit comments

Comments
 (0)