Skip to content

Commit b335c25

Browse files
jaladreips authored and igcbot committed
In synchronous raytracing, emulate LSC fence with LSC load
In synchronous raytracing, emulate LSC fence with LSC load
1 parent 8f500c5 commit b335c25

File tree

9 files changed

+64
-8
lines changed

9 files changed

+64
-8
lines changed

IGC/AdaptorCommon/RayTracing/AutoGenRTStackAccessPrivateOS.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1832,3 +1832,12 @@ auto* _getSyncStackID_Xe2(const Twine& _ReturnName = "")
18321832
return V_6;
18331833
}
18341834

1835+
// Auto-generated RTStack accessor (the `_Xe` variant dispatched from RTBuilder::getHitAddress).
// Emits IR that computes the address of one of the first two fields of the RTStack struct
// and returns it as an i8 pointer.
//   arg_0       - pointer to the RTStack; the returned pointer keeps arg_0's address space.
//   arg_1       - select condition: when true the address of field index 0 is chosen,
//                 otherwise the address of field index 1.
//                 NOTE(review): RTBuilder::getHitAddress passes its `Committed` flag here, so
//                 field 0 is presumably the committed hit and field 1 the potential hit -
//                 confirm against the RTStack layout.
//   _ReturnName - name given to the final bitcast value.
auto* _getHitAddress_Xe(Value* arg_0, Value* arg_1, const Twine& _ReturnName = "")
1836+
{
1837+
auto* V_2 = CreateInBoundsGEP(_struct_RTStackFormat__RTStack(*Ctx.getModule()), arg_0, { getInt64(0), getInt32(0) });
1838+
auto* V_3 = CreateInBoundsGEP(_struct_RTStackFormat__RTStack(*Ctx.getModule()), arg_0, { getInt64(0), getInt32(1) });
1839+
auto* V_4 = CreateSelect(arg_1, V_2, V_3);
1840+
auto* V_5 = CreateBitCast(V_4, PointerType::get(getInt8Ty(), arg_0->getType()->getPointerAddressSpace()), _ReturnName);
1841+
return V_5;
1842+
}
1843+

IGC/AdaptorCommon/RayTracing/RTBuilder.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1994,3 +1994,17 @@ void RTBuilder::commitProceduralPrimitiveHit(
19941994
#undef STYLE
19951995
}
19961996
}
1997+
1998+
// Returns the address of a hit inside the ray-tracing stack pointed to by StackPtr.
//   StackPtr  - stack pointer forwarded unchanged to the per-memory-style accessor.
//   Committed - compile-time selector, wrapped in VAdapt so it can be passed as a Value*;
//               the rayquery lowering passes false to obtain the potential hit.
// Returns the value produced by the matching _getHitAddress_<style> helper, or a null
// Value* (after asserting) if the current memory style has no entry.
Value* RTBuilder::getHitAddress(StackPointerVal* StackPtr, bool Committed)
1999+
{
2000+
// X-macro dispatch: RayTracingMemoryStyle.h expands STYLE(X) once per memory style,
// producing one `case` per style that forwards to _getHitAddress_<X>.
switch (getMemoryStyle())
2001+
{
2002+
#define STYLE(X) \
2003+
case RTMemoryStyle::X: \
2004+
return _getHitAddress_##X(StackPtr, VAdapt{ *this, Committed });
2005+
#include "RayTracingMemoryStyle.h"
2006+
#undef STYLE
2007+
}
2008+
// Only reached when getMemoryStyle() matched none of the generated cases.
IGC_ASSERT(0);
2009+
return {};
2010+
}

IGC/AdaptorCommon/RayTracing/RTBuilder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,8 @@ class RTBuilder : public IGCIRBuilder<>
496496
Value* getCandidateType(SyncStackPointerVal* SMStackPtr);
497497

498498
void commitProceduralPrimitiveHit(SyncStackPointerVal* SMStackPtr, Value* THit);
499+
500+
// Returns the address of the hit selected by Committed within the stack at StackPtr
// (the rayquery lowering passes false to get the potential hit).
Value* getHitAddress(StackPointerVal* StackPtr, bool Committed);
499501
};
500502

501503
} // namespace llvm

IGC/AdaptorCommon/RayTracing/TraceRayInlineLoweringPass.cpp

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,36 @@ Value* TraceRayInlineLoweringPass::emitProceedMainBody(
361361
Value* ShdowMemRTCtrlPtr = getShMemRTCtrl(builder, queryObjIndex);
362362
Value* traceRayCtrl = builder.getSyncTraceRayControl(ShdowMemRTCtrlPtr);
363363

364-
builder.CreateLSCFence(LSC_UGM, LSC_SCOPE_LOCAL, LSC_FENCE_OP_NONE);
364+
if (IGC_IS_FLAG_ENABLED(DisableLoadAsFenceOpInRaytracing))
365+
{
366+
builder.CreateLSCFence(LSC_UGM, LSC_SCOPE_LOCAL, LSC_FENCE_OP_NONE);
367+
}
368+
else
369+
{
370+
// this is an optimization
371+
// it's based on the idea that stores and loads are queued, so if a load completes, all stores before it are also completed
372+
// the requirement is that the load and the store should use the same address, so we use the potential hit (last write in copyMemHitInProceed)
373+
auto* potentialHit = builder.getHitAddress(HWStackPointer, false);
374+
375+
auto* M = builder.GetInsertPoint()->getModule();
376+
auto* fn = GenISAIntrinsic::getDeclaration(
377+
M,
378+
GenISAIntrinsic::GenISA_LSCLoadWithSideEffects,
379+
{ builder.getInt32Ty(), potentialHit->getType() }
380+
);
381+
382+
builder.CreateCall(
383+
fn,
384+
{
385+
potentialHit,
386+
builder.getInt32(0),
387+
builder.getInt32(LSC_DATA_SIZE_32b),
388+
builder.getInt32(LSC_DATA_ELEMS_1),
389+
builder.getInt32(LSC_L1C_WT_L3C_WB)
390+
},
391+
VALUE_NAME("LSCLoadAsFence")
392+
);
393+
}
365394

366395
//TraceRay
367396
Value* retSyncRT = builder.createSyncTraceRay(
@@ -470,9 +499,9 @@ void TraceRayInlineLoweringPass::LowerSyncStackToShadowMemory(Function& F)
470499

471500
if (IGC_IS_FLAG_DISABLED(DisableInvalidateRTStackAfterLastRead))
472501
{
473-
auto* LSCInvalidate = GenISAIntrinsic::getDeclaration(
502+
auto* fn = GenISAIntrinsic::getDeclaration(
474503
F.getParent(),
475-
GenISAIntrinsic::GenISA_LSCInvalidate,
504+
GenISAIntrinsic::GenISA_LSCLoadWithSideEffects,
476505
{ builder.getInt32Ty(), HWStackPointer->getType() }
477506
);
478507

@@ -487,7 +516,7 @@ void TraceRayInlineLoweringPass::LowerSyncStackToShadowMemory(Function& F)
487516
for (uint i = 0; i < getSyncStackSize() / m_CGCtx->platform.LSCCachelineSize(); i++)
488517
{
489518
builder.CreateCall(
490-
LSCInvalidate,
519+
fn,
491520
{
492521
HWStackPointer,
493522
builder.getInt32(i * m_CGCtx->platform.LSCCachelineSize()),

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8980,7 +8980,7 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
89808980
case GenISAIntrinsic::GenISA_LSCStore:
89818981
case GenISAIntrinsic::GenISA_LSCStoreBlock:
89828982
case GenISAIntrinsic::GenISA_LSCLoad:
8983-
case GenISAIntrinsic::GenISA_LSCInvalidate:
8983+
case GenISAIntrinsic::GenISA_LSCLoadWithSideEffects:
89848984
case GenISAIntrinsic::GenISA_LSCLoadBlock:
89858985
case GenISAIntrinsic::GenISA_LSCStoreCmask:
89868986
case GenISAIntrinsic::GenISA_LSCLoadCmask:
@@ -23151,7 +23151,7 @@ void EmitPass::emitLSCIntrinsic(llvm::GenIntrinsicInst* GII)
2315123151
GenISAIntrinsic::ID iid = GII->getIntrinsicID();
2315223152
switch (iid) {
2315323153
case GenISAIntrinsic::GenISA_LSCLoad:
23154-
case GenISAIntrinsic::GenISA_LSCInvalidate:
23154+
case GenISAIntrinsic::GenISA_LSCLoadWithSideEffects:
2315523155
case GenISAIntrinsic::GenISA_LSCLoadBlock:
2315623156
emitLscIntrinsicLoad(GII);
2315723157
break;

IGC/Compiler/CISACodeGen/helper.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1246,7 +1246,7 @@ namespace IGC
12461246
case GenISAIntrinsic::GenISA_simdBlockRead:
12471247
case GenISAIntrinsic::GenISA_simdBlockWrite:
12481248
case GenISAIntrinsic::GenISA_LSCLoad:
1249-
case GenISAIntrinsic::GenISA_LSCInvalidate:
1249+
case GenISAIntrinsic::GenISA_LSCLoadWithSideEffects:
12501250
case GenISAIntrinsic::GenISA_LSCLoadBlock:
12511251
case GenISAIntrinsic::GenISA_LSCLoadCmask:
12521252
pBuffer = intr->getOperand(0);

IGC/GenISAIntrinsics/Intrinsic_definitions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2871,7 +2871,7 @@
28712871
("int", "cache controls options (LSC_CACHE_OPTS)")],
28722872
"ReadMem"]],
28732873
####################################################################################################
2874-
"GenISA_LSCInvalidate": ["LSC load meant to be used for cache invalidation",
2874+
"GenISA_LSCLoadWithSideEffects": ["LSC load meant to be used just for issuing load instruction (i.e. making sure writes complete or to invalidate caches)",
28752875
[("anyint", ""),
28762876
[("anyptr", "address of value to invalidate"),
28772877
("int", "immediate offset (in bytes)"),

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,7 @@ DECLARE_IGC_GROUP("Raytracing Options")
955955
DECLARE_IGC_REGKEY(bool, EnableFillScheduling, false, "Schedule fills for reduced register pressure", false)
956956
DECLARE_IGC_REGKEY(bool, DisableSWStackOffsetElision, false, "Avoid loading offseting when known at compile-time", false)
957957
DECLARE_IGC_REGKEY(DWORD, OverrideTMax, 0, "Force TMax to the given value. When 0, do nothing.", false)
958+
DECLARE_IGC_REGKEY(bool, DisableLoadAsFenceOpInRaytracing, true, "Disable load as fence op in raytracing (rayquery only)", false)
958959

959960
DECLARE_IGC_GROUP("VectorCompiler Options")
960961
DECLARE_IGC_REGKEY(bool, DisableEuFusion, false, "Require disable of EU fusion", true)

IGC/common/igc_regkeys.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,7 @@ static void setImpliedIGCKeys()
878878
IGC_SET_IMPLIED_REGKEY(DisableRayTracingOptimizations, 1, DisableDPSE, true);
879879
IGC_SET_IMPLIED_REGKEY(DisableRayTracingOptimizations, 1, DisableSWStackOffsetElision, true);
880880
IGC_SET_IMPLIED_REGKEY(DisableRayTracingOptimizations, 1, DisableInvalidateRTStackAfterLastRead, true);
881+
IGC_SET_IMPLIED_REGKEY(DisableRayTracingOptimizations, 1, DisableLoadAsFenceOpInRaytracing, true);
881882

882883
IGC_SET_IMPLIED_REGKEY(ForceRTRetry, 1, RetryManagerFirstStateId, 1);
883884

0 commit comments

Comments
 (0)