@@ -18036,6 +18036,33 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18036
18036
// eOffset is in bytes
18037
18037
// offset corresponds to Int2Ptr operand obtained during pattern matching
18038
18038
CVariable* eOffset = GetSymbol(varOffset);
18039
+
18040
+ // Mask off the low bits of each offset (AND with ~(align - 1)) to avoid unaligned addresses on inactive lanes.
18041
+ // The most accurate way to address a subspan issue caused by non-uniformness
18042
+ // of quads/subgroups related to computations of derivatives is to copy the nearest neighbor
18043
+ // as a part of its quad/subgroup instead of enabling a no_mask flag.
18044
+ bool alignAddressInInactiveLanes =
18045
+ m_encoder->IsSubSpanDestination() && !eOffset->IsUniform();
18046
+ if (alignAddressInInactiveLanes)
18047
+ {
18048
+ uint32_t origTypeSize = CEncoder::GetCISADataTypeSize(eOffset->GetType());
18049
+ const VISA_Type aliasType = ISA_TYPE_UW;
18050
+ uint32_t aliasTypeSize = CEncoder::GetCISADataTypeSize(aliasType);
18051
+ uint32_t stride = origTypeSize / aliasTypeSize;
18052
+ CVariable* shortOffVar = m_currShader->GetNewAlias(eOffset, aliasType, 0,
18053
+ eOffset->GetNumberElement() * origTypeSize / aliasTypeSize);
18054
+ CVariable* maskVar = m_currShader->ImmToVariable(~(align - 1), shortOffVar->GetType());
18055
+ for (uint instance = 0; instance < eOffset->GetNumberInstance(); instance++)
18056
+ {
18057
+ m_encoder->SetSecondHalf(instance == 1);
18058
+ m_encoder->SetSrcRegion(0, stride, 1, 0);
18059
+ m_encoder->SetDstRegion(stride);
18060
+ m_encoder->And(shortOffVar, shortOffVar, maskVar);
18061
+ m_encoder->Push();
18062
+ m_encoder->SetSecondHalf(false);
18063
+ }
18064
+ }
18065
+
18039
18066
if (useA32)
18040
18067
{
18041
18068
eOffset = TruncatePointer(eOffset);
0 commit comments