Skip to content

Commit 3ac675e

Browse files
sys-igc authored and igcbot committed
[Autobackout][FunctionalRegression]Revert of change: ccfd505: Classic resource loops should emit lifetime.start
The 'normal' resource loops also have this issue when they exist inside another loop, so they should also explicitly call lifetime.start.
1 parent a1b6a34 commit 3ac675e

File tree

4 files changed: +103 -134 lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 16 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -5340,7 +5340,7 @@ void EmitPass::emitLdInstruction(llvm::Instruction* inst)
53405340
Value* ptr = inst->getOperand(textureArgIdx);
53415341
ResourceDescriptor resource = GetResourceVariable(ptr);
53425342
uint ResourceLoopMarker = m_RLA->GetResourceLoopMarker(inst);
5343-
bool needLoop = ResourceLoopHeader(dst, resource, flag, label, ResourceLoopMarker);
5343+
bool needLoop = ResourceLoopHeader(resource, flag, label, ResourceLoopMarker);
53445344
ResourceLoopSubIteration(resource, flag, label, ResourceLoopMarker);
53455345

53465346
m_encoder->SetPredicate(flag);
@@ -8364,7 +8364,7 @@ void EmitPass::emitInfoInstruction(InfoIntrinsic* inst)
83648364

83658365
uint label = 0;
83668366
CVariable* flag = nullptr;
8367-
bool needLoop = ResourceLoopHeader(tempDest, resource, flag, label);
8367+
bool needLoop = ResourceLoopHeader(resource, flag, label);
83688368
ResourceLoopSubIteration(resource, flag, label);
83698369

83708370
if (opCode == llvm_readsurfacetypeandformat)
@@ -8487,7 +8487,7 @@ void EmitPass::emitSurfaceInfo(GenIntrinsicInst* inst)
84878487
}
84888488
uint label = 0;
84898489
CVariable* flag = nullptr;
8490-
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
8490+
bool needLoop = ResourceLoopHeader(resource, flag, label);
84918491
ResourceLoopSubIteration(resource, flag, label);
84928492
CVariable* payload = m_currShader->GetNewVariable(8, ISA_TYPE_UD, EALIGN_GRF, CName::NONE);
84938493

@@ -8648,8 +8648,7 @@ void EmitPass::emitGather4Instruction(SamplerGatherIntrinsic* inst)
86488648
bool feedbackEnable = (m_destination->GetNumberElement() / numLanes(m_currShader->m_SIMDSize) == 5) ? true : false;
86498649
uint label = 0;
86508650
CVariable* flag = nullptr;
8651-
CVariable* dest = dst ? dst : m_destination;
8652-
bool needLoop = ResourceLoopHeader(dest, resource, sampler, flag, label);
8651+
bool needLoop = ResourceLoopHeader(resource, sampler, flag, label);
86538652
ResourceLoopSubIteration(resource, sampler, flag, label);
86548653
m_encoder->SetPredicate(flag);
86558654
m_encoder->Gather4Inst(
@@ -8741,7 +8740,7 @@ void EmitPass::emitLdmsInstruction(llvm::Instruction* inst)
87418740
bool feedbackEnable = writeMask.isSet(4);
87428741
uint label = 0;
87438742
CVariable* flag = nullptr;
8744-
bool needLoop = ResourceLoopHeader(dst, resource, flag, label);
8743+
bool needLoop = ResourceLoopHeader(resource, flag, label);
87458744
ResourceLoopSubIteration(resource, flag, label);
87468745
m_encoder->SetPredicate(flag);
87478746
m_encoder->LoadMS(opCode, writeMask.getEM(), offset, resource, numSources, dst, payload, feedbackEnable);
@@ -11240,7 +11239,7 @@ void EmitPass::emitLoad3DInner(LdRawIntrinsic* inst, ResourceDescriptor& resourc
1124011239
{
1124111240
uint label = 0;
1124211241
CVariable* flag = nullptr;
11243-
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
11242+
bool needLoop = ResourceLoopHeader(resource, flag, label);
1124411243
ResourceLoopSubIteration(resource, flag, label);
1124511244
uint sizeInBits = GetPrimitiveTypeSizeInRegisterInBits(inst->getType());
1124611245
IGC_ASSERT_MESSAGE((sizeInBits == 8) || (sizeInBits == 16) || (sizeInBits == 32) || (sizeInBits == 64) || (sizeInBits == 96) || (sizeInBits == 128),
@@ -12560,7 +12559,7 @@ void EmitPass::emitStore3DInner(Value* pllValToStore, Value* pllDstPtr, Value* p
1256012559

1256112560
uint label = 0;
1256212561
CVariable* flag = nullptr;
12563-
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
12562+
bool needLoop = ResourceLoopHeader(resource, flag, label);
1256412563
ResourceLoopSubIteration(resource, flag, label);
1256512564
if (sizeInBits == 32)
1256612565
{
@@ -16111,8 +16110,7 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst *pInst, Value *dstAddr,
1611116110
}
1611216111
uint label = 0;
1611316112
CVariable* flag = nullptr;
16114-
CVariable* dest = pDst ? pDst : m_destination;
16115-
bool needLoop = ResourceLoopHeader(dest, resource, flag, label);
16113+
bool needLoop = ResourceLoopHeader(resource, flag, label);
1611616114
ResourceLoopSubIteration(resource, flag, label);
1611716115
if (shouldGenerateLSC(pInst)) {
1611816116
auto cacheOpts = LSC_DEFAULT_CACHING;
@@ -16322,7 +16320,7 @@ void EmitPass::emitAtomicTyped(GenIntrinsicInst* pInsn)
1632216320

1632316321
uint label = 0;
1632416322
CVariable* flag = nullptr;
16325-
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
16323+
bool needLoop = ResourceLoopHeader(resource, flag, label);
1632616324
ResourceLoopSubIteration(resource, flag, label);
1632716325
for (uint i = 0; i < loopIter; ++i)
1632816326
{
@@ -16431,7 +16429,7 @@ void EmitPass::emitTypedRead(llvm::Instruction* pInsn)
1643116429
{
1643216430
uint label = 0;
1643316431
CVariable* flag = nullptr;
16434-
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
16432+
bool needLoop = ResourceLoopHeader(resource, flag, label);
1643516433
ResourceLoopSubIteration(resource, flag, label);
1643616434
CVariable* tempdst[4] = { nullptr, nullptr, nullptr, nullptr };
1643716435
SIMDMode instWidth = std::min(
@@ -16553,7 +16551,7 @@ void EmitPass::emitTypedWrite(llvm::Instruction* pInsn)
1655316551
{
1655416552
uint label = 0;
1655516553
CVariable* flag = nullptr;
16556-
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
16554+
bool needLoop = ResourceLoopHeader(resource, flag, label);
1655716555
ResourceLoopSubIteration(resource, flag, label);
1655816556
uint parameterLength = 4;
1655916557

@@ -17203,7 +17201,7 @@ void EmitPass::emitAtomicCounter(llvm::GenIntrinsicInst* pInsn)
1720317201

1720417202
uint label = 0;
1720517203
CVariable* flag = nullptr;
17206-
bool needLoop = ResourceLoopHeader(dst, resource, flag, label);
17204+
bool needLoop = ResourceLoopHeader(resource, flag, label);
1720717205
ResourceLoopSubIteration(resource, flag, label);
1720817206

1720917207
uint messageDescriptor = encodeMessageDescriptorForAtomicUnaryOp(
@@ -20095,7 +20093,7 @@ void EmitPass::emitLSCTypedRead(llvm::Instruction* pInsn)
2009520093
{
2009620094
uint label = 0;
2009720095
CVariable* flag = nullptr;
20098-
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
20096+
bool needLoop = ResourceLoopHeader(resource, flag, label);
2009920097
ResourceLoopSubIteration(resource, flag, label);
2010020098
CVariable* tempdst[4] = { nullptr, nullptr, nullptr, nullptr };
2010120099
auto instWidth = m_currShader->m_Platform->getMaxLSCTypedMessageSize();
@@ -20193,7 +20191,7 @@ void EmitPass::emitLSCTypedWrite(llvm::Instruction* pInsn)
2019320191

2019420192
uint label = 0;
2019520193
CVariable* flag = nullptr;
20196-
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
20194+
bool needLoop = ResourceLoopHeader(resource, flag, label);
2019720195
ResourceLoopSubIteration(resource, flag, label);
2019820196
uint parameterLength = 4;
2019920197

@@ -20450,7 +20448,7 @@ void EmitPass::emitLSCAtomicTyped(llvm::GenIntrinsicInst* inst)
2045020448

2045120449
uint label = 0;
2045220450
CVariable* flag = nullptr;
20453-
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
20451+
bool needLoop = ResourceLoopHeader(resource, flag, label);
2045420452
ResourceLoopSubIteration(resource, flag, label);
2045520453
CVariable* tempdst[4] = { nullptr, nullptr, nullptr, nullptr };
2045620454
auto instWidth = m_currShader->m_Platform->getMaxLSCTypedMessageSize();
@@ -21726,21 +21724,19 @@ SamplerDescriptor EmitPass::GetSamplerVariable(Value* sampleOp)
2172621724
}
2172721725

2172821726
bool EmitPass::ResourceLoopHeader(
21729-
const CVariable* destination,
2173021727
ResourceDescriptor& resource,
2173121728
CVariable*& flag,
2173221729
uint& label,
2173321730
uint ResourceLoopMarker,
2173421731
int* subInteration)
2173521732
{
2173621733
SamplerDescriptor sampler;
21737-
return ResourceLoopHeader(destination, resource, sampler, flag, label, ResourceLoopMarker, subInteration);
21734+
return ResourceLoopHeader(resource, sampler, flag, label, ResourceLoopMarker, subInteration);
2173821735
}
2173921736

2174021737
// Insert loop header to handle non-uniform resource and sampler
2174121738
// This generates sub-optimal code for SIMD32, this can be revisited if we need better code generation
2174221739
bool EmitPass::ResourceLoopHeader(
21743-
const CVariable* destination,
2174421740
ResourceDescriptor& resource,
2174521741
SamplerDescriptor& sampler,
2174621742
CVariable*& flag,
@@ -21778,8 +21774,6 @@ bool EmitPass::ResourceLoopHeader(
2177821774
}
2177921775
m_currShader->IncNumSampleBallotLoops();
2178021776

21781-
m_encoder->Lifetime(LIFETIME_START, (CVariable*)destination);
21782-
2178321777
label = m_encoder->GetNewLabelID("_opt_resource_loop");
2178421778
m_encoder->AddDivergentResourceLoopLabel(label);
2178521779
m_encoder->Push();

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -802,15 +802,13 @@ class EmitPass : public llvm::FunctionPass
802802

803803
// generate loop header to process sample instruction with varying resource/sampler
804804
bool ResourceLoopHeader(
805-
const CVariable* destination,
806805
ResourceDescriptor& resource,
807806
SamplerDescriptor& sampler,
808807
CVariable*& flag,
809808
uint& label,
810809
uint ResourceLoopMarker = 0,
811810
int* subInteration = nullptr);
812811
bool ResourceLoopHeader(
813-
const CVariable* destination,
814812
ResourceDescriptor& resource,
815813
CVariable*& flag,
816814
uint& label,
@@ -872,7 +870,7 @@ class EmitPass : public llvm::FunctionPass
872870
}
873871

874872
// label resource loop
875-
ResourceLoopHeader(currentDestination, resource, sampler, flag, label, ResourceLoopMarker, &subInteration);
873+
ResourceLoopHeader(resource, sampler, flag, label, ResourceLoopMarker, &subInteration);
876874
}
877875

878876
// subInteration == 0 could mean no resource loop tag indicated

IGC/Compiler/tests/EmitVISAPass/resource-loop-unroll-iteration-lsc.ll

Lines changed: 53 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -17,80 +17,68 @@
1717
@ThreadGroupSize_Z = constant i32 1
1818

1919
define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
20-
; CHECK: _main_0:
21-
; CHECK-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
22-
; CHECK-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0>
23-
; CHECK-NEXT: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w
24-
; CHECK-NEXT: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0>
25-
; CHECK-NEXT: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0>
26-
; CHECK-NEXT: setp (M1_NM, 16) P1 0x0:ud
27-
; CHECK-NEXT: setp (M1_NM, 16) P2 0x0:ud
28-
; CHECK-NEXT: setp (M1_NM, 16) P3 0x0:ud
29-
; CHECK-NEXT: lifetime.start call_
30-
;
31-
; CHECK: _test1_001__opt_resource_loop:
32-
; CHECK-NEXT: setp (M1_NM, 16) P4 0x0:ud
33-
; CHECK-NEXT: setp (M1_NM, 16) P5 0x0:ud
34-
; CHECK-NEXT: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0>
35-
; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5
36-
; CHECK-NEXT: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0>
37-
; CHECK-NEXT: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w
38-
; CHECK-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0>
39-
; CHECK-NEXT: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d
40-
; CHECK-NEXT: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
41-
; CHECK-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32
42-
; CHECK-NEXT: or (M1_NM, 16) P4 P4 P6
43-
; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P6
44-
; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5
45-
; CHECK-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0>
46-
; CHECK-NEXT: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud
47-
; CHECK-NEXT: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w
48-
; CHECK-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0>
49-
; CHECK-NEXT: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d
50-
; CHECK-NEXT: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
51-
; CHECK-NEXT: and (M1_NM, 16) P7 P7 P5
52-
; CHECK-NEXT: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32
53-
; CHECK-NEXT: or (M1_NM, 16) P4 P4 P7
54-
; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P7
55-
; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5
56-
; CHECK-NEXT: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0>
57-
; CHECK-NEXT: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud
58-
; CHECK-NEXT: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w
59-
; CHECK-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0>
60-
; CHECK-NEXT: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d
61-
; CHECK-NEXT: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
62-
; CHECK-NEXT: and (M1_NM, 16) P8 P8 P5
63-
; CHECK-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32
64-
; CHECK-NEXT: or (M1_NM, 16) P4 P4 P8
65-
; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P8
66-
; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5
67-
; CHECK-NEXT: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0>
68-
; CHECK-NEXT: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud
69-
; CHECK-NEXT: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w
70-
; CHECK-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0>
71-
; CHECK-NEXT: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d
72-
; CHECK-NEXT: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
73-
; CHECK-NEXT: and (M1_NM, 16) P9 P9 P5
74-
; CHECK-NEXT: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32
75-
; CHECK-NEXT: or (M1_NM, 16) P4 P4 P9
76-
; CHECK-NEXT: (!P4) goto (M1, 16) _test1_001__opt_resource_loop
77-
; CHECK-NEXT: mul (M1_NM, 1) V0046(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw
78-
; CHECK-NEXT: addr_add (M1_NM, 1) A4(0)<1> &call_ V0046(0,0)<0;1,0>
79-
; CHECK-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d
80-
; CHECK-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0>
81-
; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0>
82-
; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0>
83-
; CHECK-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32
84-
; CHECK-NEXT: ret (M1, 1)
20+
entry:
21+
; CHECK: _main_0:
8522

8623
%svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
24+
; CHECK: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
8725

8826
%nonuniform = zext i16 %svn to i32
27+
; CHECK: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0>
8928

9029
%NonUniformResource = inttoptr i32 %nonuniform to <4 x float> addrspace(2621440)*
9130
%offset = add i32 %src1, 1
31+
; CHECK: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w
32+
; CHECK: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0>
33+
; CHECK: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0>
9234

9335
%call = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* %NonUniformResource, i32 %offset, i32 4, i1 false)
36+
; CHECK: _test1_001__opt_resource_loop:
37+
; CHECK: setp (M1_NM, 16) P4 0x0:ud
38+
; CHECK: setp (M1_NM, 16) P5 0x0:ud
39+
; CHECK: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0>
40+
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
41+
; CHECK: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0>
42+
; CHECK: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w
43+
; CHECK: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0>
44+
; CHECK: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d
45+
; CHECK: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
46+
; CHECK: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32
47+
; CHECK: or (M1_NM, 16) P4 P4 P6
48+
; CHECK: xor (M1_NM, 16) P5 P5 P6
49+
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
50+
; CHECK: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0>
51+
; CHECK: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud
52+
; CHECK: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w
53+
; CHECK: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0>
54+
; CHECK: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d
55+
; CHECK: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
56+
; CHECK: and (M1_NM, 16) P7 P7 P5
57+
; CHECK: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32
58+
; CHECK: or (M1_NM, 16) P4 P4 P7
59+
; CHECK: xor (M1_NM, 16) P5 P5 P7
60+
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
61+
; CHECK: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0>
62+
; CHECK: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud
63+
; CHECK: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w
64+
; CHECK: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0>
65+
; CHECK: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d
66+
; CHECK: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
67+
; CHECK: and (M1_NM, 16) P8 P8 P5
68+
; CHECK: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32
69+
; CHECK: or (M1_NM, 16) P4 P4 P8
70+
; CHECK: xor (M1_NM, 16) P5 P5 P8
71+
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
72+
; CHECK: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0>
73+
; CHECK: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud
74+
; CHECK: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w
75+
; CHECK: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0>
76+
; CHECK: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d
77+
; CHECK: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
78+
; CHECK: and (M1_NM, 16) P9 P9 P5
79+
; CHECK: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32
80+
; CHECK: or (M1_NM, 16) P4 P4 P9
81+
; CHECK: (!P4) goto (M1, 16) _test1_001__opt_resource_loop
9482
%out = extractelement <3 x i32> %call, i32 %val
9583
store i32 %out, i32 addrspace(1)* %dst, align 1
9684
ret void

0 commit comments

Comments
 (0)