Skip to content

Commit ccfd505

Browse files
ichenkai authored and igcbot committed
Pass destination to resource loop header
Pass destination to resource loop header to make a safe lifetime predicate.
1 parent 0a7b667 commit ccfd505

File tree

4 files changed

+134
-103
lines changed

4 files changed

+134
-103
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -5340,7 +5340,7 @@ void EmitPass::emitLdInstruction(llvm::Instruction* inst)
53405340
Value* ptr = inst->getOperand(textureArgIdx);
53415341
ResourceDescriptor resource = GetResourceVariable(ptr);
53425342
uint ResourceLoopMarker = m_RLA->GetResourceLoopMarker(inst);
5343-
bool needLoop = ResourceLoopHeader(resource, flag, label, ResourceLoopMarker);
5343+
bool needLoop = ResourceLoopHeader(dst, resource, flag, label, ResourceLoopMarker);
53445344
ResourceLoopSubIteration(resource, flag, label, ResourceLoopMarker);
53455345

53465346
m_encoder->SetPredicate(flag);
@@ -8364,7 +8364,7 @@ void EmitPass::emitInfoInstruction(InfoIntrinsic* inst)
83648364

83658365
uint label = 0;
83668366
CVariable* flag = nullptr;
8367-
bool needLoop = ResourceLoopHeader(resource, flag, label);
8367+
bool needLoop = ResourceLoopHeader(tempDest, resource, flag, label);
83688368
ResourceLoopSubIteration(resource, flag, label);
83698369

83708370
if (opCode == llvm_readsurfacetypeandformat)
@@ -8487,7 +8487,7 @@ void EmitPass::emitSurfaceInfo(GenIntrinsicInst* inst)
84878487
}
84888488
uint label = 0;
84898489
CVariable* flag = nullptr;
8490-
bool needLoop = ResourceLoopHeader(resource, flag, label);
8490+
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
84918491
ResourceLoopSubIteration(resource, flag, label);
84928492
CVariable* payload = m_currShader->GetNewVariable(8, ISA_TYPE_UD, EALIGN_GRF, CName::NONE);
84938493

@@ -8648,7 +8648,8 @@ void EmitPass::emitGather4Instruction(SamplerGatherIntrinsic* inst)
86488648
bool feedbackEnable = (m_destination->GetNumberElement() / numLanes(m_currShader->m_SIMDSize) == 5) ? true : false;
86498649
uint label = 0;
86508650
CVariable* flag = nullptr;
8651-
bool needLoop = ResourceLoopHeader(resource, sampler, flag, label);
8651+
CVariable* dest = dst ? dst : m_destination;
8652+
bool needLoop = ResourceLoopHeader(dest, resource, sampler, flag, label);
86528653
ResourceLoopSubIteration(resource, sampler, flag, label);
86538654
m_encoder->SetPredicate(flag);
86548655
m_encoder->Gather4Inst(
@@ -8740,7 +8741,7 @@ void EmitPass::emitLdmsInstruction(llvm::Instruction* inst)
87408741
bool feedbackEnable = writeMask.isSet(4);
87418742
uint label = 0;
87428743
CVariable* flag = nullptr;
8743-
bool needLoop = ResourceLoopHeader(resource, flag, label);
8744+
bool needLoop = ResourceLoopHeader(dst, resource, flag, label);
87448745
ResourceLoopSubIteration(resource, flag, label);
87458746
m_encoder->SetPredicate(flag);
87468747
m_encoder->LoadMS(opCode, writeMask.getEM(), offset, resource, numSources, dst, payload, feedbackEnable);
@@ -11239,7 +11240,7 @@ void EmitPass::emitLoad3DInner(LdRawIntrinsic* inst, ResourceDescriptor& resourc
1123911240
{
1124011241
uint label = 0;
1124111242
CVariable* flag = nullptr;
11242-
bool needLoop = ResourceLoopHeader(resource, flag, label);
11243+
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
1124311244
ResourceLoopSubIteration(resource, flag, label);
1124411245
uint sizeInBits = GetPrimitiveTypeSizeInRegisterInBits(inst->getType());
1124511246
IGC_ASSERT_MESSAGE((sizeInBits == 8) || (sizeInBits == 16) || (sizeInBits == 32) || (sizeInBits == 64) || (sizeInBits == 96) || (sizeInBits == 128),
@@ -12559,7 +12560,7 @@ void EmitPass::emitStore3DInner(Value* pllValToStore, Value* pllDstPtr, Value* p
1255912560

1256012561
uint label = 0;
1256112562
CVariable* flag = nullptr;
12562-
bool needLoop = ResourceLoopHeader(resource, flag, label);
12563+
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
1256312564
ResourceLoopSubIteration(resource, flag, label);
1256412565
if (sizeInBits == 32)
1256512566
{
@@ -16110,7 +16111,8 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst *pInst, Value *dstAddr,
1611016111
}
1611116112
uint label = 0;
1611216113
CVariable* flag = nullptr;
16113-
bool needLoop = ResourceLoopHeader(resource, flag, label);
16114+
CVariable* dest = pDst ? pDst : m_destination;
16115+
bool needLoop = ResourceLoopHeader(dest, resource, flag, label);
1611416116
ResourceLoopSubIteration(resource, flag, label);
1611516117
if (shouldGenerateLSC(pInst)) {
1611616118
auto cacheOpts = LSC_DEFAULT_CACHING;
@@ -16320,7 +16322,7 @@ void EmitPass::emitAtomicTyped(GenIntrinsicInst* pInsn)
1632016322

1632116323
uint label = 0;
1632216324
CVariable* flag = nullptr;
16323-
bool needLoop = ResourceLoopHeader(resource, flag, label);
16325+
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
1632416326
ResourceLoopSubIteration(resource, flag, label);
1632516327
for (uint i = 0; i < loopIter; ++i)
1632616328
{
@@ -16429,7 +16431,7 @@ void EmitPass::emitTypedRead(llvm::Instruction* pInsn)
1642916431
{
1643016432
uint label = 0;
1643116433
CVariable* flag = nullptr;
16432-
bool needLoop = ResourceLoopHeader(resource, flag, label);
16434+
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
1643316435
ResourceLoopSubIteration(resource, flag, label);
1643416436
CVariable* tempdst[4] = { nullptr, nullptr, nullptr, nullptr };
1643516437
SIMDMode instWidth = std::min(
@@ -16551,7 +16553,7 @@ void EmitPass::emitTypedWrite(llvm::Instruction* pInsn)
1655116553
{
1655216554
uint label = 0;
1655316555
CVariable* flag = nullptr;
16554-
bool needLoop = ResourceLoopHeader(resource, flag, label);
16556+
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
1655516557
ResourceLoopSubIteration(resource, flag, label);
1655616558
uint parameterLength = 4;
1655716559

@@ -17201,7 +17203,7 @@ void EmitPass::emitAtomicCounter(llvm::GenIntrinsicInst* pInsn)
1720117203

1720217204
uint label = 0;
1720317205
CVariable* flag = nullptr;
17204-
bool needLoop = ResourceLoopHeader(resource, flag, label);
17206+
bool needLoop = ResourceLoopHeader(dst, resource, flag, label);
1720517207
ResourceLoopSubIteration(resource, flag, label);
1720617208

1720717209
uint messageDescriptor = encodeMessageDescriptorForAtomicUnaryOp(
@@ -20093,7 +20095,7 @@ void EmitPass::emitLSCTypedRead(llvm::Instruction* pInsn)
2009320095
{
2009420096
uint label = 0;
2009520097
CVariable* flag = nullptr;
20096-
bool needLoop = ResourceLoopHeader(resource, flag, label);
20098+
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
2009720099
ResourceLoopSubIteration(resource, flag, label);
2009820100
CVariable* tempdst[4] = { nullptr, nullptr, nullptr, nullptr };
2009920101
auto instWidth = m_currShader->m_Platform->getMaxLSCTypedMessageSize();
@@ -20191,7 +20193,7 @@ void EmitPass::emitLSCTypedWrite(llvm::Instruction* pInsn)
2019120193

2019220194
uint label = 0;
2019320195
CVariable* flag = nullptr;
20194-
bool needLoop = ResourceLoopHeader(resource, flag, label);
20196+
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
2019520197
ResourceLoopSubIteration(resource, flag, label);
2019620198
uint parameterLength = 4;
2019720199

@@ -20448,7 +20450,7 @@ void EmitPass::emitLSCAtomicTyped(llvm::GenIntrinsicInst* inst)
2044820450

2044920451
uint label = 0;
2045020452
CVariable* flag = nullptr;
20451-
bool needLoop = ResourceLoopHeader(resource, flag, label);
20453+
bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
2045220454
ResourceLoopSubIteration(resource, flag, label);
2045320455
CVariable* tempdst[4] = { nullptr, nullptr, nullptr, nullptr };
2045420456
auto instWidth = m_currShader->m_Platform->getMaxLSCTypedMessageSize();
@@ -21724,19 +21726,21 @@ SamplerDescriptor EmitPass::GetSamplerVariable(Value* sampleOp)
2172421726
}
2172521727

2172621728
bool EmitPass::ResourceLoopHeader(
21729+
const CVariable* destination,
2172721730
ResourceDescriptor& resource,
2172821731
CVariable*& flag,
2172921732
uint& label,
2173021733
uint ResourceLoopMarker,
2173121734
int* subInteration)
2173221735
{
2173321736
SamplerDescriptor sampler;
21734-
return ResourceLoopHeader(resource, sampler, flag, label, ResourceLoopMarker, subInteration);
21737+
return ResourceLoopHeader(destination, resource, sampler, flag, label, ResourceLoopMarker, subInteration);
2173521738
}
2173621739

2173721740
// Insert loop header to handle non-uniform resource and sampler
2173821741
// This generates sub-optimal code for SIMD32, this can be revisited if we need better code generation
2173921742
bool EmitPass::ResourceLoopHeader(
21743+
const CVariable* destination,
2174021744
ResourceDescriptor& resource,
2174121745
SamplerDescriptor& sampler,
2174221746
CVariable*& flag,
@@ -21774,6 +21778,8 @@ bool EmitPass::ResourceLoopHeader(
2177421778
}
2177521779
m_currShader->IncNumSampleBallotLoops();
2177621780

21781+
m_encoder->Lifetime(LIFETIME_START, (CVariable*)destination);
21782+
2177721783
label = m_encoder->GetNewLabelID("_opt_resource_loop");
2177821784
m_encoder->AddDivergentResourceLoopLabel(label);
2177921785
m_encoder->Push();

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -802,13 +802,15 @@ class EmitPass : public llvm::FunctionPass
802802

803803
// generate loop header to process sample instruction with varying resource/sampler
804804
bool ResourceLoopHeader(
805+
const CVariable* destination,
805806
ResourceDescriptor& resource,
806807
SamplerDescriptor& sampler,
807808
CVariable*& flag,
808809
uint& label,
809810
uint ResourceLoopMarker = 0,
810811
int* subInteration = nullptr);
811812
bool ResourceLoopHeader(
813+
const CVariable* destination,
812814
ResourceDescriptor& resource,
813815
CVariable*& flag,
814816
uint& label,
@@ -870,7 +872,7 @@ class EmitPass : public llvm::FunctionPass
870872
}
871873

872874
// label resource loop
873-
ResourceLoopHeader(resource, sampler, flag, label, ResourceLoopMarker, &subInteration);
875+
ResourceLoopHeader(currentDestination, resource, sampler, flag, label, ResourceLoopMarker, &subInteration);
874876
}
875877

876878
// subInteration == 0 could mean no resource loop tag indicated

IGC/Compiler/tests/EmitVISAPass/resource-loop-unroll-iteration-lsc.ll

Lines changed: 65 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -17,68 +17,80 @@
1717
@ThreadGroupSize_Z = constant i32 1
1818

1919
define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
20-
entry:
21-
; CHECK: _main_0:
20+
; CHECK: _main_0:
21+
; CHECK-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
22+
; CHECK-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0>
23+
; CHECK-NEXT: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w
24+
; CHECK-NEXT: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0>
25+
; CHECK-NEXT: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0>
26+
; CHECK-NEXT: setp (M1_NM, 16) P1 0x0:ud
27+
; CHECK-NEXT: setp (M1_NM, 16) P2 0x0:ud
28+
; CHECK-NEXT: setp (M1_NM, 16) P3 0x0:ud
29+
; CHECK-NEXT: lifetime.start call_
30+
;
31+
; CHECK: _test1_001__opt_resource_loop:
32+
; CHECK-NEXT: setp (M1_NM, 16) P4 0x0:ud
33+
; CHECK-NEXT: setp (M1_NM, 16) P5 0x0:ud
34+
; CHECK-NEXT: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0>
35+
; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5
36+
; CHECK-NEXT: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0>
37+
; CHECK-NEXT: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w
38+
; CHECK-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0>
39+
; CHECK-NEXT: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d
40+
; CHECK-NEXT: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
41+
; CHECK-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32
42+
; CHECK-NEXT: or (M1_NM, 16) P4 P4 P6
43+
; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P6
44+
; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5
45+
; CHECK-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0>
46+
; CHECK-NEXT: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud
47+
; CHECK-NEXT: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w
48+
; CHECK-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0>
49+
; CHECK-NEXT: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d
50+
; CHECK-NEXT: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
51+
; CHECK-NEXT: and (M1_NM, 16) P7 P7 P5
52+
; CHECK-NEXT: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32
53+
; CHECK-NEXT: or (M1_NM, 16) P4 P4 P7
54+
; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P7
55+
; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5
56+
; CHECK-NEXT: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0>
57+
; CHECK-NEXT: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud
58+
; CHECK-NEXT: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w
59+
; CHECK-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0>
60+
; CHECK-NEXT: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d
61+
; CHECK-NEXT: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
62+
; CHECK-NEXT: and (M1_NM, 16) P8 P8 P5
63+
; CHECK-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32
64+
; CHECK-NEXT: or (M1_NM, 16) P4 P4 P8
65+
; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P8
66+
; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5
67+
; CHECK-NEXT: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0>
68+
; CHECK-NEXT: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud
69+
; CHECK-NEXT: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w
70+
; CHECK-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0>
71+
; CHECK-NEXT: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d
72+
; CHECK-NEXT: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
73+
; CHECK-NEXT: and (M1_NM, 16) P9 P9 P5
74+
; CHECK-NEXT: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32
75+
; CHECK-NEXT: or (M1_NM, 16) P4 P4 P9
76+
; CHECK-NEXT: (!P4) goto (M1, 16) _test1_001__opt_resource_loop
77+
; CHECK-NEXT: mul (M1_NM, 1) V0046(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw
78+
; CHECK-NEXT: addr_add (M1_NM, 1) A4(0)<1> &call_ V0046(0,0)<0;1,0>
79+
; CHECK-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d
80+
; CHECK-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0>
81+
; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0>
82+
; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0>
83+
; CHECK-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32
84+
; CHECK-NEXT: ret (M1, 1)
2285

2386
%svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
24-
; CHECK: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
2587

2688
%nonuniform = zext i16 %svn to i32
27-
; CHECK: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0>
2889

2990
%NonUniformResource = inttoptr i32 %nonuniform to <4 x float> addrspace(2621440)*
3091
%offset = add i32 %src1, 1
31-
; CHECK: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w
32-
; CHECK: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0>
33-
; CHECK: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0>
3492

3593
%call = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* %NonUniformResource, i32 %offset, i32 4, i1 false)
36-
; CHECK: _test1_001__opt_resource_loop:
37-
; CHECK: setp (M1_NM, 16) P4 0x0:ud
38-
; CHECK: setp (M1_NM, 16) P5 0x0:ud
39-
; CHECK: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0>
40-
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
41-
; CHECK: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0>
42-
; CHECK: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w
43-
; CHECK: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0>
44-
; CHECK: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d
45-
; CHECK: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
46-
; CHECK: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32
47-
; CHECK: or (M1_NM, 16) P4 P4 P6
48-
; CHECK: xor (M1_NM, 16) P5 P5 P6
49-
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
50-
; CHECK: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0>
51-
; CHECK: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud
52-
; CHECK: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w
53-
; CHECK: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0>
54-
; CHECK: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d
55-
; CHECK: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
56-
; CHECK: and (M1_NM, 16) P7 P7 P5
57-
; CHECK: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32
58-
; CHECK: or (M1_NM, 16) P4 P4 P7
59-
; CHECK: xor (M1_NM, 16) P5 P5 P7
60-
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
61-
; CHECK: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0>
62-
; CHECK: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud
63-
; CHECK: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w
64-
; CHECK: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0>
65-
; CHECK: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d
66-
; CHECK: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
67-
; CHECK: and (M1_NM, 16) P8 P8 P5
68-
; CHECK: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32
69-
; CHECK: or (M1_NM, 16) P4 P4 P8
70-
; CHECK: xor (M1_NM, 16) P5 P5 P8
71-
; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5
72-
; CHECK: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0>
73-
; CHECK: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud
74-
; CHECK: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w
75-
; CHECK: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0>
76-
; CHECK: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d
77-
; CHECK: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
78-
; CHECK: and (M1_NM, 16) P9 P9 P5
79-
; CHECK: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32
80-
; CHECK: or (M1_NM, 16) P4 P4 P9
81-
; CHECK: (!P4) goto (M1, 16) _test1_001__opt_resource_loop
8294
%out = extractelement <3 x i32> %call, i32 %val
8395
store i32 %out, i32 addrspace(1)* %dst, align 1
8496
ret void

0 commit comments

Comments
 (0)