Skip to content

Commit 051633b

Browse files
AnBodrovapszymich
authored andcommitted
Revert: Move sub_group_id scalarization from CodeAssumption to WIAnalysis.
--------------------------- (cherry picked from commit b059af8)
1 parent b824d05 commit 051633b

File tree

7 files changed

+132
-170
lines changed

7 files changed

+132
-170
lines changed

IGC/AdaptorOCL/UnifyIROCL.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,11 @@ static void CommonOCLBasedPasses(
374374
mpm.add(createBIFTransformsPass());
375375
}
376376

377+
if (IGC_IS_FLAG_ENABLED(EnableCodeAssumption))
378+
{
379+
mpm.add(new CodeAssumption());
380+
}
381+
377382
if (pContext->m_instrTypes.hasFRem)
378383
{
379384
mpm.add(new HandleFRemInstructions());
@@ -400,11 +405,6 @@ static void CommonOCLBasedPasses(
400405
mpm.add(createTimeStatsCounterPass(pContext, TIME_Unify_BuiltinImport, STATS_COUNTER_END));
401406
mpm.add(new BIFFlagCtrlResolution(pContext));
402407

403-
if (IGC_IS_FLAG_ENABLED(EnableCodeAssumption))
404-
{
405-
mpm.add(new CodeAssumption());
406-
}
407-
408408
if (IGC_GET_FLAG_VALUE(AllowMem2Reg))
409409
{
410410
mpm.add(createPromoteMemoryToRegisterPass());

IGC/Compiler/CISACodeGen/WIAnalysis.cpp

Lines changed: 0 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,11 @@ SPDX-License-Identifier: MIT
2626
#include <stack>
2727
#include <sstream>
2828
#include "Probe/Assertion.h"
29-
#include <llvmWrapper/IR/PatternMatch.h>
3029

3130
using namespace llvm;
3231
using namespace IGC;
3332
using namespace IGC::IGCMD;
3433
using namespace IGC::Debug;
35-
using namespace llvm::PatternMatch;
3634

3735
static cl::opt<bool> PrintWiaCheck(
3836
"print-wia-check", cl::init(false), cl::Hidden,
@@ -1257,134 +1255,6 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const LoadInst* inst)
12571255
return calculate_dep_simple(inst);
12581256
}
12591257

1260-
// This functions checks if result of inst is sub_group_id value.
1261-
// In other words, it checks the following pattern:
1262-
//
1263-
// %localSizeX = extractelement <3 x i32> %localSize, i32 0
1264-
// %localSizeY = extractelement <3 x i32> %localSize, i32 1
1265-
// %localIdZZext = zext i16 %localIdZ to i32
1266-
// %mul1 = mul i32 %localSizeY, %localIdZzext
1267-
// %localIdYZext = zext i16 %localIdY to i32
1268-
// %add1 = add i32 %mul1, %localIdYZext
1269-
// %mul2 = mul i32 %add1, %localSizeX
1270-
// %add2 = add i32 %mul2, %localIdX4
1271-
// %simdSize = call i32 @llvm.genx.GenISA.simdSize()
1272-
//
1273-
// pattern 1:
1274-
// %lshr1 = lshr i32 %simdSize, 4
1275-
// %lshr2 = lshr i32 %add2, 3
1276-
// %sgid = lshr i32 %lshr2, %lshr1
1277-
//
1278-
// or
1279-
//
1280-
// pattern 2:
1281-
// %sgid = udiv i32 %add2, %simdSize
1282-
//
1283-
// This pattern matching can be removed, when get_subgroup_id call is inlined after applying the WIAnalysis.
1284-
bool WIAnalysisRunner::check_sg_id(const Instruction* inst)
1285-
{
1286-
const Value *val = cast<Value>(inst);
1287-
Value *lshrInst1 = nullptr;
1288-
Value *lshrInst2 = nullptr;
1289-
Value *addInst1 = nullptr;
1290-
Value *addInst2 = nullptr;
1291-
Value *callVal1 = nullptr;
1292-
Value *extrVal1 = nullptr;
1293-
Value *extrVal2 = nullptr;
1294-
Value *mulInst1 = nullptr;
1295-
Value *mulInst2 = nullptr;
1296-
Value *localIdX = nullptr;
1297-
Value *localIdY = nullptr;
1298-
Value *localIdZ = nullptr;
1299-
1300-
FunctionInfoMetaDataHandle funcInfoMD = m_pMdUtils->getFunctionsInfoItem(m_func);
1301-
SubGroupSizeMetaDataHandle subGroupSize = funcInfoMD->getSubGroupSize();
1302-
// If subGroupSize is not set in the metadata, then we need to check the pattern with llvm.genx.GenISA.simdSize call.
1303-
if (subGroupSize->hasValue())
1304-
{
1305-
uint32_t simdSize = (uint32_t)subGroupSize->getSIMDSize();
1306-
IGC_ASSERT(simdSize == 8 || simdSize == 16 || simdSize == 32);
1307-
uint32_t power = 0;
1308-
if (simdSize == 32)
1309-
{
1310-
power = 5;
1311-
}
1312-
else if (simdSize == 16)
1313-
{
1314-
power = 4;
1315-
}
1316-
else
1317-
{
1318-
power = 3;
1319-
}
1320-
1321-
auto lshrPat1 = m_LShr(m_Value(addInst1), m_SpecificInt(power));
1322-
if (!match(val, lshrPat1))
1323-
return false;
1324-
}
1325-
else
1326-
{
1327-
auto lshrPat1 = m_LShr(m_Value(lshrInst1), m_Value(lshrInst2));
1328-
1329-
// Check pattern 1.
1330-
if (match(val, lshrPat1))
1331-
{
1332-
auto lshrPat2 = m_LShr(m_Value(addInst1), m_SpecificInt(3));
1333-
auto lshrPat3 = m_LShr(m_Value(callVal1), m_SpecificInt(4));
1334-
1335-
match(lshrInst1, lshrPat2);
1336-
match(lshrInst2, lshrPat3);
1337-
}
1338-
1339-
// If pattern 1 was not found, check pattern 2.
1340-
if (!callVal1 || !addInst1)
1341-
{
1342-
auto udivPat1 = m_UDiv(m_Value(addInst1), m_Value(callVal1));
1343-
if (!match(val, udivPat1))
1344-
return false;
1345-
}
1346-
1347-
GenIntrinsicInst *genInst = dyn_cast<GenIntrinsicInst>(callVal1);
1348-
if (!genInst || (genInst->getIntrinsicID() != GenISAIntrinsic::GenISA_simdSize))
1349-
return false;
1350-
}
1351-
1352-
auto addPat11 = m_Add(m_Value(mulInst1), m_ZExt(m_Value(localIdX)));
1353-
auto addPat12 = m_Add(m_ZExt(m_Value(localIdX)), m_Value(mulInst1));
1354-
if (!match(addInst1, addPat11) && !match(addInst1, addPat12))
1355-
return false;
1356-
1357-
auto mulPat11 = m_Mul(m_ExtractElt(m_Value(extrVal1), m_SpecificInt(0)), m_Value(addInst2));
1358-
auto mulPat12 = m_Mul(m_Value(addInst2), m_ExtractElt(m_Value(extrVal1), m_SpecificInt(0)));
1359-
if (!match(mulInst1, mulPat11) && !match(mulInst1, mulPat12))
1360-
return false;
1361-
1362-
auto addPat21 = m_Add(m_Value(mulInst2), m_ZExt(m_Value(localIdY)));
1363-
auto addPat22 = m_Add(m_ZExt(m_Value(localIdY)), m_Value(mulInst2));
1364-
if (!match(addInst2, addPat21) && !match(addInst2, addPat22))
1365-
return false;
1366-
1367-
auto mulPat21 = m_Mul(m_ExtractElt(m_Value(extrVal2), m_SpecificInt(1)), m_ZExt(m_Value(localIdZ)));
1368-
auto mulPat22 = m_Mul(m_ZExt(m_Value(localIdZ)), m_ExtractElt(m_Value(extrVal2), m_SpecificInt(1)));
1369-
if (!match(mulInst2, mulPat21) && !match(mulInst2, mulPat22))
1370-
return false;
1371-
1372-
ImplicitArgs implicitArgs(*m_func, m_pMdUtils);
1373-
Value *argX = implicitArgs.getImplicitArgValue(*m_func, ImplicitArg::LOCAL_ID_X, m_pMdUtils);
1374-
Value *argY = implicitArgs.getImplicitArgValue(*m_func, ImplicitArg::LOCAL_ID_Y, m_pMdUtils);
1375-
Value *argZ = implicitArgs.getImplicitArgValue(*m_func, ImplicitArg::LOCAL_ID_Z, m_pMdUtils);
1376-
Value *localSize = implicitArgs.getImplicitArgValue(*m_func, ImplicitArg::LOCAL_SIZE, m_pMdUtils);
1377-
Value *enqLocalSize = implicitArgs.getImplicitArgValue(*m_func, ImplicitArg::ENQUEUED_LOCAL_WORK_SIZE, m_pMdUtils);
1378-
1379-
if ((localIdX != argX) || (localIdY != argY) || (localIdZ != argZ))
1380-
return false;
1381-
1382-
if ((localSize != extrVal1) && (enqLocalSize != extrVal1) || (localSize != extrVal2) && (enqLocalSize != extrVal2))
1383-
return false;
1384-
1385-
return true;
1386-
}
1387-
13881258
WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(
13891259
const BinaryOperator* inst)
13901260
{
@@ -1397,11 +1267,6 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(
13971267
WIAnalysis::WIDependancy dep1 = getDependency(op1);
13981268
IGC_ASSERT(dep1 < WIAnalysis::NumDeps);
13991269

1400-
if (check_sg_id(inst))
1401-
{
1402-
return WIAnalysis::UNIFORM_THREAD;
1403-
}
1404-
14051270
// For whatever binary operation,
14061271
// uniform returns uniform
14071272
WIAnalysis::WIDependancy dep = select_conversion[dep0][dep1];

IGC/Compiler/CISACodeGen/WIAnalysis.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,6 @@ namespace IGC
160160
bool assume_uniform = false;
161161
};
162162

163-
bool check_sg_id(const llvm::Instruction* inst);
164-
165163
/// @brief Update dependency relations between all values
166164
void updateDeps();
167165

IGC/Compiler/Optimizer/CodeAssumption.cpp

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ char CodeAssumption::ID = 0;
4444

4545
bool CodeAssumption::runOnModule(Module& M)
4646
{
47-
m_pMDUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();;
47+
m_pMDUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
48+
// Add code assist uniform analysis.
49+
uniformHelper(&M);
4850

4951
if (IGC_GET_FLAG_VALUE(EnableCodeAssumption) > 1)
5052
{
@@ -54,6 +56,57 @@ bool CodeAssumption::runOnModule(Module& M)
5456
return m_changed;
5557
}
5658

59+
void CodeAssumption::uniformHelper(Module* M)
60+
{
61+
ModuleMetaData* modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();
62+
63+
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
64+
{
65+
Function* F = &(*I);
66+
67+
StringRef FN = F->getName();
68+
69+
// sub_group_id
70+
if (!FN.equals("_Z25__spirv_BuiltInSubgroupIdv") &&
71+
!FN.equals("__builtin_spirv_BuiltInSubgroupId") &&
72+
!FN.equals("_Z16get_sub_group_idv"))
73+
continue;
74+
// find all the callees
75+
for (auto ui = F->use_begin(), ue = F->use_end(); ui != ue; ++ui) {
76+
auto CI = dyn_cast<CallInst>(ui->getUser());
77+
if (!CI) continue;
78+
auto BB = CI->getParent();
79+
auto KF = BB->getParent();
80+
81+
if (!IsSGIdUniform(m_pMDUtils, modMD, KF))
82+
continue;
83+
84+
// The value must be uniform. Using shuffle with index=0 to
85+
// enforce it. assuming lane-0 is active
86+
Type* int32Ty = Type::getInt32Ty(M->getContext());
87+
Value* args[3];
88+
args[0] = CI;
89+
args[1] = ConstantInt::getNullValue(int32Ty);
90+
args[2] = ConstantInt::get(int32Ty, 0);
91+
92+
Type* ITys[3] = { args[0]->getType(), int32Ty, int32Ty};
93+
Function* shuffleIntrin = GenISAIntrinsic::getDeclaration(
94+
M,
95+
GenISAIntrinsic::GenISA_WaveShuffleIndex,
96+
ITys);
97+
98+
Instruction* shuffleCall = CallInst::Create(shuffleIntrin, args, "sgid", CI->getNextNode());
99+
100+
shuffleCall->setDebugLoc(CI->getDebugLoc());
101+
102+
CI->replaceAllUsesWith(shuffleCall);
103+
shuffleCall->setOperand(0, CI);
104+
105+
m_changed = true;
106+
}
107+
}
108+
}
109+
57110
void CodeAssumption::addAssumption(Module* M)
58111
{
59112
// Do it for 64-bit pointer only

IGC/Compiler/Optimizer/CodeAssumption.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ namespace IGC
6363

6464
IGCMD::MetaDataUtils* m_pMDUtils = nullptr;
6565

66+
// Simple change to help uniform analysis (later).
67+
void uniformHelper(llvm::Module* M);
68+
6669
// Add llvm.assume to assist other optimization such statelessToStateful
6770
void addAssumption(llvm::Module* M);
6871

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2022 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: igc_opt -enable-debugify -igc-codeassumption -S < %s 2>&1 | FileCheck %s
10+
; ------------------------------------------------
11+
; CodeAssumption : Uniform part
12+
; ------------------------------------------------
13+
; Was reduced from ocl test kernel:
14+
;
15+
; __kernel void bar(int a)
16+
; {
17+
; int subid = get_sub_group_id();
18+
; int sum = subid + a;
19+
; }
20+
21+
; Debug-info related check
22+
; CHECK-NOT: WARNING
23+
; CHECK: CheckModuleDebugify: PASS
24+
25+
; Function Attrs: noinline nounwind
26+
define spir_kernel void @bar(i32 %a) #0 {
27+
; CHECK-LABEL: @bar(
28+
; CHECK: entry:
29+
; CHECK: %subid = alloca i32, align 4
30+
; CHECK: [[CALL:%.*]] = call spir_func i32 @__builtin_spirv_BuiltInSubgroupId()
31+
; CHECK: [[SGID:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32.i32.i32(i32 [[CALL]], i32 0, i32 0)
32+
; CHECK: store i32 [[SGID]], i32* %subid, align 4
33+
;
34+
entry:
35+
%a.addr = alloca i32, align 4
36+
%subid = alloca i32, align 4
37+
%sum = alloca i32, align 4
38+
store i32 %a, i32* %a.addr, align 4
39+
%call = call spir_func i32 @__builtin_spirv_BuiltInSubgroupId() #2
40+
store i32 %call, i32* %subid, align 4
41+
%0 = load i32, i32* %subid, align 4
42+
%1 = load i32, i32* %a.addr, align 4
43+
%add = add nsw i32 %0, %1
44+
store i32 %add, i32* %sum, align 4
45+
ret void
46+
}
47+
48+
; Function Attrs: nounwind readnone
49+
declare spir_func i32 @__builtin_spirv_BuiltInSubgroupId() #2
50+
51+
attributes #0 = { noinline nounwind }
52+
attributes #2 = { nounwind readnone }
53+
54+
!IGCMetadata = !{!2}
55+
!igc.functions = !{!13}
56+
57+
!2 = !{!"ModuleMD", !3}
58+
!3 = !{!"FuncMD", !4, !5}
59+
!4 = !{!"FuncMDMap[0]", void (i32)* @bar}
60+
!5 = !{!"FuncMDValue[0]", !6, !7, !11, !12}
61+
!6 = !{!"localOffsets"}
62+
!7 = !{!"workGroupWalkOrder", !8, !9, !10}
63+
!8 = !{!"dim0", i32 0}
64+
!9 = !{!"dim1", i32 1}
65+
!10 = !{!"dim2", i32 2}
66+
!11 = !{!"funcArgs"}
67+
!12 = !{!"functionType", !"KernelFunction"}
68+
!13 = !{void (i32)* @bar, !14}
69+
!14 = !{!15}
70+
!15 = !{!"function_type", i32 0}

IGC/ocloc_tests/features/sgid_simd1_codegen/check_simd1.cl

Lines changed: 0 additions & 27 deletions
This file was deleted.

0 commit comments

Comments
 (0)