Skip to content

Commit 12da4a9

Browse files
weiyu-chen authored and
sys_zuul committed
Remove mul+mulh pattern match as it is unclear how often it occurs.
Change-Id: Icaa93cb4457b751e3612e05d5c6836a34d10fcd1
1 parent 9164234 commit 12da4a9

File tree

1 file changed

+5
-120
lines changed

1 file changed

+5
-120
lines changed

visa/HWConformity.cpp

Lines changed: 5 additions & 120 deletions
Original file line number | Diff line number | Diff line change
@@ -2186,15 +2186,7 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
21862186
G4_CondMod* condmod = builder.duplicateOperand(inst->getCondMod());
21872187
G4_Predicate* pred = builder.duplicateOperand(inst->getPredicate());
21882188

2189-
// check if the following inst is mulh and uses the same srcs as this mul.
2190-
// if true, translate them into
2191-
// mul acc src0 src1
2192-
// mach dst_mulh src0 src1
2193-
// mov mul_dst src0 src1
2194-
INST_LIST_ITER next_i = i;
2195-
next_i++;
21962189
G4_Type tmp_type = (IS_UNSIGNED_INT(src0->getType()) && IS_UNSIGNED_INT(src1->getType())) ? Type_UD : Type_D;
2197-
bool isCompressed = isCompressedInst(inst);
21982190

21992191
if (src1->isSrcRegRegion())
22002192
{
@@ -2210,115 +2202,17 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
22102202
bool sat_mod = inst->getSaturate();
22112203
inst->setSaturate(false);
22122204

2213-
// see if we can combine this mul with a mulh following it
2214-
if (next_i != bb->end())
2215-
{
2216-
G4_INST* next_inst = *next_i;
2217-
2218-
if (next_inst->opcode() == G4_mulh &&
2219-
next_inst->getExecSize() == exec_size &&
2220-
inst->getPredicate() == next_inst->getPredicate() &&
2221-
((srcExchanged &&
2222-
src0->getType() == next_inst->getSrc(1)->getType() &&
2223-
src0->compareOperand(next_inst->getSrc(1)) == Rel_eq &&
2224-
src1->getType() == next_inst->getSrc(0)->getType() &&
2225-
src1->compareOperand(next_inst->getSrc(0)) == Rel_eq) ||
2226-
(!srcExchanged &&
2227-
src0->getType() == next_inst->getSrc(0)->getType() &&
2228-
src0->compareOperand(next_inst->getSrc(0)) == Rel_eq &&
2229-
src1->getType() == next_inst->getSrc(1)->getType() &&
2230-
src1->compareOperand(next_inst->getSrc(1)) == Rel_eq)))
2231-
{
2232-
// change current mul inst
2233-
G4_DstRegRegion* acc_dst_opnd = builder.createDst(
2234-
builder.phyregpool.getAcc0Reg(),
2235-
0,
2236-
0,
2237-
1,
2238-
tmp_type);
2239-
2240-
inst->setDest(acc_dst_opnd);
2241-
2242-
fixMulSrc1(i, bb);
2243-
2244-
inst->transferUse(next_inst, true);
2245-
inst->addDefUse(next_inst, Opnd_implAccSrc);
2246-
// change mulh inst
2247-
next_inst->setOpcode(G4_mach);
2248-
2249-
G4_DstRegRegion* next_dst = next_inst->getDst();
2250-
if (next_dst != NULL &&
2251-
(next_inst->getSaturate() ||
2252-
next_dst->getByteOffset() % GENX_GRF_REG_SIZ != 0 ||
2253-
(!bb->isAllLaneActive() && next_inst->isWriteEnableInst() == false) ||
2254-
(next_dst &&
2255-
((next_dst->getExecTypeSize() > G4_Type_Table[Type_D].byteSize) ||
2256-
isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(next_dst)))))
2257-
{
2258-
// add a tmp mov
2259-
G4_DstRegRegion* new_next_dst = insertMovAfter(next_i, next_dst, next_dst->getType(), bb);
2260-
next_inst->setDest(new_next_dst);
2261-
}
2262-
2263-
// set implicit source/dst for MACH
2264-
const RegionDesc* rd = exec_size == 1 ? builder.getRegionScalar() : builder.getRegionStride1();
2265-
G4_SrcRegRegion* acc_src_opnd = builder.createSrcRegRegion(Mod_src_undef, Direct, builder.phyregpool.getAcc0Reg(), 0, 0, rd, tmp_type);
2266-
next_inst->setImplAccSrc(acc_src_opnd);
2267-
next_inst->setImplAccDst(builder.createDstRegRegion(*acc_dst_opnd));
2268-
2269-
// create mov inst
2270-
G4_SrcRegRegion* movAccSrc = builder.createSrcRegRegion(Mod_src_undef, Direct, builder.phyregpool.getAcc0Reg(), 0, 0, rd, tmp_type);
2271-
G4_INST* newMov = builder.createMov(exec_size, dst, movAccSrc, inst_opt, false);
2272-
newMov->setPredicate(pred);
2273-
newMov->setCondMod(condmod);
2274-
2275-
INST_LIST_ITER iter = next_i;
2276-
iter++;
2277-
bb->insertBefore(iter, newMov);
2278-
2279-
next_inst->addDefUse(newMov, Opnd_src0);
2280-
2281-
INST_LIST_ITER last_iter = iter;
2282-
last_iter--;
2283-
2284-
if (dst != NULL &&
2285-
(sat_mod ||
2286-
(dst &&
2287-
((dst->getExecTypeSize() > G4_Type_Table[Type_D].byteSize) ||
2288-
(isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))))))
2289-
{
2290-
// add a tmp mov
2291-
iter--;
2292-
G4_DstRegRegion* new_next_dst = insertMovAfter(iter, dst, dst->getType(), bb);
2293-
newMov->setDest(new_next_dst);
2294-
if (new_next_dst != dst && sat_mod)
2295-
{
2296-
MUST_BE_TRUE(iter != bb->end() && (*iter)->opcode() == G4_mov,
2297-
"Next instruciton should be the MOV generated for consistent Dst and ACC source region.");
2298-
(*iter)->setSaturate(false);
2299-
}
2300-
}
2301-
2302-
next_inst->setOptionOn(InstOpt_AccWrCtrl);
2303-
2304-
if (exec_size > builder.getNativeExecSize())
2305-
{
2306-
splitDWMULInst(i, last_iter, bb);
2307-
}
2308-
return true;
2309-
}
2310-
}
2311-
23122205
G4_DstRegRegion* acc_dst_opnd = builder.createDst(builder.phyregpool.getAcc0Reg(), 0, 0, 1, tmp_type);
23132206
inst->setDest(acc_dst_opnd);
23142207
fixMulSrc1(i, bb);
23152208

23162209
inst->setNoMask(true);
23172210

2318-
if (pred != NULL) {
2211+
if (pred)
2212+
{
23192213
// conditional modifier cannot be used
23202214
// when the MUL source operand is of dword type.
2321-
inst->setCondMod(NULL);
2215+
inst->setCondMod(nullptr);
23222216
}
23232217

23242218
// Dst is either null, or a temp D if the original dst is Q/UQ
@@ -2344,24 +2238,15 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
23442238
iter++;
23452239
bb->insertBefore(iter, newInst);
23462240

2347-
inst->setPredicate(NULL);
2241+
inst->setPredicate(nullptr);
23482242

23492243
inst->copyDef(newInst, Opnd_src0, Opnd_src0);
23502244
inst->copyDef(newInst, Opnd_src1, Opnd_src1);
23512245
inst->transferUse(newInst);
23522246
inst->addDefUse(newInst, Opnd_implAccSrc);
23532247

23542248
// create an implicit source for MACH
2355-
const RegionDesc* rd = NULL;
2356-
unsigned short vs = 0, wd = exec_size, hs = 0;
2357-
if (exec_size > 1) {
2358-
if (isCompressed) {
2359-
wd = wd / 2;
2360-
}
2361-
hs = 1;
2362-
vs = wd;
2363-
}
2364-
rd = builder.createRegionDesc(vs, wd, hs);
2249+
const RegionDesc* rd = exec_size > 1 ? builder.getRegionStride1() : builder.getRegionScalar();
23652250
G4_SrcRegRegion* acc_src_opnd = builder.createSrcRegRegion(Mod_src_undef, Direct,
23662251
builder.phyregpool.getAcc0Reg(), 0, 0, rd, tmp_type);
23672252

0 commit comments

Comments
 (0)