@@ -2186,15 +2186,7 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
2186
2186
G4_CondMod* condmod = builder.duplicateOperand (inst->getCondMod ());
2187
2187
G4_Predicate* pred = builder.duplicateOperand (inst->getPredicate ());
2188
2188
2189
- // check if the following inst is mulh and uses the same srcs as this mul.
2190
- // if true, translate them into
2191
- // mul acc src0 src1
2192
- // mach dst_mulh src0 src1
2193
- // mov mul_dst src0 src1
2194
- INST_LIST_ITER next_i = i;
2195
- next_i++;
2196
2189
G4_Type tmp_type = (IS_UNSIGNED_INT (src0->getType ()) && IS_UNSIGNED_INT (src1->getType ())) ? Type_UD : Type_D;
2197
- bool isCompressed = isCompressedInst (inst);
2198
2190
2199
2191
if (src1->isSrcRegRegion ())
2200
2192
{
@@ -2210,115 +2202,17 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
2210
2202
bool sat_mod = inst->getSaturate ();
2211
2203
inst->setSaturate (false );
2212
2204
2213
- // see if we can combine this mul with a mulh following it
2214
- if (next_i != bb->end ())
2215
- {
2216
- G4_INST* next_inst = *next_i;
2217
-
2218
- if (next_inst->opcode () == G4_mulh &&
2219
- next_inst->getExecSize () == exec_size &&
2220
- inst->getPredicate () == next_inst->getPredicate () &&
2221
- ((srcExchanged &&
2222
- src0->getType () == next_inst->getSrc (1 )->getType () &&
2223
- src0->compareOperand (next_inst->getSrc (1 )) == Rel_eq &&
2224
- src1->getType () == next_inst->getSrc (0 )->getType () &&
2225
- src1->compareOperand (next_inst->getSrc (0 )) == Rel_eq) ||
2226
- (!srcExchanged &&
2227
- src0->getType () == next_inst->getSrc (0 )->getType () &&
2228
- src0->compareOperand (next_inst->getSrc (0 )) == Rel_eq &&
2229
- src1->getType () == next_inst->getSrc (1 )->getType () &&
2230
- src1->compareOperand (next_inst->getSrc (1 )) == Rel_eq)))
2231
- {
2232
- // change current mul inst
2233
- G4_DstRegRegion* acc_dst_opnd = builder.createDst (
2234
- builder.phyregpool .getAcc0Reg (),
2235
- 0 ,
2236
- 0 ,
2237
- 1 ,
2238
- tmp_type);
2239
-
2240
- inst->setDest (acc_dst_opnd);
2241
-
2242
- fixMulSrc1 (i, bb);
2243
-
2244
- inst->transferUse (next_inst, true );
2245
- inst->addDefUse (next_inst, Opnd_implAccSrc);
2246
- // change mulh inst
2247
- next_inst->setOpcode (G4_mach);
2248
-
2249
- G4_DstRegRegion* next_dst = next_inst->getDst ();
2250
- if (next_dst != NULL &&
2251
- (next_inst->getSaturate () ||
2252
- next_dst->getByteOffset () % GENX_GRF_REG_SIZ != 0 ||
2253
- (!bb->isAllLaneActive () && next_inst->isWriteEnableInst () == false ) ||
2254
- (next_dst &&
2255
- ((next_dst->getExecTypeSize () > G4_Type_Table[Type_D].byteSize ) ||
2256
- isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(next_dst)))))
2257
- {
2258
- // add a tmp mov
2259
- G4_DstRegRegion* new_next_dst = insertMovAfter (next_i, next_dst, next_dst->getType (), bb);
2260
- next_inst->setDest (new_next_dst);
2261
- }
2262
-
2263
- // set implicit source/dst for MACH
2264
- const RegionDesc* rd = exec_size == 1 ? builder.getRegionScalar () : builder.getRegionStride1 ();
2265
- G4_SrcRegRegion* acc_src_opnd = builder.createSrcRegRegion (Mod_src_undef, Direct, builder.phyregpool .getAcc0Reg (), 0 , 0 , rd, tmp_type);
2266
- next_inst->setImplAccSrc (acc_src_opnd);
2267
- next_inst->setImplAccDst (builder.createDstRegRegion (*acc_dst_opnd));
2268
-
2269
- // create mov inst
2270
- G4_SrcRegRegion* movAccSrc = builder.createSrcRegRegion (Mod_src_undef, Direct, builder.phyregpool .getAcc0Reg (), 0 , 0 , rd, tmp_type);
2271
- G4_INST* newMov = builder.createMov (exec_size, dst, movAccSrc, inst_opt, false );
2272
- newMov->setPredicate (pred);
2273
- newMov->setCondMod (condmod);
2274
-
2275
- INST_LIST_ITER iter = next_i;
2276
- iter++;
2277
- bb->insertBefore (iter, newMov);
2278
-
2279
- next_inst->addDefUse (newMov, Opnd_src0);
2280
-
2281
- INST_LIST_ITER last_iter = iter;
2282
- last_iter--;
2283
-
2284
- if (dst != NULL &&
2285
- (sat_mod ||
2286
- (dst &&
2287
- ((dst->getExecTypeSize () > G4_Type_Table[Type_D].byteSize ) ||
2288
- (isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))))))
2289
- {
2290
- // add a tmp mov
2291
- iter--;
2292
- G4_DstRegRegion* new_next_dst = insertMovAfter (iter, dst, dst->getType (), bb);
2293
- newMov->setDest (new_next_dst);
2294
- if (new_next_dst != dst && sat_mod)
2295
- {
2296
- MUST_BE_TRUE (iter != bb->end () && (*iter)->opcode () == G4_mov,
2297
- " Next instruciton should be the MOV generated for consistent Dst and ACC source region." );
2298
- (*iter)->setSaturate (false );
2299
- }
2300
- }
2301
-
2302
- next_inst->setOptionOn (InstOpt_AccWrCtrl);
2303
-
2304
- if (exec_size > builder.getNativeExecSize ())
2305
- {
2306
- splitDWMULInst (i, last_iter, bb);
2307
- }
2308
- return true ;
2309
- }
2310
- }
2311
-
2312
2205
G4_DstRegRegion* acc_dst_opnd = builder.createDst (builder.phyregpool .getAcc0Reg (), 0 , 0 , 1 , tmp_type);
2313
2206
inst->setDest (acc_dst_opnd);
2314
2207
fixMulSrc1 (i, bb);
2315
2208
2316
2209
inst->setNoMask (true );
2317
2210
2318
- if (pred != NULL ) {
2211
+ if (pred)
2212
+ {
2319
2213
// conditional modifier cannot be used
2320
2214
// when the MUL source operand is of dword type.
2321
- inst->setCondMod (NULL );
2215
+ inst->setCondMod (nullptr );
2322
2216
}
2323
2217
2324
2218
// Dst is either null, or a temp D if the original dst is Q/UQ
@@ -2344,24 +2238,15 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
2344
2238
iter++;
2345
2239
bb->insertBefore (iter, newInst);
2346
2240
2347
- inst->setPredicate (NULL );
2241
+ inst->setPredicate (nullptr );
2348
2242
2349
2243
inst->copyDef (newInst, Opnd_src0, Opnd_src0);
2350
2244
inst->copyDef (newInst, Opnd_src1, Opnd_src1);
2351
2245
inst->transferUse (newInst);
2352
2246
inst->addDefUse (newInst, Opnd_implAccSrc);
2353
2247
2354
2248
// create an implicit source for MACH
2355
- const RegionDesc* rd = NULL ;
2356
- unsigned short vs = 0 , wd = exec_size, hs = 0 ;
2357
- if (exec_size > 1 ) {
2358
- if (isCompressed) {
2359
- wd = wd / 2 ;
2360
- }
2361
- hs = 1 ;
2362
- vs = wd;
2363
- }
2364
- rd = builder.createRegionDesc (vs, wd, hs);
2249
+ const RegionDesc* rd = exec_size > 1 ? builder.getRegionStride1 () : builder.getRegionScalar ();
2365
2250
G4_SrcRegRegion* acc_src_opnd = builder.createSrcRegRegion (Mod_src_undef, Direct,
2366
2251
builder.phyregpool .getAcc0Reg (), 0 , 0 , rd, tmp_type);
2367
2252
0 commit comments