@@ -236,6 +236,10 @@ SPDX-License-Identifier: MIT
236
236
using namespace llvm ;
237
237
using namespace genx ;
238
238
239
// Hidden boolean command-line option, enabled by default (cl::init(true)).
// When it is set to false, determineWidth masks ExecSizeAllowedBits with 0x1f;
// the same mask is also applied when the module has stack calls and the
// subtarget reports hasFusedEU().
// NOTE(review): the description string reads "Limit legalization width", but
// the limiting path is taken when this flag is *false* - confirm the wording.
+ static cl::opt<bool > UseUpper16Lanes (" vc-use-upper16-lanes" , cl::init(true ),
240
+ cl::Hidden,
241
+ cl::desc(" Limit legalization width" ));
242
+
239
243
namespace {
240
244
241
245
// Information on a part of a predicate.
@@ -355,6 +359,9 @@ class GenXLegalization : public FunctionPass {
355
359
// Illegally sized predicate values that need splitting at the end of
356
360
// processing the function.
357
361
SetVector<Instruction *> IllegalPredicates;
362
+ // Whether the function's module has stack calls or not. Used for making
363
+ // legalization decisions.
364
+ bool HasStackCalls = false ;
358
365
359
366
public:
360
367
static char ID;
@@ -538,6 +545,12 @@ bool GenXLegalization::runOnFunction(Function &F) {
538
545
.getTM <GenXTargetMachine>()
539
546
.getGenXSubtarget ();
540
547
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree ();
548
+ // FIXME: Non-optimal solution. FGs info or some stackcalls-related analysis
549
+ // will be useful here.
550
+ HasStackCalls =
551
+ llvm::any_of (F.getParent ()->functions (), [](const Function &MF) {
552
+ return genx::requiresStackCall (MF);
553
+ });
541
554
// Check args for illegal predicates.
542
555
for (Function::arg_iterator fi = F.arg_begin (), fe = F.arg_end (); fi != fe;
543
556
++fi) {
@@ -1329,6 +1342,15 @@ unsigned GenXLegalization::determineWidth(unsigned WholeWidth,
1329
1342
// Prepare to keep track of whether an instruction with a minimum width
1330
1343
// (e.g. dp4) would be split too small, and whether we need to unbale.
1331
1344
unsigned ExecSizeAllowedBits = adjustTwiceWidthOrFixed4 (B);
1345
+ if (!UseUpper16Lanes || (HasStackCalls && ST->hasFusedEU ()))
1346
+ // Actually, we should legalize with these more strict requirements only FGs
1347
+ // of indirectly called functions. But there are two design issues that make
1348
+ // us legalize everything if the module has a stack call:
1349
+ // * jmpi to goto transformation is applied in VISA and it transforms more
1350
+ // than necessary
1351
+ // * this legalization pass does not have access to FGs
1352
+ ExecSizeAllowedBits &= 0x1f ;
1353
+
1332
1354
unsigned MainInstMinWidth =
1333
1355
1 << countTrailingZeros (ExecSizeAllowedBits, ZB_Undefined);
1334
1356
// Determine the vector width that we need to split into.
0 commit comments