@@ -346,25 +346,27 @@ static bool happensBefore(Operation *a, Operation *b,
346
346
return false ;
347
347
}
348
348
349
- // / Return `true` if op dominance can be used to rule out read-after-write
350
- // / conflicts wrt. the given reads and writes .
349
+ // / Return `true` if op dominance can be used to rule out read-after-write
350
+ // / conflicts based on the ordering of ops .
351
351
// /
352
- // / Op dominance can often be used to rule out potential conflicts such as
353
- // / "read" happens before " write". E.g., the following IR is not a RaW conflict
354
- // / because the the read happens *before* the write.
352
+ // / Generalized op dominance can often be used to rule out potential conflicts
353
+ // / due to "read happens before write". E.g., the following IR is not a RaW
354
+ // / conflict because the read happens *before* the write.
355
355
// /
356
- // / %0 = ... : tensor<?xf32>
357
- // / "reading_op"(%0) : tensor<?xf32>
358
- // / %1 = "writing_op"(%0) : tensor<?xf32> -> tensor<?xf32>
356
+ // / Example 1:
357
+ // / %0 = ... : tensor<?xf32> // DEF
358
+ // / "reading_op"(%0) : tensor<?xf32> // READ
359
+ // / %1 = "writing_op"(%0) : tensor<?xf32> -> tensor<?xf32> // WRITE
359
360
// /
360
361
// / This is no longer true inside loops (or repetitive regions). In such cases,
361
362
// / there may not be a meaningful `happensBefore` relationship because ops
362
363
// / could be executed multiple times. E.g.:
363
364
// /
364
- // / %0 = ... : tensor<?xf32>
365
+ // / Example 2:
366
+ // / %0 = ... : tensor<?xf32> // DEF
365
367
// / scf.for ... {
366
- // / "reading_op"(%0) : tensor<?xf32>
367
- // / %1 = "writing_op"(%0) : tensor<?xf32> -> tensor<?xf32>
368
+ // / "reading_op"(%0) : tensor<?xf32> // READ
369
+ // / %1 = "writing_op"(%0) : tensor<?xf32> -> tensor<?xf32> // WRITE
368
370
// / ...
369
371
// / }
370
372
// /
@@ -374,92 +376,78 @@ static bool happensBefore(Operation *a, Operation *b,
374
376
// / execution of writing_op. This is problematic because the tensor %0 they
375
377
// / operate on (i.e., the "definition") is defined outside of the loop.
376
378
// /
377
- // / Counter example:
379
+ // / At a high level, there is a potential RaW in a program if there exists a
380
+ // / possible program execution such that there is a sequence of DEF, followed
381
+ // / by WRITE, followed by READ. Each additional DEF resets the sequence.
378
382
// /
383
+ // / E.g.:
384
+ // / No conflict: DEF, WRITE, DEF, READ
385
+ // / Potential conflict: DEF, READ, WRITE, READ, WRITE
386
+ // /
387
+ // / Example 1 has no conflict: DEF, READ, WRITE
388
+ // / Example 2 has a potential conflict: DEF, (READ, WRITE)*
389
+ // /
390
+ // / Example 3:
379
391
// / scf.for ... {
380
392
// / %0 = ... : tensor<?xf32>
381
393
// / "reading_op"(%0) : tensor<?xf32>
382
394
// / %1 = "writing_op"(%0) : tensor<?xf32> -> tensor<?xf32>
383
395
// / ...
384
396
// / }
397
+ // / This has no conflict: (DEF, READ, WRITE)*
385
398
// /
386
- // / In this example, the definition %0 is in the same repetitive region as
387
- // / "writing_op", so op dominance can be used to compute the `happensBefore`
388
- // / relationship.
389
- // /
390
- // / Whether op dominance can be used or not is decided as follows: Find the
391
- // / closest enclosing repetitive region of all buffer writes wrt. the given
392
- // / tensor reads and writes. (The given sets of reads and writes contain the
393
- // / entire alias set.) In case of a read, we look at the op that defines the
394
- // / read value. In case of a write, we look at the op that is writing. If all of
395
- // / those ops are in the same closest enclosing repetitive region (nullptr in
396
- // / case of "no repetitive region" found at all), then op dominance can be used.
397
- // / Otherwise, it cannot be used.
398
- // /
399
- // / Example: The common enclosing repetitive region is the scf.for loop.
400
- // / Op dominance can be used.
399
+ // / Example 4:
400
+ // / %0 = ... : tensor<?xf32>
401
401
// / scf.for ... {
402
- // / %0 = tensor.generate
403
- // / "read "(%0)
402
+ // / scf.for ... { "reading_op"(%0) }
403
+ // / %1 = "writing_op"(%0)
404
404
// / }
405
+ // / This has a potential conflict: DEF, ((READ)*, WRITE)*
405
406
// /
406
- // / Example: The common enclosing repetitive region is nullptr: There is no
407
- // / repetitive region around the tensor.generate. Op dominance can be
408
- // / used.
409
- // / %0 = tensor.generate
410
- // / scf.for ... { "read"(%0) }
407
+ // / Example 5:
408
+ // / %0 = ... : tensor<?xf32>
409
+ // / scf.for ... { %1 = "writing_op"(%0) }
410
+ // / scf.for ... { "reading_op"(%0) }
411
+ // / This has a potential conflict: DEF, WRITE*, READ*
411
412
// /
412
- // / Example: The common enclosing repetitive regions of tensor.generate and
413
- // / "write" differ. Op dominance cannot be used.
414
- // / %0 = tensor.generate
415
- // / scf.for ... {
416
- // / "read"(%0)
417
- // / "write"(%0)
418
- // / }
413
+ // / The following rules are used to rule out RaW conflicts via ordering of ops:
419
414
// /
420
- // / Example: The common enclosing repetitive regions of tensor.generate and
421
- // / "write" differ, but there is no read of %0, so op dominance can be
422
- // / used .
423
- // / %0 = tensor.generate
424
- // / scf.for ... {
425
- // / "write"(%0)
426
- // / }
415
+ // / 1. If the closest enclosing repetitive region of DEF is a proper ancestor of
416
+ // / a repetitive region that encloses both READ and WRITE, we cannot rule
417
+ // / out a RaW conflict due to the ordering of ops.
418
+ // / 2. Otherwise: There are no loops that interfere with our analysis; for
419
+ // / analysis purposes, we can assume that there are no loops/repetitive
420
+ // / regions. I.e., we can rule out a RaW conflict if READ happensBefore WRITE
421
+ // / or WRITE happensBefore DEF. (Checked in `hasReadAfterWriteInterference`.)
427
422
// /
428
- // / Note: iter_args of loops are not aliases of their respective block
429
- // / arguments, so op domanice can be used when analyzing ops that operate
430
- // / on them.
431
- bool canUseOpDominance (const DenseSet<OpOperand *> &usesRead,
432
- const DenseSet<OpOperand *> &usesWrite,
423
+ bool canUseOpDominance (OpOperand *uRead, OpOperand *uWrite,
424
+ const SetVector<Value> &definitions,
433
425
const AnalysisState &state) {
434
426
const BufferizationOptions &options = state.getOptions ();
435
- std::optional<Region *> commonEnclosingRegion;
427
+ for (Value def : definitions) {
428
+ Region *rRead = getEnclosingRepetitiveRegion (uRead->getOwner (), options);
429
+ Region *rDef = getEnclosingRepetitiveRegion (def, options);
436
430
437
- // In case of a write, take the region in which the write takes place.
438
- for (OpOperand *uWrite : usesWrite) {
439
- Region *r = getEnclosingRepetitiveRegion (uWrite->getOwner (), options);
440
- if (!commonEnclosingRegion.has_value ()) {
441
- commonEnclosingRegion = r;
431
+ // READ and DEF are in the same repetitive region. `happensBefore` can be
432
+ // used to rule out RaW conflicts due to op ordering.
433
+ if (rRead == rDef)
442
434
continue ;
443
- }
444
- if (*commonEnclosingRegion != r)
445
- return false ;
446
- }
447
435
448
- // In case of a read, take the region which the read value is defined.
449
- for (OpOperand *uRead : usesRead) {
450
- // Optimization: Skip reads of values that have no defined contents.
451
- if (!state.bufferizesToMemoryWrite (uRead->get ()))
452
- continue ;
453
- Region *r = getEnclosingRepetitiveRegion (uRead->get (), options);
454
- if (!commonEnclosingRegion.has_value ()) {
455
- commonEnclosingRegion = r;
456
- continue ;
436
+ // Find the enclosing repetitive region of READ that is closest to DEF but
437
+ // not the repetitive region of DEF itself.
438
+ while (true ) {
439
+ Region *nextRegion = getNextEnclosingRepetitiveRegion (rRead, options);
440
+ if (nextRegion == rDef)
441
+ break ;
442
+ assert (nextRegion && " expected to find another repetitive region" );
443
+ rRead = nextRegion;
457
444
}
458
- if (*commonEnclosingRegion != r)
445
+
446
+ // We cannot use op dominance if WRITE is inside the same repetitive region.
447
+ if (rRead->getParentOp ()->isAncestor (uWrite->getOwner ()))
459
448
return false ;
460
449
}
461
-
462
- return commonEnclosingRegion.has_value ();
450
+ return true ;
463
451
}
464
452
465
453
// / Annotate IR with details about the detected RaW conflict.
@@ -507,10 +495,6 @@ static bool hasReadAfterWriteInterference(
507
495
AnalysisState &state, const BufferizationAliasInfo &aliasInfo) {
508
496
const BufferizationOptions &options = state.getOptions ();
509
497
510
- // Check if op dominance can be used to rule out read-after-write conflicts.
511
- bool useDominance = canUseOpDominance (usesRead, usesWrite, state);
512
- LLVM_DEBUG (llvm::dbgs () << " \n - useDominance = " << useDominance << " \n " );
513
-
514
498
for (OpOperand *uRead : usesRead) {
515
499
Operation *readingOp = uRead->getOwner ();
516
500
LLVM_DEBUG (llvm::dbgs () << " \n - check conflict:\n " );
@@ -542,6 +526,12 @@ static bool hasReadAfterWriteInterference(
542
526
<< uConflictingWrite->getOperandNumber () << " of "
543
527
<< *uConflictingWrite->getOwner () << " \n " );
544
528
529
+ // Check if op dominance can be used to rule out read-after-write
530
+ // conflicts.
531
+ bool useDominance =
532
+ canUseOpDominance (uRead, uConflictingWrite, definitions, state);
533
+ LLVM_DEBUG (llvm::dbgs () << " \n - useDominance = " << useDominance << " \n " );
534
+
545
535
// Throughout this loop, check for multiple requirements that have to be
546
536
// met for uConflictingWrite to be an actual conflict.
547
537
Operation *conflictingWritingOp = uConflictingWrite->getOwner ();
0 commit comments