Skip to content

Commit fd82b5b

Browse files
committed
[LV] Support recipes without underlying instr in collectPoisonGenRec.
Support recipes without underlying instruction in collectPoisonGeneratingRecipes by directly trying to dyn_cast_or_null the underlying value. Fixes #70590.
1 parent ceb4dc4 commit fd82b5b

File tree

2 files changed

+85
-1
lines changed

2 files changed

+85
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1103,7 +1103,8 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
11031103
if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
11041104
RecWithFlags->dropPoisonGeneratingFlags();
11051105
} else {
1106-
Instruction *Instr = CurRec->getUnderlyingInstr();
1106+
Instruction *Instr = dyn_cast_or_null<Instruction>(
1107+
CurRec->getVPSingleValue()->getUnderlyingValue());
11071108
(void)Instr;
11081109
assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
11091110
"found instruction with poison generating flags not covered by "

llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,89 @@ loop.exit:
405405
ret void
406406
}
407407

408+
@c = external global [5 x i8]
409+
410+
; Test case for https://github.com/llvm/llvm-project/issues/70590.
411+
; Note that the then block has UB, but I could not find any other way to
412+
; construct a suitable test case.
413+
define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
414+
; CHECK-LABEL: @pr70590_recipe_without_underlying_instr(
415+
; CHECK: vector.body:
416+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.+]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE6:%.*]] ]
417+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_SREM_CONTINUE6]] ]
418+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
419+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i64> [[VEC_IND]],
420+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true>
421+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
422+
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]]
423+
; CHECK: pred.srem.if:
424+
; CHECK-NEXT: [[TMP4:%.*]] = srem i64 3, 0
425+
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE]]
426+
; CHECK: pred.srem.continue:
427+
; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ poison, %vector.body ], [ [[TMP4]], [[PRED_SREM_IF]] ]
428+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
429+
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_SREM_IF1:%.*]], label [[PRED_SREM_CONTINUE2:%.*]]
430+
; CHECK: pred.srem.if1:
431+
; CHECK-NEXT: [[TMP7:%.*]] = srem i64 3, 0
432+
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]]
433+
; CHECK: pred.srem.continue2:
434+
; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ poison, [[PRED_SREM_CONTINUE]] ], [ [[TMP7]], [[PRED_SREM_IF1]] ]
435+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
436+
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4:%.*]]
437+
; CHECK: pred.srem.if3:
438+
; CHECK-NEXT: [[TMP10:%.*]] = srem i64 3, 0
439+
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE4]]
440+
; CHECK: pred.srem.continue4:
441+
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ poison, [[PRED_SREM_CONTINUE2]] ], [ [[TMP10]], [[PRED_SREM_IF3]] ]
442+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
443+
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_SREM_IF5:%.*]], label [[PRED_SREM_CONTINUE6]]
444+
; CHECK: pred.srem.if5:
445+
; CHECK-NEXT: [[TMP13:%.*]] = srem i64 3, 0
446+
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE6]]
447+
; CHECK: pred.srem.continue6:
448+
; CHECK-NEXT: [[TMP14:%.*]] = phi i64 [ poison, [[PRED_SREM_CONTINUE4]] ], [ [[TMP13]], [[PRED_SREM_IF5]] ]
449+
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP5]], -3
450+
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP0]], [[TMP15]]
451+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]]
452+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0
453+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
454+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> [[WIDE_LOAD]], <4 x i8> zeroinitializer
455+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]]
456+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0
457+
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4
458+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
459+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
460+
; CHECK-NEXT: br i1 true, label %middle.block, label %vector.body
461+
; CHECK: middle.block:
462+
463+
entry:
464+
br label %loop.header
465+
466+
loop.header:
467+
%iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ]
468+
%cmp = icmp eq i64 %iv, %n
469+
br i1 %cmp, label %loop.latch, label %then
470+
471+
then:
472+
%rem = srem i64 3, 0
473+
%add3 = add i64 %rem, -3
474+
%add5 = add i64 %iv, %add3
475+
%gep = getelementptr [5 x i8], ptr @c, i64 0, i64 %add5
476+
%l = load i8, ptr %gep, align 1
477+
br label %loop.latch
478+
479+
loop.latch:
480+
%sr = phi i8 [ 0, %loop.header ], [ %l , %then ]
481+
%gep.dst = getelementptr i8, ptr %dst, i64 %iv
482+
store i8 %sr, ptr %gep.dst, align 4
483+
%inc = add i64 %iv, 1
484+
%exitcond.not = icmp eq i64 %inc, 4
485+
br i1 %exitcond.not, label %exit, label %loop.header
486+
487+
exit:
488+
ret void
489+
}
490+
408491
attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
409492

410493
!0 = !{}

0 commit comments

Comments
 (0)