Skip to content

Commit e0217ee

Browse files
committed
[DAG] canCreateUndefOrPoison - only compute extract/insert vector elt index knownbits when not poison
We were calling computeKnownBits to determine the bounds of the element index without first ensuring that the index wasn't poison. As a result, if we did freeze the index, isGuaranteedNotToBeUndefOrPoison would then fail, as we can't call computeKnownBits through FREEZE for potentially poison values. Fixes #92569.
1 parent 7fc524f commit e0217ee

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5246,8 +5246,12 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
52465246
// Ensure that the element index is in bounds.
52475247
EVT VecVT = Op.getOperand(0).getValueType();
52485248
SDValue Idx = Op.getOperand(Opcode == ISD::INSERT_VECTOR_ELT ? 2 : 1);
5249-
KnownBits KnownIdx = computeKnownBits(Idx, Depth + 1);
5250-
return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
5249+
if (isGuaranteedNotToBeUndefOrPoison(Idx, DemandedElts, PoisonOnly,
5250+
Depth + 1)) {
5251+
KnownBits KnownIdx = computeKnownBits(Idx, Depth + 1);
5252+
return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
5253+
}
5254+
return true;
52515255
}
52525256

52535257
case ISD::VECTOR_SHUFFLE: {

llvm/test/CodeGen/X86/pr92569.ll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
3+
4+
define void @PR92569(i64 %arg, <8 x i8> %arg1) {
5+
; CHECK-LABEL: PR92569:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: testq %rdi, %rdi
8+
; CHECK-NEXT: je .LBB0_1
9+
; CHECK-NEXT: # %bb.2: # %cond.false
10+
; CHECK-NEXT: rep bsfq %rdi, %rax
11+
; CHECK-NEXT: jmp .LBB0_3
12+
; CHECK-NEXT: .LBB0_1:
13+
; CHECK-NEXT: movl $64, %eax
14+
; CHECK-NEXT: .LBB0_3: # %cond.end
15+
; CHECK-NEXT: shrb $3, %al
16+
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
17+
; CHECK-NEXT: movzbl %al, %eax
18+
; CHECK-NEXT: movzbl -24(%rsp,%rax), %eax
19+
; CHECK-NEXT: movl %eax, 0
20+
; CHECK-NEXT: retq
21+
%cttz = call i64 @llvm.cttz.i64(i64 %arg, i1 false)
22+
%trunc = trunc i64 %cttz to i8
23+
%lshr = lshr i8 %trunc, 3
24+
%extractelement = extractelement <8 x i8> %arg1, i8 %lshr
25+
%freeze = freeze i8 %extractelement
26+
%zext = zext i8 %freeze to i32
27+
store i32 %zext, ptr addrspace(1) null, align 4
28+
ret void
29+
}

0 commit comments

Comments
 (0)