Skip to content

[DAG] Remove extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x), idx) fold #87480

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22265,12 +22265,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);

// extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
VecOp.getOperand(0), Index));
}

// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5149,6 +5149,17 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::OR:
return ConsiderFlags && Op->getFlags().hasDisjoint();

case ISD::SCALAR_TO_VECTOR:
// Check if we demand any upper (undef) elements.
return !PoisonOnly && DemandedElts.ugt(1);

case ISD::EXTRACT_VECTOR_ELT: {
// Ensure that the element index is in bounds.
EVT VecVT = Op.getOperand(0).getValueType();
KnownBits KnownIdx = computeKnownBits(Op.getOperand(1), Depth + 1);
return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
}

case ISD::INSERT_VECTOR_ELT:{
// Ensure that the element index is in bounds.
EVT VecVT = Op.getOperand(0).getValueType();
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,13 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(

break;
}
case ISD::FREEZE: {
SDValue N0 = Op.getOperand(0);
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
/*PoisonOnly=*/false))
return N0;
break;
}
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Expand Down Expand Up @@ -3184,6 +3191,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
case ISD::FREEZE: {
SDValue N0 = Op.getOperand(0);
if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
/*PoisonOnly=*/false))
return TLO.CombineTo(Op, N0);

// TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
// freeze(op(x, ...)) -> op(freeze(x), ...).
if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
TLO.DAG.getFreeze(N0.getOperand(0))));
break;
}
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
if (!DemandedElts.isAllOnes()) {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42725,6 +42725,8 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
switch (Op.getOpcode()) {
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
return false;
}
return TargetLowering::canCreateUndefOrPoisonForTargetNode(
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/freeze-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,14 @@ define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwin
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovdqa (%edx), %xmm0
; X86-NEXT: vpand (%ecx), %xmm0, %xmm0
; X86-NEXT: vpextrb $6, %xmm0, %ecx
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: vpextrb $6, %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_extractelement:
; X64: # %bb.0:
; X64-NEXT: vmovdqa (%rdi), %xmm0
; X64-NEXT: vpand (%rsi), %xmm0, %xmm0
; X64-NEXT: vpextrb $6, %xmm0, %eax
; X64-NEXT: movb %al, (%rdx)
; X64-NEXT: vpextrb $6, %xmm0, (%rdx)
; X64-NEXT: retq
%i0 = load <16 x i8>, ptr %origin0
%i1 = load <16 x i8>, ptr %origin1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax
; X64-NO-BMI2-NEXT: movzwl %ax, %eax
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrl %cl, %eax
Expand All @@ -74,21 +75,23 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
; X64-BMI2-NEXT: movzwl %ax, %eax
; X64-BMI2-NEXT: shll $3, %esi
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: movb %al, (%rdx)
; X64-BMI2-NEXT: retq
;
; X86-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X86-NO-BMI2: # %bb.0:
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-BMI2-NEXT: movzwl (%eax), %eax
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NO-BMI2-NEXT: movzwl (%edx), %edx
; X86-NO-BMI2-NEXT: movzwl %dx, %edx
; X86-NO-BMI2-NEXT: shll $3, %ecx
; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NO-BMI2-NEXT: shrl %cl, %eax
; X86-NO-BMI2-NEXT: movb %al, (%edx)
; X86-NO-BMI2-NEXT: shrl %cl, %edx
; X86-NO-BMI2-NEXT: movb %dl, (%eax)
; X86-NO-BMI2-NEXT: retl
;
; X86-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
Expand All @@ -97,6 +100,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movzwl (%edx), %edx
; X86-BMI2-NEXT: movzwl %dx, %edx
; X86-BMI2-NEXT: shll $3, %ecx
; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx
; X86-BMI2-NEXT: movb %cl, (%eax)
Expand All @@ -119,6 +123,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax
; X64-NO-BMI2-NEXT: movzwl %ax, %eax
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrl %cl, %eax
Expand All @@ -128,6 +133,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
; X64-BMI2-NEXT: movzwl %ax, %eax
; X64-BMI2-NEXT: shll $3, %esi
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: movw %ax, (%rdx)
Expand All @@ -139,6 +145,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NO-BMI2-NEXT: movzwl (%edx), %edx
; X86-NO-BMI2-NEXT: movzwl %dx, %edx
; X86-NO-BMI2-NEXT: shll $3, %ecx
; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NO-BMI2-NEXT: shrl %cl, %edx
Expand All @@ -151,6 +158,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movzwl (%edx), %edx
; X86-BMI2-NEXT: movzwl %dx, %edx
; X86-BMI2-NEXT: shll $3, %ecx
; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx
; X86-BMI2-NEXT: movw %cx, (%eax)
Expand Down