Skip to content

Commit 788bbd2

Browse files
committed
[DAGCombiner] Improve chain handling in fold (fshl ld1, ld0, c) -> (ld0[ofs]) combine. (#124871)
Happened to notice some odd things related to chains in this code. The code calls hasOneUse on LoadSDNode* which will check users of the data and the chain. I think this was trying to check that the data had one use so one of the loads would definitely be removed by the transform. Load chains don't always have users so our testing may not have noticed that the chains being used would block the transform. The code makes all users of ld1's chain use the new load's chain, but we don't know that ld1 becomes dead. This can cause incorrect dependencies if ld1's chain is used and it isn't deleted. I think the better thing to do is use makeEquivalentMemoryOrdering to make all users of ld0 and ld1 depend on the new load and the original loads. If the olds loads become dead, their chain will be cleaned up later. I'm having trouble getting a test for any ordering issue with the current code. areNonVolatileConsecutiveLoads requires the two loads to have the same input chain. Given that, I don't know how to use one of the load chain results without also using the other. If they are both used we don't do the transform because SDNode::hasOneUse will return false for both.
1 parent c3b7894 commit 788bbd2

File tree

2 files changed

+17
-49
lines changed

2 files changed

+17
-49
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11005,8 +11005,8 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
1100511005
auto *RHS = dyn_cast<LoadSDNode>(N1);
1100611006
if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
1100711007
LHS->getAddressSpace() == RHS->getAddressSpace() &&
11008-
(LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
11009-
ISD::isNON_EXTLoad(LHS)) {
11008+
(LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11009+
ISD::isNON_EXTLoad(RHS) && ISD::isNON_EXTLoad(LHS)) {
1101011010
if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
1101111011
SDLoc DL(RHS);
1101211012
uint64_t PtrOff =
@@ -11024,9 +11024,8 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
1102411024
VT, DL, RHS->getChain(), NewPtr,
1102511025
RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
1102611026
RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11027-
// Replace the old load's chain with the new load's chain.
11028-
WorklistRemover DeadNodes(*this);
11029-
DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
11027+
DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11028+
DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
1103011029
return Load;
1103111030
}
1103211031
}

llvm/test/CodeGen/X86/fshl.ll

Lines changed: 13 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -658,51 +658,20 @@ define i64 @combine_fshl_load_i64(ptr %p) nounwind {
658658
}
659659

660660
define i16 @combine_fshl_load_i16_chain_use(ptr %p, ptr %q, i16 %r) nounwind {
661-
; X86-FAST-LABEL: combine_fshl_load_i16_chain_use:
662-
; X86-FAST: # %bb.0:
663-
; X86-FAST-NEXT: pushl %esi
664-
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
665-
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
666-
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
667-
; X86-FAST-NEXT: movzwl (%eax), %esi
668-
; X86-FAST-NEXT: movzwl 2(%eax), %eax
669-
; X86-FAST-NEXT: shldw $8, %si, %ax
670-
; X86-FAST-NEXT: movw %cx, (%edx)
671-
; X86-FAST-NEXT: popl %esi
672-
; X86-FAST-NEXT: retl
673-
;
674-
; X86-SLOW-LABEL: combine_fshl_load_i16_chain_use:
675-
; X86-SLOW: # %bb.0:
676-
; X86-SLOW-NEXT: pushl %esi
677-
; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
678-
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
679-
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
680-
; X86-SLOW-NEXT: movzwl 2(%esi), %eax
681-
; X86-SLOW-NEXT: movzbl 1(%esi), %esi
682-
; X86-SLOW-NEXT: shll $8, %eax
683-
; X86-SLOW-NEXT: orl %esi, %eax
684-
; X86-SLOW-NEXT: movw %cx, (%edx)
685-
; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
686-
; X86-SLOW-NEXT: popl %esi
687-
; X86-SLOW-NEXT: retl
688-
;
689-
; X64-FAST-LABEL: combine_fshl_load_i16_chain_use:
690-
; X64-FAST: # %bb.0:
691-
; X64-FAST-NEXT: movzwl (%rdi), %ecx
692-
; X64-FAST-NEXT: movzwl 2(%rdi), %eax
693-
; X64-FAST-NEXT: shldw $8, %cx, %ax
694-
; X64-FAST-NEXT: movw %dx, (%rsi)
695-
; X64-FAST-NEXT: retq
661+
; X86-LABEL: combine_fshl_load_i16_chain_use:
662+
; X86: # %bb.0:
663+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
664+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
665+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
666+
; X86-NEXT: movzwl 1(%eax), %eax
667+
; X86-NEXT: movw %cx, (%edx)
668+
; X86-NEXT: retl
696669
;
697-
; X64-SLOW-LABEL: combine_fshl_load_i16_chain_use:
698-
; X64-SLOW: # %bb.0:
699-
; X64-SLOW-NEXT: movzwl 2(%rdi), %eax
700-
; X64-SLOW-NEXT: movzbl 1(%rdi), %ecx
701-
; X64-SLOW-NEXT: shll $8, %eax
702-
; X64-SLOW-NEXT: orl %ecx, %eax
703-
; X64-SLOW-NEXT: movw %dx, (%rsi)
704-
; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
705-
; X64-SLOW-NEXT: retq
670+
; X64-LABEL: combine_fshl_load_i16_chain_use:
671+
; X64: # %bb.0:
672+
; X64-NEXT: movzwl 1(%rdi), %eax
673+
; X64-NEXT: movw %dx, (%rsi)
674+
; X64-NEXT: retq
706675
%p1 = getelementptr i16, ptr %p, i32 1
707676
%ld0 = load i16, ptr %p
708677
%ld1 = load i16, ptr %p1

0 commit comments

Comments
 (0)