Skip to content

Commit 51809e4

Browse files
authored
[DAG] SimplifyDemandedVectorElts - add SimplifyMultipleUse handling to SEXT/ZEXT/TRUNC nodes (#116227)
Allows SimplifyDemandedVectorElts to bypass multiple uses of a SEXT/ZEXT/TRUNC node's operand when not all vector elements are demanded.
1 parent e508bac commit 51809e4

File tree

2 files changed

+20
-7
lines changed

2 files changed

+20
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3724,6 +3724,11 @@ bool TargetLowering::SimplifyDemandedVectorElts(
37243724
KnownZero, TLO, Depth + 1))
37253725
return true;
37263726

3727+
if (!DemandedElts.isAllOnes())
3728+
if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3729+
Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3730+
return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3731+
37273732
if (Op.getOpcode() == ISD::ZERO_EXTEND) {
37283733
// zext(undef) upper bits are guaranteed to be zero.
37293734
if (DemandedElts.isSubsetOf(KnownUndef))

llvm/test/CodeGen/AArch64/arm64-ld1.ll

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1148,13 +1148,21 @@ define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
11481148

11491149
; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s
11501150
define void @ld1r_2s_from_dup(ptr nocapture %a, ptr nocapture %b, ptr nocapture %diff) nounwind ssp {
1151-
; CHECK-LABEL: ld1r_2s_from_dup:
1152-
; CHECK: // %bb.0: // %entry
1153-
; CHECK-NEXT: ld1r.2s { v0 }, [x0]
1154-
; CHECK-NEXT: ld1r.2s { v1 }, [x1]
1155-
; CHECK-NEXT: usubl.8h v0, v0, v1
1156-
; CHECK-NEXT: str d0, [x2]
1157-
; CHECK-NEXT: ret
1151+
; CHECK-SD-LABEL: ld1r_2s_from_dup:
1152+
; CHECK-SD: // %bb.0: // %entry
1153+
; CHECK-SD-NEXT: ldr s0, [x0]
1154+
; CHECK-SD-NEXT: ldr s1, [x1]
1155+
; CHECK-SD-NEXT: usubl.8h v0, v0, v1
1156+
; CHECK-SD-NEXT: str d0, [x2]
1157+
; CHECK-SD-NEXT: ret
1158+
;
1159+
; CHECK-GI-LABEL: ld1r_2s_from_dup:
1160+
; CHECK-GI: // %bb.0: // %entry
1161+
; CHECK-GI-NEXT: ld1r.2s { v0 }, [x0]
1162+
; CHECK-GI-NEXT: ld1r.2s { v1 }, [x1]
1163+
; CHECK-GI-NEXT: usubl.8h v0, v0, v1
1164+
; CHECK-GI-NEXT: str d0, [x2]
1165+
; CHECK-GI-NEXT: ret
11581166
entry:
11591167
%tmp1 = load i32, ptr %a, align 4
11601168
%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0

0 commit comments

Comments
 (0)