Skip to content

[LICM] Prevent fold and hoist of binary ops with over 2 uses #102114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Scalar/LICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2806,7 +2806,8 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
return false;

auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
- if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative())
+ if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative() ||
+ BO0->hasNUsesOrMore(3))
return false;

// Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
Expand Down
313 changes: 154 additions & 159 deletions llvm/test/CodeGen/PowerPC/common-chain.ll
Original file line number Diff line number Diff line change
Expand Up @@ -743,219 +743,214 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64
; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r4, -160(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill
; CHECK-NEXT: ble cr0, .LBB7_7
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: sldi r4, r6, 2
; CHECK-NEXT: li r6, 1
; CHECK-NEXT: mr r0, r10
; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill
; CHECK-NEXT: cmpdi r4, 1
; CHECK-NEXT: iselgt r4, r4, r6
; CHECK-NEXT: addi r7, r4, -1
; CHECK-NEXT: clrldi r6, r4, 63
; CHECK-NEXT: cmpldi r7, 3
; CHECK-NEXT: sldi r6, r6, 2
; CHECK-NEXT: li r7, 1
; CHECK-NEXT: mr r30, r10
; CHECK-NEXT: cmpdi r6, 1
; CHECK-NEXT: iselgt r7, r6, r7
; CHECK-NEXT: addi r8, r7, -1
; CHECK-NEXT: clrldi r6, r7, 63
; CHECK-NEXT: cmpldi r8, 3
; CHECK-NEXT: blt cr0, .LBB7_4
; CHECK-NEXT: # %bb.2: # %for.body.preheader.new
; CHECK-NEXT: ld r0, -192(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r30, -184(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r8, -176(r1) # 8-byte Folded Reload
; CHECK-NEXT: rldicl r7, r4, 62, 2
; CHECK-NEXT: ld r9, -168(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r11, r0, r30
; CHECK-NEXT: add r4, r0, r0
; CHECK-NEXT: mulli r23, r0, 24
; CHECK-NEXT: add r14, r0, r8
; CHECK-NEXT: sldi r12, r0, 5
; CHECK-NEXT: add r31, r0, r9
; CHECK-NEXT: sldi r9, r9, 3
; CHECK-NEXT: sldi r18, r0, 4
; CHECK-NEXT: sldi r8, r8, 3
; CHECK-NEXT: add r10, r4, r4
; CHECK-NEXT: sldi r4, r30, 3
; CHECK-NEXT: sldi r11, r11, 3
; CHECK-NEXT: add r26, r12, r9
; CHECK-NEXT: add r16, r18, r9
; CHECK-NEXT: add r29, r12, r8
; CHECK-NEXT: add r19, r18, r8
; CHECK-NEXT: add r30, r12, r4
; CHECK-NEXT: mr r20, r4
; CHECK-NEXT: std r4, -200(r1) # 8-byte Folded Spill
; CHECK-NEXT: ld r4, -160(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r15, r5, r11
; CHECK-NEXT: sldi r11, r14, 3
; CHECK-NEXT: add r29, r5, r29
; CHECK-NEXT: add r28, r3, r26
; CHECK-NEXT: add r19, r5, r19
; CHECK-NEXT: add r21, r23, r9
; CHECK-NEXT: add r24, r23, r8
; CHECK-NEXT: add r14, r5, r11
; CHECK-NEXT: sldi r11, r31, 3
; CHECK-NEXT: add r25, r23, r20
; CHECK-NEXT: add r20, r18, r20
; CHECK-NEXT: add r30, r5, r30
; CHECK-NEXT: add r18, r3, r16
; CHECK-NEXT: add r24, r5, r24
; CHECK-NEXT: add r23, r3, r21
; CHECK-NEXT: add r27, r4, r26
; CHECK-NEXT: add r22, r4, r21
; CHECK-NEXT: add r17, r4, r16
; CHECK-NEXT: add r2, r4, r11
; CHECK-NEXT: rldicl r4, r7, 2, 1
; CHECK-NEXT: sub r7, r8, r9
; CHECK-NEXT: ld r8, -200(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload
; CHECK-NEXT: mulli r24, r30, 24
; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload
; CHECK-NEXT: rldicl r0, r7, 62, 2
; CHECK-NEXT: sldi r11, r30, 5
; CHECK-NEXT: sldi r19, r30, 4
; CHECK-NEXT: sldi r7, r14, 3
; CHECK-NEXT: add r14, r30, r14
; CHECK-NEXT: sldi r10, r16, 3
; CHECK-NEXT: sldi r12, r15, 3
; CHECK-NEXT: add r16, r30, r16
; CHECK-NEXT: add r15, r30, r15
; CHECK-NEXT: add r27, r11, r7
; CHECK-NEXT: add r22, r24, r7
; CHECK-NEXT: add r17, r19, r7
; CHECK-NEXT: sldi r2, r14, 3
; CHECK-NEXT: add r26, r24, r10
; CHECK-NEXT: add r25, r24, r12
; CHECK-NEXT: add r21, r19, r10
; CHECK-NEXT: add r20, r19, r12
; CHECK-NEXT: add r8, r11, r10
; CHECK-NEXT: sldi r16, r16, 3
; CHECK-NEXT: add r29, r5, r27
; CHECK-NEXT: add r28, r4, r27
; CHECK-NEXT: add r27, r3, r27
; CHECK-NEXT: add r24, r5, r22
; CHECK-NEXT: add r23, r4, r22
; CHECK-NEXT: add r22, r3, r22
; CHECK-NEXT: add r19, r5, r17
; CHECK-NEXT: add r18, r4, r17
; CHECK-NEXT: add r17, r3, r17
; CHECK-NEXT: add r14, r5, r2
; CHECK-NEXT: add r31, r4, r2
; CHECK-NEXT: add r2, r3, r2
; CHECK-NEXT: add r9, r5, r8
; CHECK-NEXT: add r8, r11, r12
; CHECK-NEXT: add r26, r5, r26
; CHECK-NEXT: add r25, r5, r25
; CHECK-NEXT: add r21, r5, r21
; CHECK-NEXT: add r20, r5, r20
; CHECK-NEXT: add r16, r5, r16
; CHECK-NEXT: add r31, r5, r11
; CHECK-NEXT: add r11, r3, r11
; CHECK-NEXT: addi r4, r4, -4
; CHECK-NEXT: rldicl r4, r4, 62, 2
; CHECK-NEXT: sub r8, r8, r9
; CHECK-NEXT: li r9, 0
; CHECK-NEXT: addi r4, r4, 1
; CHECK-NEXT: mtctr r4
; CHECK-NEXT: add r8, r5, r8
; CHECK-NEXT: rldicl r3, r0, 2, 1
; CHECK-NEXT: addi r3, r3, -4
; CHECK-NEXT: sub r0, r12, r7
; CHECK-NEXT: sub r12, r10, r7
; CHECK-NEXT: li r7, 0
; CHECK-NEXT: mr r10, r30
; CHECK-NEXT: sldi r15, r15, 3
; CHECK-NEXT: add r15, r5, r15
; CHECK-NEXT: rldicl r3, r3, 62, 2
; CHECK-NEXT: addi r3, r3, 1
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_3: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lfd f0, 0(r11)
; CHECK-NEXT: lfd f1, 0(r2)
; CHECK-NEXT: add r0, r0, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfd f0, 0(r2)
; CHECK-NEXT: lfd f1, 0(r31)
; CHECK-NEXT: add r3, r10, r30
; CHECK-NEXT: add r3, r3, r30
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfd f1, 0(r14)
; CHECK-NEXT: add r3, r3, r30
; CHECK-NEXT: add r10, r3, r30
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfd f0, 0(r31)
; CHECK-NEXT: add r31, r31, r12
; CHECK-NEXT: lfdx f0, r11, r7
; CHECK-NEXT: lfdx f1, r2, r7
; CHECK-NEXT: stfd f0, 0(r14)
; CHECK-NEXT: add r14, r14, r11
; CHECK-NEXT: lfdx f0, r2, r0
; CHECK-NEXT: lfdx f1, r31, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r14, r9
; CHECK-NEXT: lfdx f1, r15, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r14, r9
; CHECK-NEXT: lfdx f0, r11, r8
; CHECK-NEXT: lfdx f1, r2, r8
; CHECK-NEXT: add r11, r11, r12
; CHECK-NEXT: add r2, r2, r12
; CHECK-NEXT: stfdx f0, r15, r7
; CHECK-NEXT: lfdx f0, r2, r12
; CHECK-NEXT: lfdx f1, r31, r12
; CHECK-NEXT: add r2, r2, r11
; CHECK-NEXT: add r31, r31, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r15, r9
; CHECK-NEXT: lfdx f1, r16, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r15, r9
; CHECK-NEXT: lfd f0, 0(r18)
; CHECK-NEXT: lfd f1, 0(r17)
; CHECK-NEXT: stfdx f0, r16, r7
; CHECK-NEXT: lfd f0, 0(r17)
; CHECK-NEXT: lfd f1, 0(r18)
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r16, r9
; CHECK-NEXT: lfdx f1, r19, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r16, r9
; CHECK-NEXT: lfdx f0, r18, r7
; CHECK-NEXT: lfdx f1, r17, r7
; CHECK-NEXT: stfdx f0, r19, r7
; CHECK-NEXT: lfdx f0, r17, r0
; CHECK-NEXT: lfdx f1, r18, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r19, r9
; CHECK-NEXT: lfdx f1, r20, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r19, r9
; CHECK-NEXT: lfdx f0, r18, r8
; CHECK-NEXT: lfdx f1, r17, r8
; CHECK-NEXT: add r18, r18, r12
; CHECK-NEXT: add r17, r17, r12
; CHECK-NEXT: stfdx f0, r20, r7
; CHECK-NEXT: lfdx f0, r17, r12
; CHECK-NEXT: lfdx f1, r18, r12
; CHECK-NEXT: add r17, r17, r11
; CHECK-NEXT: add r18, r18, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r20, r9
; CHECK-NEXT: lfdx f1, r21, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r20, r9
; CHECK-NEXT: lfd f0, 0(r23)
; CHECK-NEXT: lfd f1, 0(r22)
; CHECK-NEXT: stfdx f0, r21, r7
; CHECK-NEXT: lfd f0, 0(r22)
; CHECK-NEXT: lfd f1, 0(r23)
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r21, r9
; CHECK-NEXT: lfdx f1, r24, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r21, r9
; CHECK-NEXT: lfdx f0, r23, r7
; CHECK-NEXT: lfdx f1, r22, r7
; CHECK-NEXT: stfdx f0, r24, r7
; CHECK-NEXT: lfdx f0, r22, r0
; CHECK-NEXT: lfdx f1, r23, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r24, r9
; CHECK-NEXT: lfdx f1, r25, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r24, r9
; CHECK-NEXT: lfdx f0, r23, r8
; CHECK-NEXT: lfdx f1, r22, r8
; CHECK-NEXT: add r23, r23, r12
; CHECK-NEXT: add r22, r22, r12
; CHECK-NEXT: stfdx f0, r25, r7
; CHECK-NEXT: lfdx f0, r22, r12
; CHECK-NEXT: lfdx f1, r23, r12
; CHECK-NEXT: add r22, r22, r11
; CHECK-NEXT: add r23, r23, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r25, r9
; CHECK-NEXT: lfdx f1, r26, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r25, r9
; CHECK-NEXT: lfd f0, 0(r28)
; CHECK-NEXT: lfd f1, 0(r27)
; CHECK-NEXT: stfdx f0, r26, r7
; CHECK-NEXT: lfd f0, 0(r27)
; CHECK-NEXT: lfd f1, 0(r28)
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r26, r9
; CHECK-NEXT: lfdx f1, r29, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r26, r9
; CHECK-NEXT: lfdx f0, r28, r7
; CHECK-NEXT: lfdx f1, r27, r7
; CHECK-NEXT: stfdx f0, r29, r7
; CHECK-NEXT: lfdx f0, r27, r0
; CHECK-NEXT: lfdx f1, r28, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r29, r9
; CHECK-NEXT: lfdx f1, r8, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r29, r9
; CHECK-NEXT: lfdx f0, r28, r8
; CHECK-NEXT: lfdx f1, r27, r8
; CHECK-NEXT: add r28, r28, r12
; CHECK-NEXT: add r27, r27, r12
; CHECK-NEXT: stfdx f0, r8, r7
; CHECK-NEXT: lfdx f0, r27, r12
; CHECK-NEXT: lfdx f1, r28, r12
; CHECK-NEXT: add r27, r27, r11
; CHECK-NEXT: add r28, r28, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r30, r9
; CHECK-NEXT: lfdx f1, r9, r7
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r30, r9
; CHECK-NEXT: add r9, r9, r12
; CHECK-NEXT: stfdx f0, r9, r7
; CHECK-NEXT: add r7, r7, r11
; CHECK-NEXT: bdnz .LBB7_3
; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa
; CHECK-NEXT: ld r7, -192(r1) # 8-byte Folded Reload
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: beq cr0, .LBB7_7
; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader
; CHECK-NEXT: ld r4, -184(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -160(r1) # 8-byte Folded Reload
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: sldi r7, r7, 3
; CHECK-NEXT: add r4, r0, r4
; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: add r3, r5, r4
; CHECK-NEXT: add r8, r29, r4
; CHECK-NEXT: add r9, r30, r4
; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r4, r0, r4
; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: add r10, r5, r4
; CHECK-NEXT: add r11, r29, r4
; CHECK-NEXT: add r12, r30, r4
; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r4, r0, r4
; CHECK-NEXT: sldi r0, r4, 3
; CHECK-NEXT: add r5, r5, r0
; CHECK-NEXT: add r4, r29, r0
; CHECK-NEXT: add r30, r30, r0
; CHECK-NEXT: li r0, 0
; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload
; CHECK-NEXT: sldi r8, r30, 3
; CHECK-NEXT: add r3, r10, r3
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: add r7, r5, r3
; CHECK-NEXT: add r9, r4, r3
; CHECK-NEXT: add r11, r0, r3
; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r3, r10, r3
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: add r12, r5, r3
; CHECK-NEXT: add r30, r4, r3
; CHECK-NEXT: add r29, r0, r3
; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r3, r10, r3
; CHECK-NEXT: li r10, 0
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: add r5, r5, r3
; CHECK-NEXT: add r4, r4, r3
; CHECK-NEXT: add r3, r0, r3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_6: # %for.body.epil
; CHECK-NEXT: #
; CHECK-NEXT: lfdx f0, r30, r0
; CHECK-NEXT: lfdx f1, r4, r0
; CHECK-NEXT: lfdx f0, r3, r10
; CHECK-NEXT: lfdx f1, r4, r10
; CHECK-NEXT: addi r6, r6, -1
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfd f1, 0(r5)
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfd f0, 0(r5)
; CHECK-NEXT: add r5, r5, r7
; CHECK-NEXT: lfdx f0, r12, r0
; CHECK-NEXT: lfdx f1, r11, r0
; CHECK-NEXT: add r5, r5, r8
; CHECK-NEXT: lfdx f0, r29, r10
; CHECK-NEXT: lfdx f1, r30, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r10, r0
; CHECK-NEXT: lfdx f1, r12, r10
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r10, r0
; CHECK-NEXT: lfdx f0, r9, r0
; CHECK-NEXT: lfdx f1, r8, r0
; CHECK-NEXT: stfdx f0, r12, r10
; CHECK-NEXT: lfdx f0, r11, r10
; CHECK-NEXT: lfdx f1, r9, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r3, r0
; CHECK-NEXT: lfdx f1, r7, r10
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r3, r0
; CHECK-NEXT: add r0, r0, r7
; CHECK-NEXT: stfdx f0, r7, r10
; CHECK-NEXT: add r10, r10, r8
; CHECK-NEXT: bne cr0, .LBB7_6
; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload
Expand Down
Loading
Loading