Skip to content

Commit e4d01bb

Browse files
authored
[SCEV] Special case sext in isKnownNonZero (#77834)
The existing logic in isKnownNonZero relies on unsigned ranges, which can be problematic when our range calculation is imprecise. Consider the following:

  %offset.nonzero = or i32 %offset, 1
  -->  %offset.nonzero U: [1,0) S: [1,0)
  %offset.i64 = sext i32 %offset.nonzero to i64
  -->  (sext i32 %offset.nonzero to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648)

Note that the unsigned range for the sext does contain zero in this case, despite the fact that it can never actually be zero.

Instead, we can push the query down one level, relying on the fact that sext is an invertible operation and that the result can only be zero if the input is. We could likely generalize this reasoning to other invertible operations, but special-casing sext seems worthwhile.
1 parent 89c0ea8 commit e4d01bb

File tree

3 files changed

+118
-122
lines changed

3 files changed

+118
-122
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10721,6 +10721,10 @@ bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
1072110721
}
1072210722

1072310723
bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
10724+
// Query push down for cases where the unsigned range is
10725+
// less than sufficient.
10726+
if (const auto *SExt = dyn_cast<SCEVSignExtendExpr>(S))
10727+
return isKnownNonZero(SExt->getOperand(0));
1072410728
return getUnsignedRangeMin(S) != 0;
1072510729
}
1072610730

llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll

Lines changed: 87 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -24,29 +24,26 @@ define i64 @test_no_prep(ptr %arg, i32 signext %arg1) {
2424
; CHECK-NEXT: cmplwi r4, 0
2525
; CHECK-NEXT: beq cr0, .LBB0_4
2626
; CHECK-NEXT: # %bb.1: # %bb3.preheader
27-
; CHECK-NEXT: cmpldi r4, 1
28-
; CHECK-NEXT: li r5, 1
29-
; CHECK-NEXT: addi r3, r3, 4004
27+
; CHECK-NEXT: mtctr r4
28+
; CHECK-NEXT: addi r5, r3, 4004
29+
; CHECK-NEXT: li r3, 0
3030
; CHECK-NEXT: li r6, -3
3131
; CHECK-NEXT: li r7, -2
3232
; CHECK-NEXT: li r8, -1
33-
; CHECK-NEXT: iselgt r5, r4, r5
34-
; CHECK-NEXT: mtctr r5
35-
; CHECK-NEXT: li r5, 0
3633
; CHECK-NEXT: .p2align 4
3734
; CHECK-NEXT: .LBB0_2: # %bb3
3835
; CHECK-NEXT: #
39-
; CHECK-NEXT: ldx r9, r3, r6
40-
; CHECK-NEXT: ldx r10, r3, r7
41-
; CHECK-NEXT: ldx r11, r3, r8
42-
; CHECK-NEXT: ld r12, 0(r3)
43-
; CHECK-NEXT: addi r3, r3, 1
36+
; CHECK-NEXT: ldx r9, r5, r6
37+
; CHECK-NEXT: ldx r10, r5, r7
38+
; CHECK-NEXT: ldx r11, r5, r8
39+
; CHECK-NEXT: ld r12, 0(r5)
40+
; CHECK-NEXT: addi r5, r5, 1
4441
; CHECK-NEXT: mulld r9, r10, r9
4542
; CHECK-NEXT: mulld r9, r9, r11
46-
; CHECK-NEXT: maddld r5, r9, r12, r5
43+
; CHECK-NEXT: maddld r3, r9, r12, r3
4744
; CHECK-NEXT: bdnz .LBB0_2
4845
; CHECK-NEXT: # %bb.3: # %bb25
49-
; CHECK-NEXT: add r3, r5, r4
46+
; CHECK-NEXT: add r3, r3, r4
5047
; CHECK-NEXT: blr
5148
; CHECK-NEXT: .LBB0_4:
5249
; CHECK-NEXT: addi r3, r4, 0
@@ -105,21 +102,19 @@ define i64 @test_ds_prep(ptr %arg, i32 signext %arg1) {
105102
; CHECK-NEXT: cmplwi r4, 0
106103
; CHECK-NEXT: beq cr0, .LBB1_4
107104
; CHECK-NEXT: # %bb.1: # %bb3.preheader
108-
; CHECK-NEXT: cmpldi r4, 1
109-
; CHECK-NEXT: li r5, 1
110-
; CHECK-NEXT: addi r6, r3, 4002
111-
; CHECK-NEXT: li r7, -1
112-
; CHECK-NEXT: iselgt r3, r4, r5
113-
; CHECK-NEXT: mtctr r3
105+
; CHECK-NEXT: mtctr r4
106+
; CHECK-NEXT: addi r7, r3, 4002
114107
; CHECK-NEXT: li r3, 0
108+
; CHECK-NEXT: li r5, -1
109+
; CHECK-NEXT: li r6, 1
115110
; CHECK-NEXT: .p2align 4
116111
; CHECK-NEXT: .LBB1_2: # %bb3
117112
; CHECK-NEXT: #
118-
; CHECK-NEXT: ldx r8, r6, r7
119-
; CHECK-NEXT: ld r9, 0(r6)
120-
; CHECK-NEXT: ldx r10, r6, r5
121-
; CHECK-NEXT: ld r11, 4(r6)
122-
; CHECK-NEXT: addi r6, r6, 1
113+
; CHECK-NEXT: ldx r8, r7, r5
114+
; CHECK-NEXT: ld r9, 0(r7)
115+
; CHECK-NEXT: ldx r10, r7, r6
116+
; CHECK-NEXT: ld r11, 4(r7)
117+
; CHECK-NEXT: addi r7, r7, 1
123118
; CHECK-NEXT: mulld r8, r9, r8
124119
; CHECK-NEXT: mulld r8, r8, r10
125120
; CHECK-NEXT: maddld r3, r8, r11, r3
@@ -194,28 +189,26 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
194189
; CHECK-NEXT: cmplwi r4, 0
195190
; CHECK-NEXT: beq cr0, .LBB2_4
196191
; CHECK-NEXT: # %bb.1: # %bb3.preheader
197-
; CHECK-NEXT: cmpldi r4, 1
198-
; CHECK-NEXT: li r5, 1
199-
; CHECK-NEXT: addi r10, r3, 4002
200192
; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill
201-
; CHECK-NEXT: li r6, -1
202193
; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill
194+
; CHECK-NEXT: addi r10, r3, 4002
195+
; CHECK-NEXT: li r3, 0
196+
; CHECK-NEXT: li r5, -1
197+
; CHECK-NEXT: li r6, 1
203198
; CHECK-NEXT: li r7, 3
204199
; CHECK-NEXT: li r8, 5
205200
; CHECK-NEXT: li r9, 9
206201
; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
207202
; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
208203
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
209204
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
210-
; CHECK-NEXT: iselgt r3, r4, r5
211-
; CHECK-NEXT: mtctr r3
212-
; CHECK-NEXT: li r3, 0
205+
; CHECK-NEXT: mtctr r4
213206
; CHECK-NEXT: .p2align 4
214207
; CHECK-NEXT: .LBB2_2: # %bb3
215208
; CHECK-NEXT: #
216-
; CHECK-NEXT: ldx r11, r10, r6
209+
; CHECK-NEXT: ldx r11, r10, r5
217210
; CHECK-NEXT: ld r12, 0(r10)
218-
; CHECK-NEXT: ldx r0, r10, r5
211+
; CHECK-NEXT: ldx r0, r10, r6
219212
; CHECK-NEXT: ldx r30, r10, r7
220213
; CHECK-NEXT: mulld r11, r12, r11
221214
; CHECK-NEXT: ld r29, 4(r10)
@@ -313,26 +306,24 @@ define dso_local i64 @test_update_ds_prep_interact(ptr %arg, i32 signext %arg1)
313306
; CHECK-NEXT: cmplwi r4, 0
314307
; CHECK-NEXT: beq cr0, .LBB3_4
315308
; CHECK-NEXT: # %bb.1: # %bb3.preheader
316-
; CHECK-NEXT: cmpldi r4, 1
317-
; CHECK-NEXT: li r6, 1
318-
; CHECK-NEXT: addi r3, r3, 3998
319-
; CHECK-NEXT: li r7, -1
320-
; CHECK-NEXT: iselgt r5, r4, r6
321-
; CHECK-NEXT: mtctr r5
322-
; CHECK-NEXT: li r5, 0
309+
; CHECK-NEXT: mtctr r4
310+
; CHECK-NEXT: addi r5, r3, 3998
311+
; CHECK-NEXT: li r3, 0
312+
; CHECK-NEXT: li r6, -1
313+
; CHECK-NEXT: li r7, 1
323314
; CHECK-NEXT: .p2align 5
324315
; CHECK-NEXT: .LBB3_2: # %bb3
325316
; CHECK-NEXT: #
326-
; CHECK-NEXT: ldu r8, 4(r3)
327-
; CHECK-NEXT: ldx r9, r3, r7
328-
; CHECK-NEXT: ldx r10, r3, r6
329-
; CHECK-NEXT: ld r11, 4(r3)
317+
; CHECK-NEXT: ldu r8, 4(r5)
318+
; CHECK-NEXT: ldx r9, r5, r6
319+
; CHECK-NEXT: ldx r10, r5, r7
320+
; CHECK-NEXT: ld r11, 4(r5)
330321
; CHECK-NEXT: mulld r8, r8, r9
331322
; CHECK-NEXT: mulld r8, r8, r10
332-
; CHECK-NEXT: maddld r5, r8, r11, r5
323+
; CHECK-NEXT: maddld r3, r8, r11, r3
333324
; CHECK-NEXT: bdnz .LBB3_2
334325
; CHECK-NEXT: # %bb.3: # %bb26
335-
; CHECK-NEXT: add r3, r5, r4
326+
; CHECK-NEXT: add r3, r3, r4
336327
; CHECK-NEXT: blr
337328
; CHECK-NEXT: .LBB3_4:
338329
; CHECK-NEXT: addi r3, r4, 0
@@ -392,28 +383,25 @@ define i64 @test_update_ds_prep_nointeract(ptr %arg, i32 signext %arg1) {
392383
; CHECK-NEXT: cmplwi r4, 0
393384
; CHECK-NEXT: beq cr0, .LBB4_4
394385
; CHECK-NEXT: # %bb.1: # %bb3.preheader
395-
; CHECK-NEXT: cmpldi r4, 1
396-
; CHECK-NEXT: li r6, 1
386+
; CHECK-NEXT: mtctr r4
397387
; CHECK-NEXT: addi r5, r3, 4000
398-
; CHECK-NEXT: addi r3, r3, 4003
388+
; CHECK-NEXT: addi r6, r3, 4003
389+
; CHECK-NEXT: li r3, 0
399390
; CHECK-NEXT: li r7, -1
400-
; CHECK-NEXT: iselgt r6, r4, r6
401-
; CHECK-NEXT: mtctr r6
402-
; CHECK-NEXT: li r6, 0
403391
; CHECK-NEXT: .p2align 4
404392
; CHECK-NEXT: .LBB4_2: # %bb3
405393
; CHECK-NEXT: #
406394
; CHECK-NEXT: lbzu r8, 1(r5)
407-
; CHECK-NEXT: ldx r9, r3, r7
408-
; CHECK-NEXT: ld r10, 0(r3)
409-
; CHECK-NEXT: ld r11, 4(r3)
410-
; CHECK-NEXT: addi r3, r3, 1
395+
; CHECK-NEXT: ldx r9, r6, r7
396+
; CHECK-NEXT: ld r10, 0(r6)
397+
; CHECK-NEXT: ld r11, 4(r6)
398+
; CHECK-NEXT: addi r6, r6, 1
411399
; CHECK-NEXT: mulld r8, r9, r8
412400
; CHECK-NEXT: mulld r8, r8, r10
413-
; CHECK-NEXT: maddld r6, r8, r11, r6
401+
; CHECK-NEXT: maddld r3, r8, r11, r3
414402
; CHECK-NEXT: bdnz .LBB4_2
415403
; CHECK-NEXT: # %bb.3: # %bb25
416-
; CHECK-NEXT: add r3, r6, r4
404+
; CHECK-NEXT: add r3, r3, r4
417405
; CHECK-NEXT: blr
418406
; CHECK-NEXT: .LBB4_4:
419407
; CHECK-NEXT: addi r3, r4, 0
@@ -477,23 +465,20 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext %
477465
; CHECK-NEXT: cmplwi r5, 0
478466
; CHECK-NEXT: beq cr0, .LBB5_4
479467
; CHECK-NEXT: # %bb.1: # %bb4.preheader
480-
; CHECK-NEXT: cmpldi r5, 1
481-
; CHECK-NEXT: li r6, 1
482468
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
483-
; CHECK-NEXT: addi r3, r3, 4001
469+
; CHECK-NEXT: addi r6, r3, 4001
484470
; CHECK-NEXT: addi r4, r4, 4001
471+
; CHECK-NEXT: li r3, 0
485472
; CHECK-NEXT: li r7, 9
486-
; CHECK-NEXT: iselgt r6, r5, r6
487-
; CHECK-NEXT: mtctr r6
488-
; CHECK-NEXT: li r6, 0
473+
; CHECK-NEXT: mtctr r5
489474
; CHECK-NEXT: .p2align 4
490475
; CHECK-NEXT: .LBB5_2: # %bb4
491476
; CHECK-NEXT: #
492-
; CHECK-NEXT: ld r8, 0(r3)
493-
; CHECK-NEXT: ldx r9, r3, r7
494-
; CHECK-NEXT: ld r10, 4(r3)
495-
; CHECK-NEXT: ld r11, 8(r3)
496-
; CHECK-NEXT: addi r3, r3, 1
477+
; CHECK-NEXT: ld r8, 0(r6)
478+
; CHECK-NEXT: ldx r9, r6, r7
479+
; CHECK-NEXT: ld r10, 4(r6)
480+
; CHECK-NEXT: ld r11, 8(r6)
481+
; CHECK-NEXT: addi r6, r6, 1
497482
; CHECK-NEXT: mulld r8, r9, r8
498483
; CHECK-NEXT: ld r12, 0(r4)
499484
; CHECK-NEXT: ldx r0, r4, r7
@@ -505,11 +490,11 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext %
505490
; CHECK-NEXT: mulld r8, r8, r12
506491
; CHECK-NEXT: mulld r8, r8, r0
507492
; CHECK-NEXT: mulld r8, r8, r30
508-
; CHECK-NEXT: maddld r6, r8, r9, r6
493+
; CHECK-NEXT: maddld r3, r8, r9, r3
509494
; CHECK-NEXT: bdnz .LBB5_2
510495
; CHECK-NEXT: # %bb.3:
511496
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
512-
; CHECK-NEXT: add r3, r6, r5
497+
; CHECK-NEXT: add r3, r3, r5
513498
; CHECK-NEXT: blr
514499
; CHECK-NEXT: .LBB5_4:
515500
; CHECK-NEXT: addi r3, r5, 0
@@ -598,72 +583,70 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
598583
; CHECK-NEXT: beq cr0, .LBB6_9
599584
; CHECK-NEXT: # %bb.1: # %bb3
600585
; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
601-
; CHECK-NEXT: cmpldi r4, 1
602-
; CHECK-NEXT: li r7, 1
603-
; CHECK-NEXT: addi r6, r3, 4009
604586
; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
605-
; CHECK-NEXT: ld r5, .LC0@toc@l(r5)
606-
; CHECK-NEXT: iselgt r3, r4, r7
607587
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
588+
; CHECK-NEXT: ld r5, .LC0@toc@l(r5)
608589
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
609-
; CHECK-NEXT: li r4, -7
590+
; CHECK-NEXT: addi r6, r3, 4009
591+
; CHECK-NEXT: li r3, 0
592+
; CHECK-NEXT: li r7, -7
610593
; CHECK-NEXT: li r8, -6
611594
; CHECK-NEXT: li r9, 1
612595
; CHECK-NEXT: li r10, 1
613596
; CHECK-NEXT: li r11, 1
614597
; CHECK-NEXT: li r12, 1
615-
; CHECK-NEXT: li r30, 1
598+
; CHECK-NEXT: li r0, 1
616599
; CHECK-NEXT: ld r5, 0(r5)
617-
; CHECK-NEXT: mtctr r3
618-
; CHECK-NEXT: li r3, 0
600+
; CHECK-NEXT: mtctr r4
601+
; CHECK-NEXT: li r4, 1
619602
; CHECK-NEXT: addi r5, r5, -1
620603
; CHECK-NEXT: b .LBB6_4
621604
; CHECK-NEXT: .p2align 4
622605
; CHECK-NEXT: .LBB6_2: # %bb18
623606
; CHECK-NEXT: #
624-
; CHECK-NEXT: addi r29, r6, -9
625-
; CHECK-NEXT: ld r0, 0(r29)
626-
; CHECK-NEXT: add r30, r0, r30
627-
; CHECK-NEXT: ld r0, -8(r6)
628-
; CHECK-NEXT: add r12, r0, r12
607+
; CHECK-NEXT: addi r30, r6, -9
608+
; CHECK-NEXT: ld r30, 0(r30)
609+
; CHECK-NEXT: add r0, r30, r0
610+
; CHECK-NEXT: ld r30, -8(r6)
611+
; CHECK-NEXT: add r12, r30, r12
629612
; CHECK-NEXT: .LBB6_3: # %bb49
630613
; CHECK-NEXT: #
631-
; CHECK-NEXT: mulld r0, r12, r30
614+
; CHECK-NEXT: mulld r30, r12, r0
632615
; CHECK-NEXT: addi r6, r6, 1
633-
; CHECK-NEXT: mulld r0, r0, r11
634-
; CHECK-NEXT: mulld r0, r0, r10
635-
; CHECK-NEXT: mulld r0, r0, r9
636-
; CHECK-NEXT: maddld r3, r0, r7, r3
616+
; CHECK-NEXT: mulld r30, r30, r11
617+
; CHECK-NEXT: mulld r30, r30, r10
618+
; CHECK-NEXT: mulld r30, r30, r9
619+
; CHECK-NEXT: maddld r3, r30, r4, r3
637620
; CHECK-NEXT: bdz .LBB6_8
638621
; CHECK-NEXT: .LBB6_4: # %bb5
639622
; CHECK-NEXT: #
640-
; CHECK-NEXT: lbzu r0, 1(r5)
641-
; CHECK-NEXT: mulli r29, r0, 171
623+
; CHECK-NEXT: lbzu r30, 1(r5)
624+
; CHECK-NEXT: mulli r29, r30, 171
642625
; CHECK-NEXT: rlwinm r28, r29, 24, 8, 30
643626
; CHECK-NEXT: srwi r29, r29, 9
644627
; CHECK-NEXT: add r29, r29, r28
645-
; CHECK-NEXT: sub r0, r0, r29
646-
; CHECK-NEXT: clrlwi r0, r0, 24
647-
; CHECK-NEXT: cmplwi r0, 1
628+
; CHECK-NEXT: sub r30, r30, r29
629+
; CHECK-NEXT: clrlwi r30, r30, 24
630+
; CHECK-NEXT: cmplwi r30, 1
648631
; CHECK-NEXT: beq cr0, .LBB6_2
649632
; CHECK-NEXT: # %bb.5: # %bb28
650633
; CHECK-NEXT: #
651-
; CHECK-NEXT: cmplwi r0, 2
634+
; CHECK-NEXT: cmplwi r30, 2
652635
; CHECK-NEXT: bne cr0, .LBB6_7
653636
; CHECK-NEXT: # %bb.6: # %bb31
654637
; CHECK-NEXT: #
655-
; CHECK-NEXT: ldx r0, r6, r4
656-
; CHECK-NEXT: add r11, r0, r11
657-
; CHECK-NEXT: ld r0, -4(r6)
658-
; CHECK-NEXT: add r10, r0, r10
638+
; CHECK-NEXT: ldx r30, r6, r7
639+
; CHECK-NEXT: add r11, r30, r11
640+
; CHECK-NEXT: ld r30, -4(r6)
641+
; CHECK-NEXT: add r10, r30, r10
659642
; CHECK-NEXT: b .LBB6_3
660643
; CHECK-NEXT: .p2align 4
661644
; CHECK-NEXT: .LBB6_7: # %bb40
662645
; CHECK-NEXT: #
663-
; CHECK-NEXT: ldx r0, r6, r8
664-
; CHECK-NEXT: add r9, r0, r9
665-
; CHECK-NEXT: ld r0, 0(r6)
666-
; CHECK-NEXT: add r7, r0, r7
646+
; CHECK-NEXT: ldx r30, r6, r8
647+
; CHECK-NEXT: add r9, r30, r9
648+
; CHECK-NEXT: ld r30, 0(r6)
649+
; CHECK-NEXT: add r4, r30, r4
667650
; CHECK-NEXT: b .LBB6_3
668651
; CHECK-NEXT: .LBB6_8:
669652
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload

0 commit comments

Comments
 (0)