Skip to content

Commit 5eb24fd

Browse files
authored
[SelectionDAG][RISCV] Preserve nneg flag when folding (trunc (zext X))->(zext X). (#144807)
If X is known non-negative, that is still true when we fold the truncate away to create a smaller zext. In the i128 tests, SelectionDAGBuilder aggressively truncates the `zext nneg` to i64 to match `getShiftAmountTy`. If we do not preserve the `nneg` flag, we cannot see that the shift-amount argument being `signext` means no extension is needed.
1 parent fdb5726 commit 5eb24fd

File tree

3 files changed

+307
-4
lines changed

3 files changed

+307
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15740,8 +15740,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1574015740
N0.getOpcode() == ISD::SIGN_EXTEND ||
1574115741
N0.getOpcode() == ISD::ANY_EXTEND) {
1574215742
// if the source is smaller than the dest, we still need an extend.
15743-
if (N0.getOperand(0).getValueType().bitsLT(VT))
15744-
return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15743+
if (N0.getOperand(0).getValueType().bitsLT(VT)) {
15744+
SDNodeFlags Flags;
15745+
if (N0.getOpcode() == ISD::ZERO_EXTEND)
15746+
Flags.setNonNeg(N0->getFlags().hasNonNeg());
15747+
return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
15748+
}
1574515749
// if the source is larger than the dest, then we just need the truncate.
1574615750
if (N0.getOperand(0).getValueType().bitsGT(VT))
1574715751
return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6474,8 +6474,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
64746474
OpOpcode == ISD::ANY_EXTEND) {
64756475
// If the source is smaller than the dest, we still need an extend.
64766476
if (N1.getOperand(0).getValueType().getScalarType().bitsLT(
6477-
VT.getScalarType()))
6478-
return getNode(OpOpcode, DL, VT, N1.getOperand(0));
6477+
VT.getScalarType())) {
6478+
SDNodeFlags Flags;
6479+
if (OpOpcode == ISD::ZERO_EXTEND)
6480+
Flags.setNonNeg(N1->getFlags().hasNonNeg());
6481+
return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
6482+
}
64796483
if (N1.getOperand(0).getValueType().bitsGT(VT))
64806484
return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
64816485
return N1.getOperand(0);

llvm/test/CodeGen/RISCV/shifts.ll

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,3 +484,298 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
484484
%res = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %b)
485485
ret i128 %res
486486
}
487+
488+
define i64 @lshr64_shamt32(i64 %a, i32 signext %b) nounwind {
489+
; RV32I-LABEL: lshr64_shamt32:
490+
; RV32I: # %bb.0:
491+
; RV32I-NEXT: addi a4, a2, -32
492+
; RV32I-NEXT: srl a3, a1, a2
493+
; RV32I-NEXT: bltz a4, .LBB11_2
494+
; RV32I-NEXT: # %bb.1:
495+
; RV32I-NEXT: mv a0, a3
496+
; RV32I-NEXT: j .LBB11_3
497+
; RV32I-NEXT: .LBB11_2:
498+
; RV32I-NEXT: srl a0, a0, a2
499+
; RV32I-NEXT: not a2, a2
500+
; RV32I-NEXT: slli a1, a1, 1
501+
; RV32I-NEXT: sll a1, a1, a2
502+
; RV32I-NEXT: or a0, a0, a1
503+
; RV32I-NEXT: .LBB11_3:
504+
; RV32I-NEXT: srai a1, a4, 31
505+
; RV32I-NEXT: and a1, a1, a3
506+
; RV32I-NEXT: ret
507+
;
508+
; RV64I-LABEL: lshr64_shamt32:
509+
; RV64I: # %bb.0:
510+
; RV64I-NEXT: srl a0, a0, a1
511+
; RV64I-NEXT: ret
512+
%zext = zext nneg i32 %b to i64
513+
%1 = lshr i64 %a, %zext
514+
ret i64 %1
515+
}
516+
517+
define i64 @ashr64_shamt32(i64 %a, i32 signext %b) nounwind {
518+
; RV32I-LABEL: ashr64_shamt32:
519+
; RV32I: # %bb.0:
520+
; RV32I-NEXT: mv a3, a1
521+
; RV32I-NEXT: addi a4, a2, -32
522+
; RV32I-NEXT: sra a1, a1, a2
523+
; RV32I-NEXT: bltz a4, .LBB12_2
524+
; RV32I-NEXT: # %bb.1:
525+
; RV32I-NEXT: srai a3, a3, 31
526+
; RV32I-NEXT: mv a0, a1
527+
; RV32I-NEXT: mv a1, a3
528+
; RV32I-NEXT: ret
529+
; RV32I-NEXT: .LBB12_2:
530+
; RV32I-NEXT: srl a0, a0, a2
531+
; RV32I-NEXT: not a2, a2
532+
; RV32I-NEXT: slli a3, a3, 1
533+
; RV32I-NEXT: sll a2, a3, a2
534+
; RV32I-NEXT: or a0, a0, a2
535+
; RV32I-NEXT: ret
536+
;
537+
; RV64I-LABEL: ashr64_shamt32:
538+
; RV64I: # %bb.0:
539+
; RV64I-NEXT: sra a0, a0, a1
540+
; RV64I-NEXT: ret
541+
%zext = zext nneg i32 %b to i64
542+
%1 = ashr i64 %a, %zext
543+
ret i64 %1
544+
}
545+
546+
define i64 @shl64_shamt32(i64 %a, i32 signext %b) nounwind {
547+
; RV32I-LABEL: shl64_shamt32:
548+
; RV32I: # %bb.0:
549+
; RV32I-NEXT: addi a4, a2, -32
550+
; RV32I-NEXT: sll a3, a0, a2
551+
; RV32I-NEXT: bltz a4, .LBB13_2
552+
; RV32I-NEXT: # %bb.1:
553+
; RV32I-NEXT: mv a1, a3
554+
; RV32I-NEXT: j .LBB13_3
555+
; RV32I-NEXT: .LBB13_2:
556+
; RV32I-NEXT: sll a1, a1, a2
557+
; RV32I-NEXT: not a2, a2
558+
; RV32I-NEXT: srli a0, a0, 1
559+
; RV32I-NEXT: srl a0, a0, a2
560+
; RV32I-NEXT: or a1, a1, a0
561+
; RV32I-NEXT: .LBB13_3:
562+
; RV32I-NEXT: srai a0, a4, 31
563+
; RV32I-NEXT: and a0, a0, a3
564+
; RV32I-NEXT: ret
565+
;
566+
; RV64I-LABEL: shl64_shamt32:
567+
; RV64I: # %bb.0:
568+
; RV64I-NEXT: sll a0, a0, a1
569+
; RV64I-NEXT: ret
570+
%zext = zext nneg i32 %b to i64
571+
%1 = shl i64 %a, %zext
572+
ret i64 %1
573+
}
574+
575+
define i128 @lshr128_shamt32(i128 %a, i32 signext %b) nounwind {
576+
; RV32I-LABEL: lshr128_shamt32:
577+
; RV32I: # %bb.0:
578+
; RV32I-NEXT: addi sp, sp, -32
579+
; RV32I-NEXT: lw a3, 0(a1)
580+
; RV32I-NEXT: lw a4, 4(a1)
581+
; RV32I-NEXT: lw a5, 8(a1)
582+
; RV32I-NEXT: lw a1, 12(a1)
583+
; RV32I-NEXT: sw zero, 16(sp)
584+
; RV32I-NEXT: sw zero, 20(sp)
585+
; RV32I-NEXT: sw zero, 24(sp)
586+
; RV32I-NEXT: sw zero, 28(sp)
587+
; RV32I-NEXT: srli a6, a2, 3
588+
; RV32I-NEXT: mv a7, sp
589+
; RV32I-NEXT: andi t0, a2, 31
590+
; RV32I-NEXT: andi a6, a6, 12
591+
; RV32I-NEXT: xori t0, t0, 31
592+
; RV32I-NEXT: add a6, a7, a6
593+
; RV32I-NEXT: sw a3, 0(sp)
594+
; RV32I-NEXT: sw a4, 4(sp)
595+
; RV32I-NEXT: sw a5, 8(sp)
596+
; RV32I-NEXT: sw a1, 12(sp)
597+
; RV32I-NEXT: lw a1, 0(a6)
598+
; RV32I-NEXT: lw a3, 4(a6)
599+
; RV32I-NEXT: lw a4, 8(a6)
600+
; RV32I-NEXT: lw a5, 12(a6)
601+
; RV32I-NEXT: srl a1, a1, a2
602+
; RV32I-NEXT: slli a6, a3, 1
603+
; RV32I-NEXT: srl a3, a3, a2
604+
; RV32I-NEXT: slli a7, a4, 1
605+
; RV32I-NEXT: srl a4, a4, a2
606+
; RV32I-NEXT: srl a2, a5, a2
607+
; RV32I-NEXT: slli a5, a5, 1
608+
; RV32I-NEXT: sll a6, a6, t0
609+
; RV32I-NEXT: sll a7, a7, t0
610+
; RV32I-NEXT: sll a5, a5, t0
611+
; RV32I-NEXT: or a1, a1, a6
612+
; RV32I-NEXT: or a3, a3, a7
613+
; RV32I-NEXT: or a4, a4, a5
614+
; RV32I-NEXT: sw a1, 0(a0)
615+
; RV32I-NEXT: sw a3, 4(a0)
616+
; RV32I-NEXT: sw a4, 8(a0)
617+
; RV32I-NEXT: sw a2, 12(a0)
618+
; RV32I-NEXT: addi sp, sp, 32
619+
; RV32I-NEXT: ret
620+
;
621+
; RV64I-LABEL: lshr128_shamt32:
622+
; RV64I: # %bb.0:
623+
; RV64I-NEXT: addi a4, a2, -64
624+
; RV64I-NEXT: srl a3, a1, a2
625+
; RV64I-NEXT: bltz a4, .LBB14_2
626+
; RV64I-NEXT: # %bb.1:
627+
; RV64I-NEXT: mv a0, a3
628+
; RV64I-NEXT: j .LBB14_3
629+
; RV64I-NEXT: .LBB14_2:
630+
; RV64I-NEXT: srl a0, a0, a2
631+
; RV64I-NEXT: not a2, a2
632+
; RV64I-NEXT: slli a1, a1, 1
633+
; RV64I-NEXT: sll a1, a1, a2
634+
; RV64I-NEXT: or a0, a0, a1
635+
; RV64I-NEXT: .LBB14_3:
636+
; RV64I-NEXT: srai a1, a4, 63
637+
; RV64I-NEXT: and a1, a1, a3
638+
; RV64I-NEXT: ret
639+
%zext = zext nneg i32 %b to i128
640+
%1 = lshr i128 %a, %zext
641+
ret i128 %1
642+
}
643+
644+
define i128 @ashr128_shamt32(i128 %a, i32 signext %b) nounwind {
645+
; RV32I-LABEL: ashr128_shamt32:
646+
; RV32I: # %bb.0:
647+
; RV32I-NEXT: addi sp, sp, -32
648+
; RV32I-NEXT: lw a3, 0(a1)
649+
; RV32I-NEXT: lw a4, 4(a1)
650+
; RV32I-NEXT: lw a5, 8(a1)
651+
; RV32I-NEXT: lw a1, 12(a1)
652+
; RV32I-NEXT: srli a6, a2, 3
653+
; RV32I-NEXT: mv a7, sp
654+
; RV32I-NEXT: andi t0, a2, 31
655+
; RV32I-NEXT: andi a6, a6, 12
656+
; RV32I-NEXT: xori t0, t0, 31
657+
; RV32I-NEXT: add a6, a7, a6
658+
; RV32I-NEXT: sw a3, 0(sp)
659+
; RV32I-NEXT: sw a4, 4(sp)
660+
; RV32I-NEXT: sw a5, 8(sp)
661+
; RV32I-NEXT: sw a1, 12(sp)
662+
; RV32I-NEXT: srai a1, a1, 31
663+
; RV32I-NEXT: sw a1, 16(sp)
664+
; RV32I-NEXT: sw a1, 20(sp)
665+
; RV32I-NEXT: sw a1, 24(sp)
666+
; RV32I-NEXT: sw a1, 28(sp)
667+
; RV32I-NEXT: lw a1, 0(a6)
668+
; RV32I-NEXT: lw a3, 4(a6)
669+
; RV32I-NEXT: lw a4, 8(a6)
670+
; RV32I-NEXT: lw a5, 12(a6)
671+
; RV32I-NEXT: srl a1, a1, a2
672+
; RV32I-NEXT: slli a6, a3, 1
673+
; RV32I-NEXT: srl a3, a3, a2
674+
; RV32I-NEXT: slli a7, a4, 1
675+
; RV32I-NEXT: srl a4, a4, a2
676+
; RV32I-NEXT: sra a2, a5, a2
677+
; RV32I-NEXT: slli a5, a5, 1
678+
; RV32I-NEXT: sll a6, a6, t0
679+
; RV32I-NEXT: sll a7, a7, t0
680+
; RV32I-NEXT: sll a5, a5, t0
681+
; RV32I-NEXT: or a1, a1, a6
682+
; RV32I-NEXT: or a3, a3, a7
683+
; RV32I-NEXT: or a4, a4, a5
684+
; RV32I-NEXT: sw a1, 0(a0)
685+
; RV32I-NEXT: sw a3, 4(a0)
686+
; RV32I-NEXT: sw a4, 8(a0)
687+
; RV32I-NEXT: sw a2, 12(a0)
688+
; RV32I-NEXT: addi sp, sp, 32
689+
; RV32I-NEXT: ret
690+
;
691+
; RV64I-LABEL: ashr128_shamt32:
692+
; RV64I: # %bb.0:
693+
; RV64I-NEXT: mv a3, a1
694+
; RV64I-NEXT: addi a4, a2, -64
695+
; RV64I-NEXT: sra a1, a1, a2
696+
; RV64I-NEXT: bltz a4, .LBB15_2
697+
; RV64I-NEXT: # %bb.1:
698+
; RV64I-NEXT: srai a3, a3, 63
699+
; RV64I-NEXT: mv a0, a1
700+
; RV64I-NEXT: mv a1, a3
701+
; RV64I-NEXT: ret
702+
; RV64I-NEXT: .LBB15_2:
703+
; RV64I-NEXT: srl a0, a0, a2
704+
; RV64I-NEXT: not a2, a2
705+
; RV64I-NEXT: slli a3, a3, 1
706+
; RV64I-NEXT: sll a2, a3, a2
707+
; RV64I-NEXT: or a0, a0, a2
708+
; RV64I-NEXT: ret
709+
%zext = zext nneg i32 %b to i128
710+
%1 = ashr i128 %a, %zext
711+
ret i128 %1
712+
}
713+
714+
define i128 @shl128_shamt32(i128 %a, i32 signext %b) nounwind {
715+
; RV32I-LABEL: shl128_shamt32:
716+
; RV32I: # %bb.0:
717+
; RV32I-NEXT: addi sp, sp, -32
718+
; RV32I-NEXT: lw a3, 0(a1)
719+
; RV32I-NEXT: lw a4, 4(a1)
720+
; RV32I-NEXT: lw a5, 8(a1)
721+
; RV32I-NEXT: lw a1, 12(a1)
722+
; RV32I-NEXT: sw zero, 0(sp)
723+
; RV32I-NEXT: sw zero, 4(sp)
724+
; RV32I-NEXT: sw zero, 8(sp)
725+
; RV32I-NEXT: sw zero, 12(sp)
726+
; RV32I-NEXT: srli a6, a2, 3
727+
; RV32I-NEXT: addi a7, sp, 16
728+
; RV32I-NEXT: andi t0, a2, 31
729+
; RV32I-NEXT: andi a6, a6, 12
730+
; RV32I-NEXT: sub a6, a7, a6
731+
; RV32I-NEXT: sw a3, 16(sp)
732+
; RV32I-NEXT: sw a4, 20(sp)
733+
; RV32I-NEXT: sw a5, 24(sp)
734+
; RV32I-NEXT: sw a1, 28(sp)
735+
; RV32I-NEXT: lw a1, 0(a6)
736+
; RV32I-NEXT: lw a3, 4(a6)
737+
; RV32I-NEXT: lw a4, 8(a6)
738+
; RV32I-NEXT: lw a5, 12(a6)
739+
; RV32I-NEXT: xori a6, t0, 31
740+
; RV32I-NEXT: sll a7, a3, a2
741+
; RV32I-NEXT: srli t0, a1, 1
742+
; RV32I-NEXT: sll a5, a5, a2
743+
; RV32I-NEXT: sll a1, a1, a2
744+
; RV32I-NEXT: sll a2, a4, a2
745+
; RV32I-NEXT: srli a3, a3, 1
746+
; RV32I-NEXT: srli a4, a4, 1
747+
; RV32I-NEXT: srl t0, t0, a6
748+
; RV32I-NEXT: srl a3, a3, a6
749+
; RV32I-NEXT: srl a4, a4, a6
750+
; RV32I-NEXT: or a6, a7, t0
751+
; RV32I-NEXT: or a2, a2, a3
752+
; RV32I-NEXT: or a4, a5, a4
753+
; RV32I-NEXT: sw a1, 0(a0)
754+
; RV32I-NEXT: sw a6, 4(a0)
755+
; RV32I-NEXT: sw a2, 8(a0)
756+
; RV32I-NEXT: sw a4, 12(a0)
757+
; RV32I-NEXT: addi sp, sp, 32
758+
; RV32I-NEXT: ret
759+
;
760+
; RV64I-LABEL: shl128_shamt32:
761+
; RV64I: # %bb.0:
762+
; RV64I-NEXT: addi a4, a2, -64
763+
; RV64I-NEXT: sll a3, a0, a2
764+
; RV64I-NEXT: bltz a4, .LBB16_2
765+
; RV64I-NEXT: # %bb.1:
766+
; RV64I-NEXT: mv a1, a3
767+
; RV64I-NEXT: j .LBB16_3
768+
; RV64I-NEXT: .LBB16_2:
769+
; RV64I-NEXT: sll a1, a1, a2
770+
; RV64I-NEXT: not a2, a2
771+
; RV64I-NEXT: srli a0, a0, 1
772+
; RV64I-NEXT: srl a0, a0, a2
773+
; RV64I-NEXT: or a1, a1, a0
774+
; RV64I-NEXT: .LBB16_3:
775+
; RV64I-NEXT: srai a0, a4, 63
776+
; RV64I-NEXT: and a0, a0, a3
777+
; RV64I-NEXT: ret
778+
%zext = zext nneg i32 %b to i128
779+
%1 = shl i128 %a, %zext
780+
ret i128 %1
781+
}

0 commit comments

Comments
 (0)