Commit 660b740
[DAG] Support store merging of vector constant stores
I ran across this while making a change to RISCV memset lowering. It seems very odd that manually merging stores into a vector prevents them from being merged further.

Differential Revision: https://reviews.llvm.org/D156349
Parent: 2ad297d
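To illustrate the effect (this example is not part of the commit; the function name and types are illustrative), IR that manually zeroes a buffer with adjacent constant vector stores, such as:

  define void @zero_32_bytes(ptr %p) {
    %q = getelementptr inbounds i8, ptr %p, i64 16
    store <2 x i64> zeroinitializer, ptr %p, align 16
    store <2 x i64> zeroinitializer, ptr %q, align 16
    ret void
  }

can now be recognized as a run of constant stores by the DAG store-merging code, since constant BUILD_VECTOR values are classified as StoreSource::Constant. Whether the two stores are actually combined into one wider store still depends on the target providing a suitable legal wider type (for example a 32-byte vector store on RISC-V with the V extension), as exercised by the memset-inline.ll changes below.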

4 files changed: +108 -174 lines

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 1 deletion
@@ -698,6 +698,11 @@ namespace {
     case ISD::Constant:
     case ISD::ConstantFP:
       return StoreSource::Constant;
+    case ISD::BUILD_VECTOR:
+      if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
+          ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
+        return StoreSource::Constant;
+      return StoreSource::Unknown;
     case ISD::EXTRACT_VECTOR_ELT:
     case ISD::EXTRACT_SUBVECTOR:
       return StoreSource::Extract;
@@ -19471,6 +19476,10 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
         // If fp truncation is necessary give up for now.
         if (MemVT.getSizeInBits() != ElementSizeBits)
           return false;
+      } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
+                 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
+        // Not yet handled
+        return false;
       } else {
         llvm_unreachable("Invalid constant element type");
       }
@@ -19601,7 +19610,7 @@ void DAGCombiner::getStoreMergeCandidates(
     case StoreSource::Constant:
       if (NoTypeMatch)
         return false;
-      if (!isIntOrFPConstant(OtherBC))
+      if (getStoreSource(OtherBC) != StoreSource::Constant)
         return false;
       break;
     case StoreSource::Extract:
@@ -19823,6 +19832,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
         IsElementZero = C->isZero();
       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
         IsElementZero = C->getConstantFPValue()->isNullValue();
+      else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
+        IsElementZero = true;
       if (IsElementZero) {
         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
           FirstZeroAfterNonZero = i;

llvm/test/CodeGen/RISCV/rvv/memset-inline.ll

Lines changed: 20 additions & 158 deletions
@@ -544,53 +544,31 @@ define void @bzero_32(ptr %a) nounwind {
 define void @bzero_64(ptr %a) nounwind {
 ; RV32-LABEL: bzero_64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi a1, a0, 48
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vse8.v v8, (a1)
-; RV32-NEXT: addi a1, a0, 32
-; RV32-NEXT: vse8.v v8, (a1)
-; RV32-NEXT: addi a1, a0, 16
-; RV32-NEXT: vse8.v v8, (a1)
 ; RV32-NEXT: vse8.v v8, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: bzero_64:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi a1, a0, 48
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vse8.v v8, (a1)
-; RV64-NEXT: addi a1, a0, 32
-; RV64-NEXT: vse8.v v8, (a1)
-; RV64-NEXT: addi a1, a0, 16
-; RV64-NEXT: vse8.v v8, (a1)
 ; RV64-NEXT: vse8.v v8, (a0)
 ; RV64-NEXT: ret
 ;
 ; RV32-FAST-LABEL: bzero_64:
 ; RV32-FAST: # %bb.0:
-; RV32-FAST-NEXT: addi a1, a0, 48
-; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-FAST-NEXT: vmv.v.i v8, 0
-; RV32-FAST-NEXT: vse64.v v8, (a1)
-; RV32-FAST-NEXT: addi a1, a0, 32
-; RV32-FAST-NEXT: vse64.v v8, (a1)
-; RV32-FAST-NEXT: addi a1, a0, 16
-; RV32-FAST-NEXT: vse64.v v8, (a1)
 ; RV32-FAST-NEXT: vse64.v v8, (a0)
 ; RV32-FAST-NEXT: ret
 ;
 ; RV64-FAST-LABEL: bzero_64:
 ; RV64-FAST: # %bb.0:
-; RV64-FAST-NEXT: addi a1, a0, 48
-; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-FAST-NEXT: vmv.v.i v8, 0
-; RV64-FAST-NEXT: vse64.v v8, (a1)
-; RV64-FAST-NEXT: addi a1, a0, 32
-; RV64-FAST-NEXT: vse64.v v8, (a1)
-; RV64-FAST-NEXT: addi a1, a0, 16
-; RV64-FAST-NEXT: vse64.v v8, (a1)
 ; RV64-FAST-NEXT: vse64.v v8, (a0)
 ; RV64-FAST-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
@@ -686,27 +664,15 @@ define void @aligned_bzero_32(ptr %a) nounwind {
 define void @aligned_bzero_64(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_64:
 ; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_64:
 ; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
@@ -717,28 +683,16 @@ define void @aligned_bzero_66(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_66:
 ; RV32-BOTH: # %bb.0:
 ; RV32-BOTH-NEXT: sh zero, 64(a0)
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_66:
 ; RV64-BOTH: # %bb.0:
 ; RV64-BOTH-NEXT: sh zero, 64(a0)
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 66, i1 0)
@@ -754,12 +708,8 @@ define void @aligned_bzero_96(ptr %a) nounwind {
 ; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: addi a1, a0, 64
 ; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
+; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-BOTH-NEXT: vmv.v.i v8, 0
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
@@ -771,12 +721,8 @@ define void @aligned_bzero_96(ptr %a) nounwind {
 ; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: addi a1, a0, 64
 ; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
+; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BOTH-NEXT: vmv.v.i v8, 0
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 96, i1 0)
@@ -786,43 +732,15 @@ define void @aligned_bzero_96(ptr %a) nounwind {
 define void @aligned_bzero_128(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_128:
 ; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a1, a0, 112
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 96
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 80
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 64
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_128:
 ; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a1, a0, 112
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 96
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 80
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 64
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 128, i1 0)
@@ -832,74 +750,18 @@ define void @aligned_bzero_128(ptr %a) nounwind {
 define void @aligned_bzero_256(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_256:
 ; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a1, a0, 240
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 224
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 208
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 192
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 176
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 160
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 144
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: addi a1, a0, 128
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 112
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 96
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 80
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 64
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
+; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-BOTH-NEXT: vmv.v.i v8, 0
 ; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_256:
 ; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a1, a0, 240
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 224
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 208
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 192
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 176
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 160
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 144
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: addi a1, a0, 128
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 112
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 96
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 80
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 64
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
+; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-BOTH-NEXT: vmv.v.i v8, 0
 ; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
