Skip to content

Commit 22ba351

Browse files
committed
[RISCV][SLP] Test for <3 x Ty> reductions which require reordering
These tests show a vectorizable reduction where the order of the reduction has been adjusted so that profitable vectorization requires a reordering of the computation. We currently have no reordering in SLP for non-power-of-two vectors, so this doesn't work. Note that due to reassociation performed in the standard pipeline, this is actually the canonical form for a reduction reaching SLP.
1 parent 6421dcc commit 22ba351

File tree

1 file changed

+90
-0
lines changed

1 file changed

+90
-0
lines changed

llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,52 @@ define i32 @dot_product_i32(ptr %a, ptr %b) {
554554
ret i32 %add.1
555555
}
556556

; Same as above, except the reduction order has been perturbed.  This
; is checking for our ability to reorder.
define i32 @dot_product_i32_reorder(ptr %a, ptr %b) {
; CHECK-LABEL: @dot_product_i32_reorder(
; CHECK-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
; CHECK-NEXT:    [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
; CHECK-NEXT:    [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
; CHECK-NEXT:    [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
; CHECK-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
; CHECK-NEXT:    [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
; CHECK-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
; CHECK-NEXT:    [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
; CHECK-NEXT:    [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
; CHECK-NEXT:    [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
; CHECK-NEXT:    [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
; CHECK-NEXT:    [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
; CHECK-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
; CHECK-NEXT:    [[ADD_0:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
; CHECK-NEXT:    ret i32 [[ADD_1]]
;
  %gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0
  %l.a.0 = load i32, ptr %gep.a.0, align 4
  %gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1
  %l.a.1 = load i32, ptr %gep.a.1, align 4
  %gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2
  %l.a.2 = load i32, ptr %gep.a.2, align 4

  %gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0
  %l.b.0 = load i32, ptr %gep.b.0, align 4
  %gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1
  %l.b.1 = load i32, ptr %gep.b.1, align 4
  %gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2
  %l.b.2 = load i32, ptr %gep.b.2, align 4

  %mul.0 = mul nsw i32 %l.a.0, %l.b.0
  %mul.1 = mul nsw i32 %l.a.1, %l.b.1
  %mul.2 = mul nsw i32 %l.a.2, %l.b.2

  ; Reduction order deliberately perturbed vs. dot_product_i32:
  ; mul.1 comes first, so vectorization requires SLP to reorder.
  %add.0 = add i32 %mul.1, %mul.0
  %add.1 = add i32 %add.0, %mul.2
  ret i32 %add.1
}
557603
define float @dot_product_fp32(ptr %a, ptr %b) {
558604
; NON-POW2-LABEL: @dot_product_fp32(
559605
; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
@@ -604,6 +650,50 @@ define float @dot_product_fp32(ptr %a, ptr %b) {
604650
ret float %add.1
605651
}
606652

; Same as above, except the reduction order has been perturbed.  This
; is checking for our ability to reorder.
define float @dot_product_fp32_reorder(ptr %a, ptr %b) {
; CHECK-LABEL: @dot_product_fp32_reorder(
; CHECK-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
; CHECK-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT:    [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
; CHECK-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
; CHECK-NEXT:    [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
; CHECK-NEXT:    [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; CHECK-NEXT:    [[ADD_0:%.*]] = fadd fast float [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
; CHECK-NEXT:    ret float [[ADD_1]]
;
  %gep.a.0 = getelementptr inbounds float, ptr %a, i32 0
  %l.a.0 = load float, ptr %gep.a.0, align 4
  %gep.a.1 = getelementptr inbounds float, ptr %a, i32 1
  %l.a.1 = load float, ptr %gep.a.1, align 4
  %gep.a.2 = getelementptr inbounds float, ptr %a, i32 2
  %l.a.2 = load float, ptr %gep.a.2, align 4

  %gep.b.0 = getelementptr inbounds float, ptr %b, i32 0
  %l.b.0 = load float, ptr %gep.b.0, align 4
  %gep.b.1 = getelementptr inbounds float, ptr %b, i32 1
  %l.b.1 = load float, ptr %gep.b.1, align 4
  %gep.b.2 = getelementptr inbounds float, ptr %b, i32 2
  %l.b.2 = load float, ptr %gep.b.2, align 4

  %mul.0 = fmul fast float %l.a.0, %l.b.0
  %mul.1 = fmul fast float %l.a.1, %l.b.1
  %mul.2 = fmul fast float %l.a.2, %l.b.2

  ; Reduction order deliberately perturbed vs. dot_product_fp32:
  ; mul.1 comes first, so profitable vectorization requires reordering.
  %add.0 = fadd fast float %mul.1, %mul.0
  %add.1 = fadd fast float %add.0, %mul.2
  ret float %add.1
}

607697
define double @dot_product_fp64(ptr %a, ptr %b) {
608698
; NON-POW2-LABEL: @dot_product_fp64(
609699
; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0

0 commit comments

Comments
 (0)