Skip to content

Commit d38f624

Browse files
committed
[AArch64][NFC] Add test as a representative of scalarizing a vector integer division
Scalarization is considered the last resort when vectorizing a bundle of integer divisions. Currently, the cost estimate for scalarizing a vector division can be considerably overestimated, as in this motivating test case, where the vector cost should not deviate much from the scalar cost. A future patch will try to improve the scalarization cost estimate.
1 parent b94762d commit d38f624

File tree

1 file changed

+54
-0
lines changed
  • llvm/test/Transforms/SLPVectorizer/AArch64

1 file changed

+54
-0
lines changed

llvm/test/Transforms/SLPVectorizer/AArch64/div.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,3 +551,57 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a)
551551
%r3 = insertelement <4 x i32> %r2, i32 %4, i32 3
552552
ret <4 x i32> %r3
553553
}
554+
555+
; Computes (a / const + x - y) * z per lane, where const is 2 for lane 0 and 4 for lane 1.
556+
; Test input: a two-lane sdiv/add/sub/mul chain written in scalar form.
; The NO-SVE-prefixed checks expect the scalar instructions to remain as written;
; the SVE-prefixed checks expect a single <2 x i32> sdiv/add/sub/mul chain.
define <2 x i32> @vectorize_sdiv_v2i32(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
557+
; NO-SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
558+
; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
559+
; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
560+
; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
561+
; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], 2
562+
; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], 4
563+
; NO-SVE-NEXT: [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
564+
; NO-SVE-NEXT: [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
565+
; NO-SVE-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
566+
; NO-SVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
567+
; NO-SVE-NEXT: [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
568+
; NO-SVE-NEXT: [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
569+
; NO-SVE-NEXT: [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
570+
; NO-SVE-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
571+
; NO-SVE-NEXT: [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
572+
; NO-SVE-NEXT: [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
573+
; NO-SVE-NEXT: [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
574+
; NO-SVE-NEXT: [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
575+
; NO-SVE-NEXT: [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
576+
; NO-SVE-NEXT: [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
577+
; NO-SVE-NEXT: ret <2 x i32> [[RES1]]
578+
;
579+
; SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
580+
; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
581+
; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
582+
; SVE-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
583+
; SVE-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
584+
; SVE-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
585+
; SVE-NEXT: ret <2 x i32> [[TMP4]]
586+
;
587+
{
; Split %a into its two lanes and divide each lane by a different constant (2 and 4).
588+
%a0 = extractelement <2 x i32> %a, i64 0
589+
%a1 = extractelement <2 x i32> %a, i64 1
590+
%1 = sdiv i32 %a0, 2
591+
%2 = sdiv i32 %a1, 4
; Add the matching lane of %x to each quotient.
592+
%x0 = extractelement <2 x i32> %x, i64 0
593+
%x1 = extractelement <2 x i32> %x, i64 1
594+
%3 = add i32 %1, %x0
595+
%4 = add i32 %2, %x1
; Subtract the matching lane of %y.
596+
%y0 = extractelement <2 x i32> %y, i64 0
597+
%y1 = extractelement <2 x i32> %y, i64 1
598+
%5 = sub i32 %3, %y0
599+
%6 = sub i32 %4, %y1
; Multiply by the matching lane of %z.
600+
%z0 = extractelement <2 x i32> %z, i64 0
601+
%z1 = extractelement <2 x i32> %z, i64 1
602+
%7 = mul i32 %5, %z0
603+
%8 = mul i32 %6, %z1
; Reassemble the two scalar results into the returned <2 x i32>.
604+
%res0 = insertelement <2 x i32> poison, i32 %7, i32 0
605+
%res1 = insertelement <2 x i32> %res0, i32 %8, i32 1
606+
ret <2 x i32> %res1
607+
}

0 commit comments

Comments
 (0)