Skip to content

Commit 7e85cb8

Browse files
authored
[AArch64][NFC] Add test as a representative of scalarizing a vector integer division (#114107)
Scalarization is considered the last resort when vectorizing a bundle of integer divisions. Currently, the cost estimate for scalarizing a vector division can be considerably overestimated, as in this motivating test case, i.e. the vector cost should not deviate much from the scalar cost. A future patch will try to improve the scalarization cost.
1 parent 1e897ed commit 7e85cb8

File tree

1 file changed

+54
-0
lines changed
  • llvm/test/Transforms/SLPVectorizer/AArch64

1 file changed

+54
-0
lines changed

llvm/test/Transforms/SLPVectorizer/AArch64/div.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,3 +551,57 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a)
551551
%r3 = insertelement <4 x i32> %r2, i32 %4, i32 3
552552
ret <4 x i32> %r3
553553
}
554+
555+
; Computes (a / const + x - y) * z independently on each lane of a <2 x i32>,
; spelled out as scalar operations on extracted elements. The divisors are the
; constants 2 (lane 0) and 4 (lane 1). The check lines below expect the scalar
; form to remain under the NO-SVE prefix and a single vector sdiv/add/sub/mul
; chain under the SVE prefix.
556+
define <2 x i32> @vectorize_sdiv_v2i32(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
557+
; NO-SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
558+
; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
559+
; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
560+
; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
561+
; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], 2
562+
; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], 4
563+
; NO-SVE-NEXT: [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
564+
; NO-SVE-NEXT: [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
565+
; NO-SVE-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
566+
; NO-SVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
567+
; NO-SVE-NEXT: [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
568+
; NO-SVE-NEXT: [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
569+
; NO-SVE-NEXT: [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
570+
; NO-SVE-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
571+
; NO-SVE-NEXT: [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
572+
; NO-SVE-NEXT: [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
573+
; NO-SVE-NEXT: [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
574+
; NO-SVE-NEXT: [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
575+
; NO-SVE-NEXT: [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
576+
; NO-SVE-NEXT: [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
577+
; NO-SVE-NEXT: ret <2 x i32> [[RES1]]
578+
;
579+
; SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
580+
; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
581+
; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
582+
; SVE-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
583+
; SVE-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
584+
; SVE-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
585+
; SVE-NEXT: ret <2 x i32> [[TMP4]]
586+
;
587+
{
588+
; Signed division of each lane of %a by its own constant (2 and 4).
%a0 = extractelement <2 x i32> %a, i64 0
589+
%a1 = extractelement <2 x i32> %a, i64 1
590+
%1 = sdiv i32 %a0, 2
591+
%2 = sdiv i32 %a1, 4
592+
; Add the matching lane of %x to each quotient.
%x0 = extractelement <2 x i32> %x, i64 0
593+
%x1 = extractelement <2 x i32> %x, i64 1
594+
%3 = add i32 %1, %x0
595+
%4 = add i32 %2, %x1
596+
; Subtract the matching lane of %y from each sum.
%y0 = extractelement <2 x i32> %y, i64 0
597+
%y1 = extractelement <2 x i32> %y, i64 1
598+
%5 = sub i32 %3, %y0
599+
%6 = sub i32 %4, %y1
600+
; Multiply each difference by the matching lane of %z.
%z0 = extractelement <2 x i32> %z, i64 0
601+
%z1 = extractelement <2 x i32> %z, i64 1
602+
%7 = mul i32 %5, %z0
603+
%8 = mul i32 %6, %z1
604+
; Reassemble the two scalar results into the <2 x i32> return value.
%res0 = insertelement <2 x i32> poison, i32 %7, i32 0
605+
%res1 = insertelement <2 x i32> %res0, i32 %8, i32 1
606+
ret <2 x i32> %res1
607+
}

0 commit comments

Comments
 (0)