Skip to content

Commit 618d555

Browse files
committed
[LoopUnroll] add test for full unroll that is sensitive to cost-model; NFC
See discussion in D90554.
1 parent 2ed3a76 commit 618d555

File tree

1 file changed

+111
-0
lines changed

1 file changed

+111
-0
lines changed
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -loop-unroll -unroll-threshold=300 -S %s | FileCheck %s
3+
4+
; This test was fully unrolled and simplified at -O3 with clang 11.
5+
; Changes to the cost model may cause that decision to differ.
6+
; We would not necessarily view the difference as a regression,
7+
; but we should be aware that cost model changes can affect an
8+
; example like this drastically.
9+
10+
target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
11+
target triple = "aarch64-w64-windows-gnu"
12+
13+
; Constant lookup table of 33 i16 entries. The loop body of @tripcount_11
; loads two adjacent entries (index and index + 1) from it on every iteration.
; NOTE(review): judging by the name this is a log2 table - not verified here.
@tab_log2 = internal unnamed_addr constant [33 x i16] [i16 4, i16 1459, i16 2870, i16 4240, i16 5572, i16 6867, i16 8127, i16 9355, i16 10552, i16 11719, i16 12858, i16 13971, i16 15057, i16 16120, i16 17158, i16 18175, i16 19170, i16 20145, i16 21100, i16 22036, i16 22954, i16 23854, i16 24738, i16 25605, i16 26457, i16 27294, i16 28116, i16 28924, i16 29719, i16 30500, i16 31269, i16 32025, i16 -32767], align 2
14+
15+
; Intrinsics called from the loop body of @tripcount_11.
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
16+
declare double @llvm.log2.f64(double)
17+
18+
; Test function for the loop unroller. The loop value %i.021 starts at 1024
; and is halved (lshr by 1) each iteration; the loop exits once the halved
; value is 0, i.e. after 11 iterations if the mismatch branch is never taken.
; Each iteration compares a table-based integer computation against a value
; derived from llvm.log2.f64 and branches to an unreachable block on mismatch.
; The autogenerated CHECK lines below expect the loop to still be present
; (phi with a back edge) in the opt output. Returns 0.
define i32 @tripcount_11() {
19+
; CHECK-LABEL: @tripcount_11(
20+
; CHECK-NEXT: do.body6.preheader:
21+
; CHECK-NEXT: br label [[DO_BODY6:%.*]]
22+
; CHECK: for.cond:
23+
; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[DIV20:%.*]], 0
24+
; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_END:%.*]], label [[DO_BODY6]]
25+
; CHECK: do.body6:
26+
; CHECK-NEXT: [[I_021:%.*]] = phi i32 [ [[DIV20]], [[FOR_COND:%.*]] ], [ 1024, [[DO_BODY6_PREHEADER:%.*]] ]
27+
; CHECK-NEXT: [[OR_I:%.*]] = or i32 [[I_021]], 1
28+
; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[OR_I]], i1 true)
29+
; CHECK-NEXT: [[SHL_I:%.*]] = shl i32 [[I_021]], [[T0]]
30+
; CHECK-NEXT: [[AND_I:%.*]] = lshr i32 [[SHL_I]], 26
31+
; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[AND_I]] to i8
32+
; CHECK-NEXT: [[CONV3_I:%.*]] = and i8 [[T1]], 31
33+
; CHECK-NEXT: [[AND4_I:%.*]] = lshr i32 [[SHL_I]], 11
34+
; CHECK-NEXT: [[CONV6_I:%.*]] = and i32 [[AND4_I]], 32767
35+
; CHECK-NEXT: [[IDXPROM_I:%.*]] = zext i8 [[CONV3_I]] to i64
36+
; CHECK-NEXT: [[ARRAYIDX_I7:%.*]] = getelementptr inbounds [33 x i16], [33 x i16]* @tab_log2, i64 0, i64 [[IDXPROM_I]]
37+
; CHECK-NEXT: [[T2:%.*]] = load i16, i16* [[ARRAYIDX_I7]], align 2
38+
; CHECK-NEXT: [[CONV7_I:%.*]] = zext i16 [[T2]] to i32
39+
; CHECK-NEXT: [[NARROW_I:%.*]] = add nuw nsw i8 [[CONV3_I]], 1
40+
; CHECK-NEXT: [[T3:%.*]] = zext i8 [[NARROW_I]] to i64
41+
; CHECK-NEXT: [[ARRAYIDX11_I:%.*]] = getelementptr inbounds [33 x i16], [33 x i16]* @tab_log2, i64 0, i64 [[T3]]
42+
; CHECK-NEXT: [[T4:%.*]] = load i16, i16* [[ARRAYIDX11_I]], align 2
43+
; CHECK-NEXT: [[CONV12_I:%.*]] = zext i16 [[T4]] to i32
44+
; CHECK-NEXT: [[SUB16_I:%.*]] = sub nsw i32 [[CONV12_I]], [[CONV7_I]]
45+
; CHECK-NEXT: [[MUL_I8:%.*]] = mul nsw i32 [[CONV6_I]], [[SUB16_I]]
46+
; CHECK-NEXT: [[SHR17_I:%.*]] = ashr i32 [[MUL_I8]], 15
47+
; CHECK-NEXT: [[CONV_I:%.*]] = shl nuw nsw i32 [[T0]], 15
48+
; CHECK-NEXT: [[SHL20_I:%.*]] = xor i32 [[CONV_I]], 1015808
49+
; CHECK-NEXT: [[ADD18_I:%.*]] = add nuw nsw i32 [[SHL20_I]], [[CONV7_I]]
50+
; CHECK-NEXT: [[ADD21_I:%.*]] = add nsw i32 [[ADD18_I]], [[SHR17_I]]
51+
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[I_021]] to double
52+
; CHECK-NEXT: [[T5:%.*]] = tail call double @llvm.log2.f64(double [[CONV]])
53+
; CHECK-NEXT: [[CONV8:%.*]] = fptosi double [[T5]] to i32
54+
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[CONV8]], 15
55+
; CHECK-NEXT: [[ADD:%.*]] = or i32 [[MUL]], 4
56+
; CHECK-NEXT: [[CMP9:%.*]] = icmp eq i32 [[ADD21_I]], [[ADD]]
57+
; CHECK-NEXT: [[DIV20]] = lshr i32 [[I_021]], 1
58+
; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_COND]], label [[IF_THEN11:%.*]]
59+
; CHECK: if.then11:
60+
; CHECK-NEXT: unreachable
61+
; CHECK: for.end:
62+
; CHECK-NEXT: ret i32 0
63+
;
64+
; Entry block: jumps straight into the loop body.
do.body6.preheader:
65+
br label %do.body6
66+
67+
; Exit test: leave the loop once the halved value %div20 is 0, otherwise
; branch back to the loop body.
for.cond:
68+
%cmp5.not = icmp eq i32 %div20, 0
69+
br i1 %cmp5.not, label %for.end, label %do.body6
70+
71+
; Loop body: %i.021 is 1024 on entry and %div20 on later iterations.
do.body6:
72+
%i.021 = phi i32 [ %div20, %for.cond ], [ 1024, %do.body6.preheader ]
73+
; Integer path. ctlz is called with its second argument set to true, and the
; `or ..., 1` guarantees its input is non-zero. %i.021 is shifted left by
; that leading-zero count.
%or.i = or i32 %i.021, 1
74+
%t0 = tail call i32 @llvm.ctlz.i32(i32 %or.i, i1 true)
75+
%shl.i = shl i32 %i.021, %t0
76+
; Bits 26..30 of the shifted value form the 5-bit table index.
%and.i = lshr i32 %shl.i, 26
77+
%t1 = trunc i32 %and.i to i8
78+
%conv3.i = and i8 %t1, 31
79+
; Bits 11..25 of the shifted value form a 15-bit multiplier.
%and4.i = lshr i32 %shl.i, 11
80+
%conv6.i = and i32 %and4.i, 32767
81+
; Load @tab_log2[index] and @tab_log2[index + 1], both zero-extended to i32.
%idxprom.i = zext i8 %conv3.i to i64
82+
%arrayidx.i7 = getelementptr inbounds [33 x i16], [33 x i16]* @tab_log2, i64 0, i64 %idxprom.i
83+
%t2 = load i16, i16* %arrayidx.i7, align 2
84+
%conv7.i = zext i16 %t2 to i32
85+
%narrow.i = add nuw nsw i8 %conv3.i, 1
86+
%t3 = zext i8 %narrow.i to i64
87+
%arrayidx11.i = getelementptr inbounds [33 x i16], [33 x i16]* @tab_log2, i64 0, i64 %t3
88+
%t4 = load i16, i16* %arrayidx11.i, align 2
89+
%conv12.i = zext i16 %t4 to i32
90+
; Scale the difference of the two entries by the 15-bit multiplier, then
; shift right by 15.
%sub16.i = sub nsw i32 %conv12.i, %conv7.i
91+
%mul.i8 = mul nsw i32 %conv6.i, %sub16.i
92+
%shr17.i = ashr i32 %mul.i8, 15
93+
; Combine: ((ctlz << 15) xor 1015808) + first entry + scaled difference.
; 1015808 == 31 << 15.
%conv.i = shl nuw nsw i32 %t0, 15
94+
%shl20.i = xor i32 %conv.i, 1015808
95+
%add18.i = add nuw nsw i32 %shl20.i, %conv7.i
96+
%add21.i = add nsw i32 %add18.i, %shr17.i
97+
; Floating-point path: log2 of %i.021 as a double, converted back to i32,
; shifted left by 15 and OR'ed with 4.
%conv = sitofp i32 %i.021 to double
98+
%t5 = tail call double @llvm.log2.f64(double %conv)
99+
%conv8 = fptosi double %t5 to i32
100+
%mul = shl nsw i32 %conv8, 15
101+
%add = or i32 %mul, 4
102+
; Compare the two paths; a mismatch branches to the unreachable block.
%cmp9 = icmp eq i32 %add21.i, %add
103+
; Next loop value: halve %i.021.
%div20 = lshr i32 %i.021, 1
104+
br i1 %cmp9, label %for.cond, label %if.then11
105+
106+
if.then11:
107+
unreachable
108+
109+
for.end:
110+
ret i32 0
111+
}

0 commit comments

Comments
 (0)