@@ -7,105 +7,140 @@ define void @runtime_unroll_generic(i32 %arg_0, ptr %arg_1, ptr %arg_2, ptr %arg
7
7
; CHECK-A55-LABEL: @runtime_unroll_generic(
8
8
; CHECK-A55-NEXT: entry:
9
9
; CHECK-A55-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0
10
- ; CHECK-A55-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_LR_PH:%.*]]
11
- ; CHECK-A55: for.body6.lr.ph:
12
- ; CHECK-A55-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[ARG_2:%.*]], i64 undef
13
- ; CHECK-A55-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, ptr [[ARG_3:%.*]], i64 undef
14
- ; CHECK-A55-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[ARG_1:%.*]], i64 undef
10
+ ; CHECK-A55-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]]
11
+ ; CHECK-A55: for.body6.preheader:
15
12
; CHECK-A55-NEXT: [[XTRAITER:%.*]] = and i32 [[ARG_0]], 3
16
13
; CHECK-A55-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_0]], 4
17
- ; CHECK-A55-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_LR_PH_NEW :%.*]]
18
- ; CHECK-A55: for.body6.lr.ph .new:
14
+ ; CHECK-A55-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_PREHEADER_NEW :%.*]]
15
+ ; CHECK-A55: for.body6.preheader .new:
19
16
; CHECK-A55-NEXT: [[UNROLL_ITER:%.*]] = and i32 [[ARG_0]], -4
20
17
; CHECK-A55-NEXT: br label [[FOR_BODY6:%.*]]
21
18
; CHECK-A55: for.body6:
22
- ; CHECK-A55-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ]
19
+ ; CHECK-A55-NEXT: [[K_03:%.*]] = phi i32 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY6]] ]
20
+ ; CHECK-A55-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ]
21
+ ; CHECK-A55-NEXT: [[IDX_EXT:%.*]] = zext i32 [[K_03]] to i64
22
+ ; CHECK-A55-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[ARG_2:%.*]], i64 [[IDX_EXT]]
23
23
; CHECK-A55-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
24
24
; CHECK-A55-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
25
+ ; CHECK-A55-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, ptr [[ARG_3:%.*]], i64 [[IDX_EXT]]
25
26
; CHECK-A55-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
26
27
; CHECK-A55-NEXT: [[CONV15:%.*]] = sext i16 [[TMP2]] to i32
27
28
; CHECK-A55-NEXT: [[MUL16:%.*]] = mul nsw i32 [[CONV15]], [[CONV]]
29
+ ; CHECK-A55-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[ARG_1:%.*]], i64 [[IDX_EXT]]
28
30
; CHECK-A55-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
29
31
; CHECK-A55-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP3]]
30
32
; CHECK-A55-NEXT: store i32 [[ADD21]], ptr [[ARRAYIDX20]], align 4
31
- ; CHECK-A55-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
33
+ ; CHECK-A55-NEXT: [[INC:%.*]] = or i32 [[K_03]], 1
34
+ ; CHECK-A55-NEXT: [[IDX_EXT_1:%.*]] = zext i32 [[INC]] to i64
35
+ ; CHECK-A55-NEXT: [[ARRAYIDX10_1:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[IDX_EXT_1]]
36
+ ; CHECK-A55-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX10_1]], align 2
32
37
; CHECK-A55-NEXT: [[CONV_1:%.*]] = sext i16 [[TMP4]] to i32
33
- ; CHECK-A55-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
38
+ ; CHECK-A55-NEXT: [[ARRAYIDX14_1:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[IDX_EXT_1]]
39
+ ; CHECK-A55-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX14_1]], align 2
34
40
; CHECK-A55-NEXT: [[CONV15_1:%.*]] = sext i16 [[TMP5]] to i32
35
41
; CHECK-A55-NEXT: [[MUL16_1:%.*]] = mul nsw i32 [[CONV15_1]], [[CONV_1]]
36
- ; CHECK-A55-NEXT: [[ADD21_1:%.*]] = add nsw i32 [[MUL16_1]], [[ADD21]]
37
- ; CHECK-A55-NEXT: store i32 [[ADD21_1]], ptr [[ARRAYIDX20]], align 4
38
- ; CHECK-A55-NEXT: [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
39
- ; CHECK-A55-NEXT: [[CONV_2:%.*]] = sext i16 [[TMP6]] to i32
40
- ; CHECK-A55-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
41
- ; CHECK-A55-NEXT: [[CONV15_2:%.*]] = sext i16 [[TMP7]] to i32
42
+ ; CHECK-A55-NEXT: [[ARRAYIDX20_1:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[IDX_EXT_1]]
43
+ ; CHECK-A55-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX20_1]], align 4
44
+ ; CHECK-A55-NEXT: [[ADD21_1:%.*]] = add nsw i32 [[MUL16_1]], [[TMP6]]
45
+ ; CHECK-A55-NEXT: store i32 [[ADD21_1]], ptr [[ARRAYIDX20_1]], align 4
46
+ ; CHECK-A55-NEXT: [[INC_1:%.*]] = or i32 [[K_03]], 2
47
+ ; CHECK-A55-NEXT: [[IDX_EXT_2:%.*]] = zext i32 [[INC_1]] to i64
48
+ ; CHECK-A55-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[IDX_EXT_2]]
49
+ ; CHECK-A55-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX10_2]], align 2
50
+ ; CHECK-A55-NEXT: [[CONV_2:%.*]] = sext i16 [[TMP7]] to i32
51
+ ; CHECK-A55-NEXT: [[ARRAYIDX14_2:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[IDX_EXT_2]]
52
+ ; CHECK-A55-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX14_2]], align 2
53
+ ; CHECK-A55-NEXT: [[CONV15_2:%.*]] = sext i16 [[TMP8]] to i32
42
54
; CHECK-A55-NEXT: [[MUL16_2:%.*]] = mul nsw i32 [[CONV15_2]], [[CONV_2]]
43
- ; CHECK-A55-NEXT: [[ADD21_2:%.*]] = add nsw i32 [[MUL16_2]], [[ADD21_1]]
44
- ; CHECK-A55-NEXT: store i32 [[ADD21_2]], ptr [[ARRAYIDX20]], align 4
45
- ; CHECK-A55-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
46
- ; CHECK-A55-NEXT: [[CONV_3:%.*]] = sext i16 [[TMP8]] to i32
47
- ; CHECK-A55-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
48
- ; CHECK-A55-NEXT: [[CONV15_3:%.*]] = sext i16 [[TMP9]] to i32
55
+ ; CHECK-A55-NEXT: [[ARRAYIDX20_2:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[IDX_EXT_2]]
56
+ ; CHECK-A55-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX20_2]], align 4
57
+ ; CHECK-A55-NEXT: [[ADD21_2:%.*]] = add nsw i32 [[MUL16_2]], [[TMP9]]
58
+ ; CHECK-A55-NEXT: store i32 [[ADD21_2]], ptr [[ARRAYIDX20_2]], align 4
59
+ ; CHECK-A55-NEXT: [[INC_2:%.*]] = or i32 [[K_03]], 3
60
+ ; CHECK-A55-NEXT: [[IDX_EXT_3:%.*]] = zext i32 [[INC_2]] to i64
61
+ ; CHECK-A55-NEXT: [[ARRAYIDX10_3:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[IDX_EXT_3]]
62
+ ; CHECK-A55-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10_3]], align 2
63
+ ; CHECK-A55-NEXT: [[CONV_3:%.*]] = sext i16 [[TMP10]] to i32
64
+ ; CHECK-A55-NEXT: [[ARRAYIDX14_3:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[IDX_EXT_3]]
65
+ ; CHECK-A55-NEXT: [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX14_3]], align 2
66
+ ; CHECK-A55-NEXT: [[CONV15_3:%.*]] = sext i16 [[TMP11]] to i32
49
67
; CHECK-A55-NEXT: [[MUL16_3:%.*]] = mul nsw i32 [[CONV15_3]], [[CONV_3]]
50
- ; CHECK-A55-NEXT: [[ADD21_3:%.*]] = add nsw i32 [[MUL16_3]], [[ADD21_2]]
51
- ; CHECK-A55-NEXT: store i32 [[ADD21_3]], ptr [[ARRAYIDX20]], align 4
68
+ ; CHECK-A55-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[IDX_EXT_3]]
69
+ ; CHECK-A55-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX20_3]], align 4
70
+ ; CHECK-A55-NEXT: [[ADD21_3:%.*]] = add nsw i32 [[MUL16_3]], [[TMP12]]
71
+ ; CHECK-A55-NEXT: store i32 [[ADD21_3]], ptr [[ARRAYIDX20_3]], align 4
72
+ ; CHECK-A55-NEXT: [[INC_3]] = add nuw i32 [[K_03]], 4
52
73
; CHECK-A55-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4
53
74
; CHECK-A55-NEXT: [[NITER_NCMP_3_NOT:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
54
75
; CHECK-A55-NEXT: br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY6]], !llvm.loop [[LOOP0:![0-9]+]]
55
76
; CHECK-A55: for.end.loopexit.unr-lcssa:
77
+ ; CHECK-A55-NEXT: [[K_03_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY6_PREHEADER]] ], [ [[INC_3]], [[FOR_BODY6]] ]
56
78
; CHECK-A55-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 0
57
79
; CHECK-A55-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL:%.*]]
58
80
; CHECK-A55: for.body6.epil:
59
- ; CHECK-A55-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
60
- ; CHECK-A55-NEXT: [[CONV_EPIL:%.*]] = sext i16 [[TMP10]] to i32
61
- ; CHECK-A55-NEXT: [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
62
- ; CHECK-A55-NEXT: [[CONV15_EPIL:%.*]] = sext i16 [[TMP11]] to i32
81
+ ; CHECK-A55-NEXT: [[IDX_EXT_EPIL:%.*]] = zext i32 [[K_03_UNR]] to i64
82
+ ; CHECK-A55-NEXT: [[ARRAYIDX10_EPIL:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[IDX_EXT_EPIL]]
83
+ ; CHECK-A55-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL]], align 2
84
+ ; CHECK-A55-NEXT: [[CONV_EPIL:%.*]] = sext i16 [[TMP13]] to i32
85
+ ; CHECK-A55-NEXT: [[ARRAYIDX14_EPIL:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[IDX_EXT_EPIL]]
86
+ ; CHECK-A55-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX14_EPIL]], align 2
87
+ ; CHECK-A55-NEXT: [[CONV15_EPIL:%.*]] = sext i16 [[TMP14]] to i32
63
88
; CHECK-A55-NEXT: [[MUL16_EPIL:%.*]] = mul nsw i32 [[CONV15_EPIL]], [[CONV_EPIL]]
64
- ; CHECK-A55-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
65
- ; CHECK-A55-NEXT: [[ADD21_EPIL:%.*]] = add nsw i32 [[MUL16_EPIL]], [[TMP12]]
66
- ; CHECK-A55-NEXT: store i32 [[ADD21_EPIL]], ptr [[ARRAYIDX20]], align 4
89
+ ; CHECK-A55-NEXT: [[ARRAYIDX20_EPIL:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[IDX_EXT_EPIL]]
90
+ ; CHECK-A55-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX20_EPIL]], align 4
91
+ ; CHECK-A55-NEXT: [[ADD21_EPIL:%.*]] = add nsw i32 [[MUL16_EPIL]], [[TMP15]]
92
+ ; CHECK-A55-NEXT: store i32 [[ADD21_EPIL]], ptr [[ARRAYIDX20_EPIL]], align 4
67
93
; CHECK-A55-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 1
68
94
; CHECK-A55-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_1:%.*]]
69
95
; CHECK-A55: for.body6.epil.1:
70
- ; CHECK-A55-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
71
- ; CHECK-A55-NEXT: [[CONV_EPIL_1:%.*]] = sext i16 [[TMP13]] to i32
72
- ; CHECK-A55-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
73
- ; CHECK-A55-NEXT: [[CONV15_EPIL_1:%.*]] = sext i16 [[TMP14]] to i32
96
+ ; CHECK-A55-NEXT: [[INC_EPIL:%.*]] = add nuw i32 [[K_03_UNR]], 1
97
+ ; CHECK-A55-NEXT: [[IDX_EXT_EPIL_1:%.*]] = zext i32 [[INC_EPIL]] to i64
98
+ ; CHECK-A55-NEXT: [[ARRAYIDX10_EPIL_1:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[IDX_EXT_EPIL_1]]
99
+ ; CHECK-A55-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL_1]], align 2
100
+ ; CHECK-A55-NEXT: [[CONV_EPIL_1:%.*]] = sext i16 [[TMP16]] to i32
101
+ ; CHECK-A55-NEXT: [[ARRAYIDX14_EPIL_1:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[IDX_EXT_EPIL_1]]
102
+ ; CHECK-A55-NEXT: [[TMP17:%.*]] = load i16, ptr [[ARRAYIDX14_EPIL_1]], align 2
103
+ ; CHECK-A55-NEXT: [[CONV15_EPIL_1:%.*]] = sext i16 [[TMP17]] to i32
74
104
; CHECK-A55-NEXT: [[MUL16_EPIL_1:%.*]] = mul nsw i32 [[CONV15_EPIL_1]], [[CONV_EPIL_1]]
75
- ; CHECK-A55-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
76
- ; CHECK-A55-NEXT: [[ADD21_EPIL_1:%.*]] = add nsw i32 [[MUL16_EPIL_1]], [[TMP15]]
77
- ; CHECK-A55-NEXT: store i32 [[ADD21_EPIL_1]], ptr [[ARRAYIDX20]], align 4
105
+ ; CHECK-A55-NEXT: [[ARRAYIDX20_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[IDX_EXT_EPIL_1]]
106
+ ; CHECK-A55-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX20_EPIL_1]], align 4
107
+ ; CHECK-A55-NEXT: [[ADD21_EPIL_1:%.*]] = add nsw i32 [[MUL16_EPIL_1]], [[TMP18]]
108
+ ; CHECK-A55-NEXT: store i32 [[ADD21_EPIL_1]], ptr [[ARRAYIDX20_EPIL_1]], align 4
78
109
; CHECK-A55-NEXT: [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 2
79
110
; CHECK-A55-NEXT: br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_2:%.*]]
80
111
; CHECK-A55: for.body6.epil.2:
81
- ; CHECK-A55-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
82
- ; CHECK-A55-NEXT: [[CONV_EPIL_2:%.*]] = sext i16 [[TMP16]] to i32
83
- ; CHECK-A55-NEXT: [[TMP17:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
84
- ; CHECK-A55-NEXT: [[CONV15_EPIL_2:%.*]] = sext i16 [[TMP17]] to i32
112
+ ; CHECK-A55-NEXT: [[INC_EPIL_1:%.*]] = add nuw i32 [[K_03_UNR]], 2
113
+ ; CHECK-A55-NEXT: [[IDX_EXT_EPIL_2:%.*]] = zext i32 [[INC_EPIL_1]] to i64
114
+ ; CHECK-A55-NEXT: [[ARRAYIDX10_EPIL_2:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[IDX_EXT_EPIL_2]]
115
+ ; CHECK-A55-NEXT: [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL_2]], align 2
116
+ ; CHECK-A55-NEXT: [[CONV_EPIL_2:%.*]] = sext i16 [[TMP19]] to i32
117
+ ; CHECK-A55-NEXT: [[ARRAYIDX14_EPIL_2:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[IDX_EXT_EPIL_2]]
118
+ ; CHECK-A55-NEXT: [[TMP20:%.*]] = load i16, ptr [[ARRAYIDX14_EPIL_2]], align 2
119
+ ; CHECK-A55-NEXT: [[CONV15_EPIL_2:%.*]] = sext i16 [[TMP20]] to i32
85
120
; CHECK-A55-NEXT: [[MUL16_EPIL_2:%.*]] = mul nsw i32 [[CONV15_EPIL_2]], [[CONV_EPIL_2]]
86
- ; CHECK-A55-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
87
- ; CHECK-A55-NEXT: [[ADD21_EPIL_2:%.*]] = add nsw i32 [[MUL16_EPIL_2]], [[TMP18]]
88
- ; CHECK-A55-NEXT: store i32 [[ADD21_EPIL_2]], ptr [[ARRAYIDX20]], align 4
121
+ ; CHECK-A55-NEXT: [[ARRAYIDX20_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[IDX_EXT_EPIL_2]]
122
+ ; CHECK-A55-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20_EPIL_2]], align 4
123
+ ; CHECK-A55-NEXT: [[ADD21_EPIL_2:%.*]] = add nsw i32 [[MUL16_EPIL_2]], [[TMP21]]
124
+ ; CHECK-A55-NEXT: store i32 [[ADD21_EPIL_2]], ptr [[ARRAYIDX20_EPIL_2]], align 4
89
125
; CHECK-A55-NEXT: br label [[FOR_END]]
90
126
; CHECK-A55: for.end:
91
127
; CHECK-A55-NEXT: ret void
92
128
;
93
129
; CHECK-GENERIC-LABEL: @runtime_unroll_generic(
94
130
; CHECK-GENERIC-NEXT: entry:
95
131
; CHECK-GENERIC-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0
96
- ; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_LR_PH:%.*]]
97
- ; CHECK-GENERIC: for.body6.lr.ph:
98
- ; CHECK-GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[ARG_2:%.*]], i64 undef
99
- ; CHECK-GENERIC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, ptr [[ARG_3:%.*]], i64 undef
100
- ; CHECK-GENERIC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[ARG_1:%.*]], i64 undef
101
- ; CHECK-GENERIC-NEXT: br label [[FOR_BODY6:%.*]]
132
+ ; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]]
102
133
; CHECK-GENERIC: for.body6:
103
- ; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY6]] ]
134
+ ; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY6]] ], [ 0, [[ENTRY:%.*]] ]
135
+ ; CHECK-GENERIC-NEXT: [[IDX_EXT:%.*]] = zext i32 [[K_03]] to i64
136
+ ; CHECK-GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[ARG_2:%.*]], i64 [[IDX_EXT]]
104
137
; CHECK-GENERIC-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
105
138
; CHECK-GENERIC-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
139
+ ; CHECK-GENERIC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, ptr [[ARG_3:%.*]], i64 [[IDX_EXT]]
106
140
; CHECK-GENERIC-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
107
141
; CHECK-GENERIC-NEXT: [[CONV15:%.*]] = sext i16 [[TMP1]] to i32
108
142
; CHECK-GENERIC-NEXT: [[MUL16:%.*]] = mul nsw i32 [[CONV15]], [[CONV]]
143
+ ; CHECK-GENERIC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[ARG_1:%.*]], i64 [[IDX_EXT]]
109
144
; CHECK-GENERIC-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
110
145
; CHECK-GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP2]]
111
146
; CHECK-GENERIC-NEXT: store i32 [[ADD21]], ptr [[ARRAYIDX20]], align 4
@@ -148,16 +183,17 @@ for.cond4: ; preds = %for.inc, %for.body3
148
183
149
184
for.body6: ; preds = %for.cond4
150
185
%2 = load ptr , ptr %arg_2.addr , align 8
151
- %arrayidx10 = getelementptr inbounds i16 , ptr %2 , i64 undef
186
+ %idx.ext = zext i32 %0 to i64
187
+ %arrayidx10 = getelementptr inbounds i16 , ptr %2 , i64 %idx.ext
152
188
%3 = load i16 , ptr %arrayidx10 , align 2
153
189
%conv = sext i16 %3 to i32
154
190
%4 = load ptr , ptr %arg_3.addr , align 8
155
- %arrayidx14 = getelementptr inbounds i16 , ptr %4 , i64 undef
191
+ %arrayidx14 = getelementptr inbounds i16 , ptr %4 , i64 %idx.ext
156
192
%5 = load i16 , ptr %arrayidx14 , align 2
157
193
%conv15 = sext i16 %5 to i32
158
194
%mul16 = mul nsw i32 %conv , %conv15
159
195
%6 = load ptr , ptr %arg_1.addr , align 8
160
- %arrayidx20 = getelementptr inbounds i32 , ptr %6 , i64 undef
196
+ %arrayidx20 = getelementptr inbounds i32 , ptr %6 , i64 %idx.ext
161
197
%7 = load i32 , ptr %arrayidx20 , align 4
162
198
%add21 = add nsw i32 %7 , %mul16
163
199
store i32 %add21 , ptr %arrayidx20 , align 4
0 commit comments