@@ -26,8 +26,7 @@ define void @matrix_extract_insert_scalar(i32 %i, i32 %k, i32 %j, [225 x double]
26
26
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP1]]
27
27
; CHECK-NEXT: [[MATRIXEXT7:%.*]] = load double, double* [[TMP9]], align 8
28
28
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]]
29
- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP1]]
30
- ; CHECK-NEXT: store double [[SUB]], double* [[TMP10]], align 8
29
+ ; CHECK-NEXT: store double [[SUB]], double* [[TMP9]], align 8
31
30
; CHECK-NEXT: ret void
32
31
;
33
32
entry:
@@ -93,43 +92,99 @@ define void @matrix_extract_insert_loop(i32 %i, [225 x double]* nonnull align 8
93
92
; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I:%.*]] to i64
94
93
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>*
95
94
; CHECK-NEXT: [[CMP212_NOT:%.*]] = icmp eq i32 [[I]], 0
96
- ; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
97
- ; CHECK: for.cond1.preheader.us.preheader:
98
- ; CHECK-NEXT: [[DOTPRE_PRE:%.*]] = load <225 x double>, <225 x double>* [[TMP1]], align 8
99
- ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
95
+ ; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]]
100
96
; CHECK: for.cond1.preheader.us:
101
- ; CHECK-NEXT: [[DOTPRE:%.*]] = phi <225 x double> [ [[MATINS_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ [[DOTPRE_PRE]], [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
102
- ; CHECK-NEXT: [[J_014_US:%.*]] = phi i32 [ [[INC13_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
103
- ; CHECK-NEXT: [[CONV5_US:%.*]] = zext i32 [[J_014_US]] to i64
104
- ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[CONV5_US]], 15
105
- ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], [[CONV6]]
106
- ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 225
107
- ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP4]])
97
+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[I]], 225
98
+ ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]])
99
+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV6]]
108
100
; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]]
109
101
; CHECK: for.body4.us:
110
- ; CHECK-NEXT: [[TMP5:%.*]] = phi <225 x double> [ [[DOTPRE]], [[FOR_COND1_PREHEADER_US]] ], [ [[MATINS_US]], [[FOR_BODY4_US]] ]
111
102
; CHECK-NEXT: [[K_013_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ]
112
103
; CHECK-NEXT: [[CONV_US:%.*]] = zext i32 [[K_013_US]] to i64
113
- ; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP2]], [[CONV_US]]
114
- ; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 225
115
- ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP7]])
116
- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP6]]
117
- ; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, double* [[TMP8]], align 8
118
- ; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = extractelement <225 x double> [[TMP5]], i64 [[TMP3]]
104
+ ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[K_013_US]], 225
105
+ ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP4]])
106
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[CONV_US]]
107
+ ; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, double* [[TMP5]], align 8
108
+ ; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = load double, double* [[TMP3]], align 8
119
109
; CHECK-NEXT: [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]]
120
- ; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = extractelement <225 x double> [[TMP5]], i64 [[TMP6]]
110
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV_US]]
111
+ ; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = load double, double* [[TMP6]], align 8
121
112
; CHECK-NEXT: [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]]
122
- ; CHECK-NEXT: [[MATINS_US]] = insertelement <225 x double> [[TMP5]], double [[SUB_US]], i64 [[TMP6]]
123
- ; CHECK-NEXT: store <225 x double> [[MATINS_US]], <225 x double>* [[TMP1]], align 8
124
- ; CHECK-NEXT: [[INC_US]] = add nuw i32 [[K_013_US]], 1
113
+ ; CHECK-NEXT: store double [[SUB_US]], double* [[TMP6]], align 8
114
+ ; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[K_013_US]], 1
125
115
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i32 [[INC_US]], [[I]]
126
- ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
116
+ ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]]
127
117
; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us:
128
- ; CHECK-NEXT: [[INC13_US]] = add nuw nsw i32 [[J_014_US]], 1
129
- ; CHECK-NEXT: [[CMP_US:%.*]] = icmp ult i32 [[J_014_US]], 3
130
- ; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP]]
118
+ ; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[CONV6]], 15
119
+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[I]], 210
120
+ ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP8]])
121
+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP7]]
122
+ ; CHECK-NEXT: br label [[FOR_BODY4_US_1:%.*]]
131
123
; CHECK: for.cond.cleanup:
132
124
; CHECK-NEXT: ret void
125
+ ; CHECK: for.body4.us.1:
126
+ ; CHECK-NEXT: [[K_013_US_1:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ]
127
+ ; CHECK-NEXT: [[NARROW:%.*]] = add nuw nsw i32 [[K_013_US_1]], 15
128
+ ; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[NARROW]] to i64
129
+ ; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 [[K_013_US_1]], 210
130
+ ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
131
+ ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP10]]
132
+ ; CHECK-NEXT: [[MATRIXEXT_US_1:%.*]] = load double, double* [[TMP12]], align 8
133
+ ; CHECK-NEXT: [[MATRIXEXT8_US_1:%.*]] = load double, double* [[TMP9]], align 8
134
+ ; CHECK-NEXT: [[MUL_US_1:%.*]] = fmul double [[MATRIXEXT_US_1]], [[MATRIXEXT8_US_1]]
135
+ ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP10]]
136
+ ; CHECK-NEXT: [[MATRIXEXT11_US_1:%.*]] = load double, double* [[TMP13]], align 8
137
+ ; CHECK-NEXT: [[SUB_US_1:%.*]] = fsub double [[MATRIXEXT11_US_1]], [[MUL_US_1]]
138
+ ; CHECK-NEXT: store double [[SUB_US_1]], double* [[TMP13]], align 8
139
+ ; CHECK-NEXT: [[INC_US_1]] = add nuw nsw i32 [[K_013_US_1]], 1
140
+ ; CHECK-NEXT: [[CMP2_US_1:%.*]] = icmp ult i32 [[INC_US_1]], [[I]]
141
+ ; CHECK-NEXT: br i1 [[CMP2_US_1]], label [[FOR_BODY4_US_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]]
142
+ ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.1:
143
+ ; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[CONV6]], 30
144
+ ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i32 [[I]], 195
145
+ ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]])
146
+ ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP14]]
147
+ ; CHECK-NEXT: br label [[FOR_BODY4_US_2:%.*]]
148
+ ; CHECK: for.body4.us.2:
149
+ ; CHECK-NEXT: [[K_013_US_2:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ]
150
+ ; CHECK-NEXT: [[NARROW16:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30
151
+ ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[NARROW16]] to i64
152
+ ; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i32 [[K_013_US_2]], 195
153
+ ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP18]])
154
+ ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP17]]
155
+ ; CHECK-NEXT: [[MATRIXEXT_US_2:%.*]] = load double, double* [[TMP19]], align 8
156
+ ; CHECK-NEXT: [[MATRIXEXT8_US_2:%.*]] = load double, double* [[TMP16]], align 8
157
+ ; CHECK-NEXT: [[MUL_US_2:%.*]] = fmul double [[MATRIXEXT_US_2]], [[MATRIXEXT8_US_2]]
158
+ ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP17]]
159
+ ; CHECK-NEXT: [[MATRIXEXT11_US_2:%.*]] = load double, double* [[TMP20]], align 8
160
+ ; CHECK-NEXT: [[SUB_US_2:%.*]] = fsub double [[MATRIXEXT11_US_2]], [[MUL_US_2]]
161
+ ; CHECK-NEXT: store double [[SUB_US_2]], double* [[TMP20]], align 8
162
+ ; CHECK-NEXT: [[INC_US_2]] = add nuw nsw i32 [[K_013_US_2]], 1
163
+ ; CHECK-NEXT: [[CMP2_US_2:%.*]] = icmp ult i32 [[INC_US_2]], [[I]]
164
+ ; CHECK-NEXT: br i1 [[CMP2_US_2]], label [[FOR_BODY4_US_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]]
165
+ ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.2:
166
+ ; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[CONV6]], 45
167
+ ; CHECK-NEXT: [[TMP22:%.*]] = icmp ult i32 [[I]], 180
168
+ ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
169
+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP21]]
170
+ ; CHECK-NEXT: br label [[FOR_BODY4_US_3:%.*]]
171
+ ; CHECK: for.body4.us.3:
172
+ ; CHECK-NEXT: [[K_013_US_3:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ]
173
+ ; CHECK-NEXT: [[NARROW17:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45
174
+ ; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[NARROW17]] to i64
175
+ ; CHECK-NEXT: [[TMP25:%.*]] = icmp ult i32 [[K_013_US_3]], 180
176
+ ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP25]])
177
+ ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP24]]
178
+ ; CHECK-NEXT: [[MATRIXEXT_US_3:%.*]] = load double, double* [[TMP26]], align 8
179
+ ; CHECK-NEXT: [[MATRIXEXT8_US_3:%.*]] = load double, double* [[TMP23]], align 8
180
+ ; CHECK-NEXT: [[MUL_US_3:%.*]] = fmul double [[MATRIXEXT_US_3]], [[MATRIXEXT8_US_3]]
181
+ ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP24]]
182
+ ; CHECK-NEXT: [[MATRIXEXT11_US_3:%.*]] = load double, double* [[TMP27]], align 8
183
+ ; CHECK-NEXT: [[SUB_US_3:%.*]] = fsub double [[MATRIXEXT11_US_3]], [[MUL_US_3]]
184
+ ; CHECK-NEXT: store double [[SUB_US_3]], double* [[TMP27]], align 8
185
+ ; CHECK-NEXT: [[INC_US_3]] = add nuw nsw i32 [[K_013_US_3]], 1
186
+ ; CHECK-NEXT: [[CMP2_US_3:%.*]] = icmp ult i32 [[INC_US_3]], [[I]]
187
+ ; CHECK-NEXT: br i1 [[CMP2_US_3]], label [[FOR_BODY4_US_3]], label [[FOR_COND_CLEANUP]]
133
188
;
134
189
entry:
135
190
%i.addr = alloca i32 , align 4
0 commit comments