@@ -229,35 +229,34 @@ entry:
229
229
define void @lookahead_external_uses(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2) {
230
230
; CHECK-LABEL: @lookahead_external_uses(
231
231
; CHECK-NEXT: entry:
232
- ; CHECK-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
233
232
; CHECK-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
234
233
; CHECK-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
235
234
; CHECK-NEXT: [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
236
- ; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
235
+ ; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1
237
236
; CHECK-NEXT: [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
238
- ; CHECK-NEXT: [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
237
+ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double*> undef, double* [[A]], i32 0
238
+ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double*> [[TMP0]], double* [[A]], i32 1
239
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <2 x double*> [[TMP1]], <2 x i64> <i64 0, i64 2>
239
240
; CHECK-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
240
- ; CHECK-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8
241
241
; CHECK-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8
242
242
; CHECK-NEXT: [[D0:%.*]] = load double, double* [[IDXD0]], align 8
243
243
; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
244
244
; CHECK-NEXT: [[B2:%.*]] = load double, double* [[IDXB2]], align 8
245
- ; CHECK-NEXT: [[A2:%.*]] = load double, double* [[IDXA2]], align 8
246
- ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
247
- ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
248
- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
249
- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[A1]], i32 1
250
- ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
251
- ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[B2]], i32 1
252
- ; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP5]]
253
- ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
254
- ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A2]], i32 1
255
- ; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP1]]
256
- ; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]]
245
+ ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[TMP2]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
246
+ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double*> [[TMP2]], i32 0
247
+ ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
248
+ ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
249
+ ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
250
+ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A1]], i32 1
251
+ ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
252
+ ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[B2]], i32 1
253
+ ; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP10]]
254
+ ; CHECK-NEXT: [[TMP12:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP6]]
255
+ ; CHECK-NEXT: [[TMP13:%.*]] = fadd fast <2 x double> [[TMP12]], [[TMP11]]
257
256
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
258
257
; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
259
- ; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
260
- ; CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
258
+ ; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
259
+ ; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
261
260
; CHECK-NEXT: store double [[A1]], double* [[EXT1:%.*]], align 8
262
261
; CHECK-NEXT: ret void
263
262
;
@@ -328,31 +327,27 @@ define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, do
328
327
; CHECK-NEXT: [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
329
328
; CHECK-NEXT: [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
330
329
; CHECK-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
330
+ ; CHECK-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8
331
331
; CHECK-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8
332
332
; CHECK-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8
333
333
; CHECK-NEXT: [[D0:%.*]] = load double, double* [[IDXD0]], align 8
334
- ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
335
- ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
334
+ ; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
336
335
; CHECK-NEXT: [[B2:%.*]] = load double, double* [[IDXB2]], align 8
337
336
; CHECK-NEXT: [[A2:%.*]] = load double, double* [[IDXA2]], align 8
338
337
; CHECK-NEXT: [[B1:%.*]] = load double, double* [[IDXB1]], align 8
339
- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
340
- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1
341
- ; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
342
- ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
343
- ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A2]], i32 1
344
- ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
345
- ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1
346
- ; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]]
347
- ; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]]
338
+ ; CHECK-NEXT: [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
339
+ ; CHECK-NEXT: [[SUBC0D0:%.*]] = fsub fast double [[C0]], [[D0]]
340
+ ; CHECK-NEXT: [[SUBA1B2:%.*]] = fsub fast double [[A1]], [[B2]]
341
+ ; CHECK-NEXT: [[SUBA2B1:%.*]] = fsub fast double [[A2]], [[B1]]
342
+ ; CHECK-NEXT: [[ADD0:%.*]] = fadd fast double [[SUBA0B0]], [[SUBC0D0]]
343
+ ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[SUBA1B2]], [[SUBA2B1]]
348
344
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
349
345
; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
350
- ; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
351
- ; CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
352
- ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
353
- ; CHECK-NEXT: store double [[TMP12]], double* [[EXT1:%.*]], align 8
354
- ; CHECK-NEXT: store double [[TMP12]], double* [[EXT2:%.*]], align 8
355
- ; CHECK-NEXT: store double [[TMP12]], double* [[EXT3:%.*]], align 8
346
+ ; CHECK-NEXT: store double [[ADD0]], double* [[IDXS0]], align 8
347
+ ; CHECK-NEXT: store double [[ADD1]], double* [[IDXS1]], align 8
348
+ ; CHECK-NEXT: store double [[A1]], double* [[EXT1:%.*]], align 8
349
+ ; CHECK-NEXT: store double [[A1]], double* [[EXT2:%.*]], align 8
350
+ ; CHECK-NEXT: store double [[A1]], double* [[EXT3:%.*]], align 8
356
351
; CHECK-NEXT: store double [[B1]], double* [[EXT4:%.*]], align 8
357
352
; CHECK-NEXT: store double [[B1]], double* [[EXT5:%.*]], align 8
358
353
; CHECK-NEXT: ret void
0 commit comments