@@ -466,3 +466,35 @@ define <2 x float> @load_f32_insert_v2f32_asan(float* align 16 dereferenceable(1
466
466
%r = insertelement <2 x float > undef , float %s , i32 0
467
467
ret <2 x float > %r
468
468
}
469
+
470
+ declare float * @getscaleptr () ; external function (no body in this file): takes no arguments, returns a float pointer
471
; Test case named after PR47558: the scalar load %scale has two uses (it feeds
; both insertelement instructions that build %t2). The CHECK lines below record
; the expected output, in which a <4 x float> load through a bitcast of the
; scale pointer appears while the original scalar load is still present.
; NOTE(review): the RUN line is outside this view, so which pass/options
; produce this output cannot be confirmed from here.
+ define void @PR47558_multiple_use_load (<2 x float >* nocapture nonnull %resultptr , <2 x float >* nocapture nonnull readonly %opptr ) {
472
+ ; CHECK-LABEL: @PR47558_multiple_use_load(
473
+ ; CHECK-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) float* @getscaleptr()
474
+ ; CHECK-NEXT: [[OP:%.*]] = load <2 x float>, <2 x float>* [[OPPTR:%.*]], align 4
475
+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[SCALEPTR]] to <4 x float>*
476
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
477
+ ; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
478
+ ; CHECK-NEXT: [[SCALE:%.*]] = load float, float* [[SCALEPTR]], align 16
479
+ ; CHECK-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
480
+ ; CHECK-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
481
+ ; CHECK-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
482
+ ; CHECK-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> undef, float [[T4]], i32 0
483
+ ; CHECK-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
484
+ ; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
485
+ ; CHECK-NEXT: store <2 x float> [[RESULT1]], <2 x float>* [[RESULTPTR:%.*]], align 8
486
+ ; CHECK-NEXT: ret void
487
+ ;
488
+ %scaleptr = tail call nonnull align 16 dereferenceable (64 ) float * @getscaleptr () ; call-site attributes: result is nonnull, 16-byte aligned, 64 bytes dereferenceable
489
+ %op = load <2 x float >, <2 x float >* %opptr , align 4 ; the two-lane operand to be scaled
490
+ %scale = load float , float * %scaleptr , align 16 ; scalar load with multiple uses: consumed by both %t1 and %t2
491
+ %t1 = insertelement <2 x float > undef , float %scale , i32 0 ; lane 0 = %scale
492
+ %t2 = insertelement <2 x float > %t1 , float %scale , i32 1 ; lane 1 = %scale, so both lanes of %t2 hold %scale
493
+ %t3 = fmul <2 x float > %op , %t2 ; lane-wise product of %op and the splatted scale
494
+ %t4 = extractelement <2 x float > %t3 , i32 0 ; take lane 0 of the product ...
495
+ %result0 = insertelement <2 x float > undef , float %t4 , i32 0 ; ... and place it in lane 0 of a fresh vector
496
+ %t5 = extractelement <2 x float > %t3 , i32 1 ; take lane 1 of the product ...
497
+ %result1 = insertelement <2 x float > %result0 , float %t5 , i32 1 ; ... and place it in lane 1, so %result1 has the same lanes as %t3
498
+ store <2 x float > %result1 , <2 x float >* %resultptr , align 8 ; write the rebuilt vector to the caller-provided result pointer
499
+ ret void
500
+ }
0 commit comments