@@ -318,22 +318,14 @@ entry:
318
318
define float @f (ptr nocapture readonly %x ) {
319
319
; CHECK-LABEL: @f(
320
320
; CHECK-NEXT: entry:
321
- ; CHECK-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
322
- ; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
323
- ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
324
- ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
325
- ; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
326
- ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
321
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <48 x float>, ptr [[X:%.*]], align 4
322
+ ; CHECK-NEXT: [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v48f32(float 0.000000e+00, <48 x float> [[TMP0]])
327
323
; CHECK-NEXT: ret float [[OP_RDX]]
328
324
;
329
325
; THRESHOLD-LABEL: @f(
330
326
; THRESHOLD-NEXT: entry:
331
- ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
332
- ; THRESHOLD-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
333
- ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
334
- ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
335
- ; THRESHOLD-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
336
- ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
327
+ ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <48 x float>, ptr [[X:%.*]], align 4
328
+ ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v48f32(float 0.000000e+00, <48 x float> [[TMP0]])
337
329
; THRESHOLD-NEXT: ret float [[OP_RDX]]
338
330
;
339
331
entry:
@@ -606,18 +598,14 @@ define float @loadadd31(ptr nocapture readonly %x) {
606
598
; CHECK-LABEL: @loadadd31(
607
599
; CHECK-NEXT: entry:
608
600
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
609
- ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
610
- ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
611
- ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
601
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <24 x float>, ptr [[ARRAYIDX]], align 4
612
602
; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
613
603
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
614
604
; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
615
605
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
616
606
; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
617
607
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
618
- ; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP0]])
619
- ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
620
- ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
608
+ ; CHECK-NEXT: [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP0]])
621
609
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
622
610
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
623
611
; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
@@ -627,18 +615,14 @@ define float @loadadd31(ptr nocapture readonly %x) {
627
615
; THRESHOLD-LABEL: @loadadd31(
628
616
; THRESHOLD-NEXT: entry:
629
617
; THRESHOLD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
630
- ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
631
- ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
632
- ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
618
+ ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <24 x float>, ptr [[ARRAYIDX]], align 4
633
619
; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
634
620
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
635
621
; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
636
622
; THRESHOLD-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
637
623
; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
638
624
; THRESHOLD-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
639
- ; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP0]])
640
- ; THRESHOLD-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
641
- ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
625
+ ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP0]])
642
626
; THRESHOLD-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
643
627
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
644
628
; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
0 commit comments