@@ -49,6 +49,52 @@ for.end:
   ret float %add
 }

+; Same as above but where fadd has a fast-math flag.
+define float @fadd_strict_fmf(float* noalias nocapture readonly %a, i64 %n) {
+; CHECK-ORDERED-LABEL: @fadd_strict_fmf
+; CHECK-ORDERED: vector.body:
+; CHECK-ORDERED: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[RDX:%.*]], %vector.body ]
+; CHECK-ORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>*
+; CHECK-ORDERED: [[RDX]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float [[VEC_PHI]], <8 x float> [[LOAD_VEC]])
+; CHECK-ORDERED: for.end:
+; CHECK-ORDERED: [[RES:%.*]] = phi float [ [[SCALAR:%.*]], %for.body ], [ [[RDX]], %middle.block ]
+; CHECK-ORDERED: ret float [[RES]]
+
+; CHECK-UNORDERED-LABEL: @fadd_strict_fmf
+; CHECK-UNORDERED: vector.body:
+; CHECK-UNORDERED: [[VEC_PHI:%.*]] = phi <8 x float> [ <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vector.ph ], [ [[FADD_VEC:%.*]], %vector.body ]
+; CHECK-UNORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>*
+; CHECK-UNORDERED: [[FADD_VEC]] = fadd nnan <8 x float> [[LOAD_VEC]], [[VEC_PHI]]
+; CHECK-UNORDERED-NOT: @llvm.vector.reduce.fadd
+; CHECK-UNORDERED: middle.block:
+; CHECK-UNORDERED: [[RDX:%.*]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[FADD_VEC]])
+; CHECK-UNORDERED: for.body:
+; CHECK-UNORDERED: [[LOAD:%.*]] = load float, float*
+; CHECK-UNORDERED: [[FADD:%.*]] = fadd nnan float [[LOAD]], {{.*}}
+; CHECK-UNORDERED: for.end:
+; CHECK-UNORDERED: [[RES:%.*]] = phi float [ [[FADD]], %for.body ], [ [[RDX]], %middle.block ]
+; CHECK-UNORDERED: ret float [[RES]]
+
+; CHECK-NOT-VECTORIZED-LABEL: @fadd_strict_fmf
+; CHECK-NOT-VECTORIZED-NOT: vector.body
+
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd nnan float %0, %sum.07
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret float %add
+}
+
 define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
 ; CHECK-ORDERED-LABEL: @fadd_strict_unroll
 ; CHECK-ORDERED: vector.body:
@@ -767,6 +813,79 @@ for.end:
   ret float %add
 }

+; Same as above but where fadd has a fast-math flag.
+define float @fadd_scalar_vf_fmf(float* noalias nocapture readonly %a, i64 %n) {
+; CHECK-ORDERED-LABEL: @fadd_scalar_vf_fmf
+; CHECK-ORDERED: vector.body:
+; CHECK-ORDERED: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[FADD4:%.*]], %vector.body ]
+; CHECK-ORDERED: [[LOAD1:%.*]] = load float, float*
+; CHECK-ORDERED: [[LOAD2:%.*]] = load float, float*
+; CHECK-ORDERED: [[LOAD3:%.*]] = load float, float*
+; CHECK-ORDERED: [[LOAD4:%.*]] = load float, float*
+; CHECK-ORDERED: [[FADD1:%.*]] = fadd nnan float [[VEC_PHI]], [[LOAD1]]
+; CHECK-ORDERED: [[FADD2:%.*]] = fadd nnan float [[FADD1]], [[LOAD2]]
+; CHECK-ORDERED: [[FADD3:%.*]] = fadd nnan float [[FADD2]], [[LOAD3]]
+; CHECK-ORDERED: [[FADD4]] = fadd nnan float [[FADD3]], [[LOAD4]]
+; CHECK-ORDERED-NOT: @llvm.vector.reduce.fadd
+; CHECK-ORDERED: scalar.ph:
+; CHECK-ORDERED: [[MERGE_RDX:%.*]] = phi float [ 0.000000e+00, %entry ], [ [[FADD4]], %middle.block ]
+; CHECK-ORDERED: for.body:
+; CHECK-ORDERED: [[SUM_07:%.*]] = phi float [ [[MERGE_RDX]], %scalar.ph ], [ [[FADD5:%.*]], %for.body ]
+; CHECK-ORDERED: [[LOAD5:%.*]] = load float, float*
+; CHECK-ORDERED: [[FADD5]] = fadd nnan float [[LOAD5]], [[SUM_07]]
+; CHECK-ORDERED: for.end:
+; CHECK-ORDERED: [[RES:%.*]] = phi float [ [[FADD5]], %for.body ], [ [[FADD4]], %middle.block ]
+; CHECK-ORDERED: ret float [[RES]]
+
+; CHECK-UNORDERED-LABEL: @fadd_scalar_vf_fmf
+; CHECK-UNORDERED: vector.body:
+; CHECK-UNORDERED: [[VEC_PHI1:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[FADD1:%.*]], %vector.body ]
+; CHECK-UNORDERED: [[VEC_PHI2:%.*]] = phi float [ -0.000000e+00, %vector.ph ], [ [[FADD2:%.*]], %vector.body ]
+; CHECK-UNORDERED: [[VEC_PHI3:%.*]] = phi float [ -0.000000e+00, %vector.ph ], [ [[FADD3:%.*]], %vector.body ]
+; CHECK-UNORDERED: [[VEC_PHI4:%.*]] = phi float [ -0.000000e+00, %vector.ph ], [ [[FADD4:%.*]], %vector.body ]
+; CHECK-UNORDERED: [[LOAD1:%.*]] = load float, float*
+; CHECK-UNORDERED: [[LOAD2:%.*]] = load float, float*
+; CHECK-UNORDERED: [[LOAD3:%.*]] = load float, float*
+; CHECK-UNORDERED: [[LOAD4:%.*]] = load float, float*
+; CHECK-UNORDERED: [[FADD1]] = fadd nnan float [[LOAD1]], [[VEC_PHI1]]
+; CHECK-UNORDERED: [[FADD2]] = fadd nnan float [[LOAD2]], [[VEC_PHI2]]
+; CHECK-UNORDERED: [[FADD3]] = fadd nnan float [[LOAD3]], [[VEC_PHI3]]
+; CHECK-UNORDERED: [[FADD4]] = fadd nnan float [[LOAD4]], [[VEC_PHI4]]
+; CHECK-UNORDERED-NOT: @llvm.vector.reduce.fadd
+; CHECK-UNORDERED: middle.block:
+; CHECK-UNORDERED: [[BIN_RDX1:%.*]] = fadd nnan float [[FADD2]], [[FADD1]]
+; CHECK-UNORDERED: [[BIN_RDX2:%.*]] = fadd nnan float [[FADD3]], [[BIN_RDX1]]
+; CHECK-UNORDERED: [[BIN_RDX3:%.*]] = fadd nnan float [[FADD4]], [[BIN_RDX2]]
+; CHECK-UNORDERED: scalar.ph:
+; CHECK-UNORDERED: [[MERGE_RDX:%.*]] = phi float [ 0.000000e+00, %entry ], [ [[BIN_RDX3]], %middle.block ]
+; CHECK-UNORDERED: for.body:
+; CHECK-UNORDERED: [[SUM_07:%.*]] = phi float [ [[MERGE_RDX]], %scalar.ph ], [ [[FADD5:%.*]], %for.body ]
+; CHECK-UNORDERED: [[LOAD5:%.*]] = load float, float*
+; CHECK-UNORDERED: [[FADD5]] = fadd nnan float [[LOAD5]], [[SUM_07]]
+; CHECK-UNORDERED: for.end:
+; CHECK-UNORDERED: [[RES:%.*]] = phi float [ [[FADD5]], %for.body ], [ [[BIN_RDX3]], %middle.block ]
+; CHECK-UNORDERED: ret float [[RES]]
+
+; CHECK-NOT-VECTORIZED-LABEL: @fadd_scalar_vf_fmf
+; CHECK-NOT-VECTORIZED-NOT: vector.body
+
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd nnan float %0, %sum.07
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:
+  ret float %add
+}
+
 ; Test case where the reduction step is a first-order recurrence.
 define double @reduction_increment_by_first_order_recurrence() {
 ; CHECK-ORDERED-LABEL: @reduction_increment_by_first_order_recurrence(