|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
2 | 2 | ; RUN: opt -mtriple riscv64-linux-gnu -mattr=+v,+d -passes=loop-vectorize < %s -S -o - | FileCheck %s -check-prefix=OUTLOOP
|
3 | 3 | ; RUN: opt -mtriple riscv64-linux-gnu -mattr=+v,+d -passes=loop-vectorize -prefer-inloop-reductions < %s -S -o - | FileCheck %s -check-prefix=INLOOP
|
4 |
| -; RUN: opt -passes=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s |
| 4 | +; RUN: opt -passes=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck --check-prefix=IF-EVL-OUTLOOP %s |
| 5 | +; RUN: opt -passes=loop-vectorize -prefer-inloop-reductions -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck --check-prefix=IF-EVL-INLOOP %s |
| 6 | + |
5 | 7 |
|
6 | 8 | target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
|
7 | 9 | target triple = "riscv64"
|
8 | 10 |
|
9 |
| -; FIXME: inloop reductions are not supported yet with predicated vectorization. |
10 |
| - |
11 | 11 | define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
|
12 | 12 | ; OUTLOOP-LABEL: @add_i16_i32(
|
13 | 13 | ; OUTLOOP-NEXT: entry:
|
@@ -117,69 +117,133 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
|
117 | 117 | ; INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
118 | 118 | ; INLOOP-NEXT: ret i32 [[R_0_LCSSA]]
|
119 | 119 | ;
|
120 |
| -; IF-EVL-LABEL: @add_i16_i32( |
121 |
| -; IF-EVL-NEXT: entry: |
122 |
| -; IF-EVL-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
123 |
| -; IF-EVL-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] |
124 |
| -; IF-EVL: for.body.preheader: |
125 |
| -; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
126 |
| -; IF-EVL: vector.ph: |
127 |
| -; IF-EVL-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() |
128 |
| -; IF-EVL-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4 |
129 |
| -; IF-EVL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() |
130 |
| -; IF-EVL-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 |
131 |
| -; IF-EVL-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 |
132 |
| -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP4]] |
133 |
| -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] |
134 |
| -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] |
135 |
| -; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1 |
136 |
| -; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() |
137 |
| -; IF-EVL-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 4 |
138 |
| -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 |
139 |
| -; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
140 |
| -; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] |
141 |
| -; IF-EVL: vector.body: |
142 |
| -; IF-EVL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
143 |
| -; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] |
144 |
| -; IF-EVL-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 |
145 |
| -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[INDEX]], i64 0 |
146 |
| -; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
147 |
| -; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32() |
148 |
| -; IF-EVL-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]] |
149 |
| -; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i32> [[BROADCAST_SPLAT]], [[TMP9]] |
150 |
| -; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule <vscale x 4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT2]] |
151 |
| -; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]] |
152 |
| -; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0 |
153 |
| -; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP12]], i32 2, <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i16> poison) |
154 |
| -; IF-EVL-NEXT: [[TMP13:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i32> |
155 |
| -; IF-EVL-NEXT: [[TMP14]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP13]] |
156 |
| -; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]] |
157 |
| -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP6]] |
158 |
| -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
159 |
| -; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
160 |
| -; IF-EVL: middle.block: |
161 |
| -; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP15]]) |
162 |
| -; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] |
163 |
| -; IF-EVL: scalar.ph: |
164 |
| -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] |
165 |
| -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] |
166 |
| -; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] |
167 |
| -; IF-EVL: for.body: |
168 |
| -; IF-EVL-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] |
169 |
| -; IF-EVL-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] |
170 |
| -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]] |
171 |
| -; IF-EVL-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 |
172 |
| -; IF-EVL-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 |
173 |
| -; IF-EVL-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] |
174 |
| -; IF-EVL-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 |
175 |
| -; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] |
176 |
| -; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
177 |
| -; IF-EVL: for.cond.cleanup.loopexit: |
178 |
| -; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] |
179 |
| -; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP]] |
180 |
| -; IF-EVL: for.cond.cleanup: |
181 |
| -; IF-EVL-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] |
182 |
| -; IF-EVL-NEXT: ret i32 [[R_0_LCSSA]] |
| 120 | +; IF-EVL-OUTLOOP-LABEL: @add_i16_i32( |
| 121 | +; IF-EVL-OUTLOOP-NEXT: entry: |
| 122 | +; IF-EVL-OUTLOOP-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| 123 | +; IF-EVL-OUTLOOP-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] |
| 124 | +; IF-EVL-OUTLOOP: for.body.preheader: |
| 125 | +; IF-EVL-OUTLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| 126 | +; IF-EVL-OUTLOOP: vector.ph: |
| 127 | +; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() |
| 128 | +; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4 |
| 129 | +; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() |
| 130 | +; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 |
| 131 | +; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 |
| 132 | +; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP4]] |
| 133 | +; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] |
| 134 | +; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] |
| 135 | +; IF-EVL-OUTLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1 |
| 136 | +; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() |
| 137 | +; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 4 |
| 138 | +; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 |
| 139 | +; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
| 140 | +; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] |
| 141 | +; IF-EVL-OUTLOOP: vector.body: |
| 142 | +; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| 143 | +; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] |
| 144 | +; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 |
| 145 | +; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[INDEX]], i64 0 |
| 146 | +; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
| 147 | +; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32() |
| 148 | +; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]] |
| 149 | +; IF-EVL-OUTLOOP-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i32> [[BROADCAST_SPLAT]], [[TMP9]] |
| 150 | +; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = icmp ule <vscale x 4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT2]] |
| 151 | +; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]] |
| 152 | +; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0 |
| 153 | +; IF-EVL-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP12]], i32 2, <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i16> poison) |
| 154 | +; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i32> |
| 155 | +; IF-EVL-OUTLOOP-NEXT: [[TMP14]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP13]] |
| 156 | +; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]] |
| 157 | +; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP6]] |
| 158 | +; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| 159 | +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| 160 | +; IF-EVL-OUTLOOP: middle.block: |
| 161 | +; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP15]]) |
| 162 | +; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] |
| 163 | +; IF-EVL-OUTLOOP: scalar.ph: |
| 164 | +; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] |
| 165 | +; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] |
| 166 | +; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] |
| 167 | +; IF-EVL-OUTLOOP: for.body: |
| 168 | +; IF-EVL-OUTLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] |
| 169 | +; IF-EVL-OUTLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] |
| 170 | +; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]] |
| 171 | +; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 |
| 172 | +; IF-EVL-OUTLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 |
| 173 | +; IF-EVL-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] |
| 174 | +; IF-EVL-OUTLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 |
| 175 | +; IF-EVL-OUTLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] |
| 176 | +; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| 177 | +; IF-EVL-OUTLOOP: for.cond.cleanup.loopexit: |
| 178 | +; IF-EVL-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] |
| 179 | +; IF-EVL-OUTLOOP-NEXT: br label [[FOR_COND_CLEANUP]] |
| 180 | +; IF-EVL-OUTLOOP: for.cond.cleanup: |
| 181 | +; IF-EVL-OUTLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] |
| 182 | +; IF-EVL-OUTLOOP-NEXT: ret i32 [[R_0_LCSSA]] |
| 183 | +; |
| 184 | +; IF-EVL-INLOOP-LABEL: @add_i16_i32( |
| 185 | +; IF-EVL-INLOOP-NEXT: entry: |
| 186 | +; IF-EVL-INLOOP-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| 187 | +; IF-EVL-INLOOP-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] |
| 188 | +; IF-EVL-INLOOP: for.body.preheader: |
| 189 | +; IF-EVL-INLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| 190 | +; IF-EVL-INLOOP: vector.ph: |
| 191 | +; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() |
| 192 | +; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 8 |
| 193 | +; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() |
| 194 | +; IF-EVL-INLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 8 |
| 195 | +; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 |
| 196 | +; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP4]] |
| 197 | +; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] |
| 198 | +; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] |
| 199 | +; IF-EVL-INLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1 |
| 200 | +; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() |
| 201 | +; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 8 |
| 202 | +; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 |
| 203 | +; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 8 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer |
| 204 | +; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] |
| 205 | +; IF-EVL-INLOOP: vector.body: |
| 206 | +; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| 207 | +; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] |
| 208 | +; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 |
| 209 | +; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[INDEX]], i64 0 |
| 210 | +; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer |
| 211 | +; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i32> @llvm.experimental.stepvector.nxv8i32() |
| 212 | +; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = add <vscale x 8 x i32> zeroinitializer, [[TMP8]] |
| 213 | +; IF-EVL-INLOOP-NEXT: [[VEC_IV:%.*]] = add <vscale x 8 x i32> [[BROADCAST_SPLAT]], [[TMP9]] |
| 214 | +; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp ule <vscale x 8 x i32> [[VEC_IV]], [[BROADCAST_SPLAT2]] |
| 215 | +; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]] |
| 216 | +; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0 |
| 217 | +; IF-EVL-INLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP12]], i32 2, <vscale x 8 x i1> [[TMP10]], <vscale x 8 x i16> poison) |
| 218 | +; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = sext <vscale x 8 x i16> [[WIDE_MASKED_LOAD]] to <vscale x 8 x i32> |
| 219 | +; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = select <vscale x 8 x i1> [[TMP10]], <vscale x 8 x i32> [[TMP13]], <vscale x 8 x i32> zeroinitializer |
| 220 | +; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP14]]) |
| 221 | +; IF-EVL-INLOOP-NEXT: [[TMP16]] = add i32 [[TMP15]], [[VEC_PHI]] |
| 222 | +; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP6]] |
| 223 | +; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| 224 | +; IF-EVL-INLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| 225 | +; IF-EVL-INLOOP: middle.block: |
| 226 | +; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] |
| 227 | +; IF-EVL-INLOOP: scalar.ph: |
| 228 | +; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] |
| 229 | +; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] |
| 230 | +; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] |
| 231 | +; IF-EVL-INLOOP: for.body: |
| 232 | +; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] |
| 233 | +; IF-EVL-INLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] |
| 234 | +; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]] |
| 235 | +; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 |
| 236 | +; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 |
| 237 | +; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] |
| 238 | +; IF-EVL-INLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 |
| 239 | +; IF-EVL-INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] |
| 240 | +; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| 241 | +; IF-EVL-INLOOP: for.cond.cleanup.loopexit: |
| 242 | +; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] |
| 243 | +; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP]] |
| 244 | +; IF-EVL-INLOOP: for.cond.cleanup: |
| 245 | +; IF-EVL-INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] |
| 246 | +; IF-EVL-INLOOP-NEXT: ret i32 [[R_0_LCSSA]] |
183 | 247 | ;
|
184 | 248 | entry:
|
185 | 249 | %cmp6 = icmp sgt i32 %n, 0
|
|
0 commit comments