|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
2 |
| -; RUN: opt < %s -passes=interleaved-access -mtriple=aarch64-linux-gnu -mattr=+sve -S | FileCheck %s |
| 2 | +; RUN: opt < %s -passes=loop-vectorize,interleaved-access -mtriple=aarch64-linux-gnu -mattr=+sve -S | FileCheck %s |
3 | 3 |
|
4 | 4 |
|
5 | 5 | define void @deinterleave4(ptr %src) {
|
@@ -136,3 +136,145 @@ define void @negative_deinterleave4_test(ptr %src) {
|
136 | 136 |
|
137 | 137 | ret void
|
138 | 138 | }
|
| 139 | + |
| | +; Four consecutive i32 members (16 bytes per element); used as the element type of the GEPs in @interleave_deinterleave. |
| 140 | +%struct.xyzt = type { i32, i32, i32, i32 } |
| 141 | + |
| | +; Scalar loop over 1024 %struct.xyzt elements: for each element, the four i32 |
| | +; members of %a and %b (byte offsets 0, 4, 8, 12) are combined with add, sub, |
| | +; shl and ashr respectively and written to the same offsets in %dst. |
| | +; The CHECK lines expect a scalable-vector loop that reads %a and %b with |
| | +; @llvm.aarch64.sve.ld4.sret.nxv4i32 and writes %dst with a single |
| | +; @llvm.aarch64.sve.st4.nxv4i32, entered only after a minimum-iteration test |
| | +; and pointer-range comparisons of DST against A and against B; the original |
| | +; scalar loop is kept as the fallback/remainder path. |
| | +; The assertions below are autogenerated (see the NOTE at the top of the file): |
| | +; regenerate them with utils/update_test_checks.py instead of editing by hand. |
| 142 | +define void @interleave_deinterleave(ptr writeonly %dst, ptr readonly %a, ptr readonly %b) { |
| 143 | +; CHECK-LABEL: define void @interleave_deinterleave |
| 144 | +; CHECK-SAME: (ptr writeonly [[DST:%.*]], ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR0]] { |
| 145 | +; CHECK-NEXT: entry: |
| 146 | +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() |
| 147 | +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 |
| 148 | +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]]) |
| 149 | +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]] |
| 150 | +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] |
| 151 | +; CHECK: vector.memcheck: |
| 152 | +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 16384 |
| 153 | +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 16384 |
| 154 | +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B]], i64 16384 |
| 155 | +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] |
| 156 | +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] |
| 157 | +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] |
| 158 | +; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]] |
| 159 | +; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] |
| 160 | +; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] |
| 161 | +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] |
| 162 | +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] |
| 163 | +; CHECK: vector.ph: |
| 164 | +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() |
| 165 | +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 |
| 166 | +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] |
| 167 | +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] |
| 168 | +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() |
| 169 | +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 |
| 170 | +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| 171 | +; CHECK: vector.body: |
| 172 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| 173 | +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 |
| 174 | +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_XYZT:%.*]], ptr [[A]], i64 [[TMP7]] |
| 175 | +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 |
| 176 | +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP9]]) |
| 177 | +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0 |
| 178 | +; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 |
| 179 | +; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2 |
| 180 | +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 3 |
| 181 | +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B]], i64 [[TMP7]] |
| 182 | +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 |
| 183 | +; CHECK-NEXT: [[LDN14:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP15]]) |
| 184 | +; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 0 |
| 185 | +; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 1 |
| 186 | +; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 2 |
| 187 | +; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 3 |
| 188 | +; CHECK-NEXT: [[TMP20:%.*]] = add nsw <vscale x 4 x i32> [[TMP16]], [[TMP10]] |
| 189 | +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST]], i64 [[TMP7]] |
| 190 | +; CHECK-NEXT: [[TMP22:%.*]] = sub nsw <vscale x 4 x i32> [[TMP11]], [[TMP17]] |
| 191 | +; CHECK-NEXT: [[TMP23:%.*]] = shl <vscale x 4 x i32> [[TMP12]], [[TMP18]] |
| 192 | +; CHECK-NEXT: [[TMP24:%.*]] = ashr <vscale x 4 x i32> [[TMP13]], [[TMP19]] |
| 193 | +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP21]], i64 12 |
| 194 | +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 -3 |
| 195 | +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[TMP23]], <vscale x 4 x i32> [[TMP24]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP26]]) |
| 196 | +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] |
| 197 | +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| 198 | +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| 199 | +; CHECK: middle.block: |
| 200 | +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] |
| 201 | +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] |
| 202 | +; CHECK: scalar.ph: |
| 203 | +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] |
| 204 | +; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| 205 | +; CHECK: for.body: |
| 206 | +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] |
| 207 | +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[A]], i64 [[INDVARS_IV]] |
| 208 | +; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 |
| 209 | +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B]], i64 [[INDVARS_IV]] |
| 210 | +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 |
| 211 | +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP28]] |
| 212 | +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST]], i64 [[INDVARS_IV]] |
| 213 | +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4 |
| 214 | +; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4 |
| 215 | +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[Y]], align 4 |
| 216 | +; CHECK-NEXT: [[Y11:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 4 |
| 217 | +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[Y11]], align 4 |
| 218 | +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP30]], [[TMP31]] |
| 219 | +; CHECK-NEXT: [[Y14:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 4 |
| 220 | +; CHECK-NEXT: store i32 [[SUB]], ptr [[Y14]], align 4 |
| 221 | +; CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8 |
| 222 | +; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[Z]], align 4 |
| 223 | +; CHECK-NEXT: [[Z19:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 8 |
| 224 | +; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[Z19]], align 4 |
| 225 | +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP32]], [[TMP33]] |
| 226 | +; CHECK-NEXT: [[Z22:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 8 |
| 227 | +; CHECK-NEXT: store i32 [[SHL]], ptr [[Z22]], align 4 |
| 228 | +; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12 |
| 229 | +; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[T]], align 4 |
| 230 | +; CHECK-NEXT: [[T27:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 12 |
| 231 | +; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[T27]], align 4 |
| 232 | +; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[TMP34]], [[TMP35]] |
| 233 | +; CHECK-NEXT: [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 12 |
| 234 | +; CHECK-NEXT: store i32 [[SHR]], ptr [[T30]], align 4 |
| 235 | +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 |
| 236 | +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 |
| 237 | +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| 238 | +; CHECK: for.cond.cleanup: |
| 239 | +; CHECK-NEXT: ret void |
| 240 | +; |
| | +; Test input: the scalar loop below is what the passes are run on. |
| 241 | +entry: |
| 242 | + br label %for.body |
| 243 | + |
| 244 | +for.body: |
| 245 | + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] |
| | + ; Byte offset 0: dst[i] = b[i] + a[i] (add nsw). |
| 246 | + %arrayidx = getelementptr inbounds %struct.xyzt, ptr %a, i64 %indvars.iv |
| 247 | + %0 = load i32, ptr %arrayidx, align 4 |
| 248 | + %arrayidx2 = getelementptr inbounds %struct.xyzt, ptr %b, i64 %indvars.iv |
| 249 | + %1 = load i32, ptr %arrayidx2, align 4 |
| 250 | + %add = add nsw i32 %1, %0 |
| 251 | + %arrayidx5 = getelementptr inbounds %struct.xyzt, ptr %dst, i64 %indvars.iv |
| 252 | + store i32 %add, ptr %arrayidx5, align 4 |
| | + ; Byte offset 4: dst[i] = a[i] - b[i] (sub nsw). |
| 253 | + %y = getelementptr inbounds nuw i8, ptr %arrayidx, i64 4 |
| 254 | + %2 = load i32, ptr %y, align 4 |
| 255 | + %y11 = getelementptr inbounds nuw i8, ptr %arrayidx2, i64 4 |
| 256 | + %3 = load i32, ptr %y11, align 4 |
| 257 | + %sub = sub nsw i32 %2, %3 |
| 258 | + %y14 = getelementptr inbounds nuw i8, ptr %arrayidx5, i64 4 |
| 259 | + store i32 %sub, ptr %y14, align 4 |
| | + ; Byte offset 8: dst[i] = a[i] << b[i] (shl). |
| 260 | + %z = getelementptr inbounds nuw i8, ptr %arrayidx, i64 8 |
| 261 | + %4 = load i32, ptr %z, align 4 |
| 262 | + %z19 = getelementptr inbounds nuw i8, ptr %arrayidx2, i64 8 |
| 263 | + %5 = load i32, ptr %z19, align 4 |
| 264 | + %shl = shl i32 %4, %5 |
| 265 | + %z22 = getelementptr inbounds nuw i8, ptr %arrayidx5, i64 8 |
| 266 | + store i32 %shl, ptr %z22, align 4 |
| | + ; Byte offset 12: dst[i] = a[i] >> b[i] (ashr, arithmetic shift). |
| 267 | + %t = getelementptr inbounds nuw i8, ptr %arrayidx, i64 12 |
| 268 | + %6 = load i32, ptr %t, align 4 |
| 269 | + %t27 = getelementptr inbounds nuw i8, ptr %arrayidx2, i64 12 |
| 270 | + %7 = load i32, ptr %t27, align 4 |
| 271 | + %shr = ashr i32 %6, %7 |
| 272 | + %t30 = getelementptr inbounds nuw i8, ptr %arrayidx5, i64 12 |
| 273 | + store i32 %shr, ptr %t30, align 4 |
| | + ; Step the induction variable by 1; the loop exits once it reaches 1024. |
| 274 | + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| 275 | + %exitcond.not = icmp eq i64 %indvars.iv.next, 1024 |
| 276 | + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body |
| 277 | + |
| 278 | +for.cond.cleanup: |
| 279 | + ret void |
| 280 | +} |
0 commit comments