@@ -167,10 +167,8 @@ float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t r
167
167
}
168
168
169
169
// CHECK-LABEL: @test_vcmlaq_lane_f16(
170
- // CHECK: [[CPLX:%.*]] = bitcast <4 x half> %rhs to <2 x i32>
171
- // CHECK: [[DUP:%.*]] = shufflevector <2 x i32> [[CPLX]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
172
- // CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
173
- // CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
170
+ // CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
171
+ // CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
174
172
// CHECK: ret <8 x half> [[RES]]
175
173
float16x8_t test_vcmlaq_lane_f16 (float16x8_t acc , float16x8_t lhs , float16x4_t rhs ) {
176
174
return vcmlaq_lane_f16 (acc , lhs , rhs , 1 );
@@ -243,10 +241,8 @@ float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16
243
241
}
244
242
245
243
// CHECK-LABEL: @test_vcmlaq_rot90_lane_f16(
246
- // CHECK: [[CPLX:%.*]] = bitcast <4 x half> %rhs to <2 x i32>
247
- // CHECK: [[DUP:%.*]] = shufflevector <2 x i32> [[CPLX]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
248
- // CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
249
- // CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
244
+ // CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
245
+ // CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
250
246
// CHECK: ret <8 x half> [[RES]]
251
247
float16x8_t test_vcmlaq_rot90_lane_f16 (float16x8_t acc , float16x8_t lhs , float16x4_t rhs ) {
252
248
return vcmlaq_rot90_lane_f16 (acc , lhs , rhs , 1 );
@@ -319,10 +315,8 @@ float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float1
319
315
}
320
316
321
317
// CHECK-LABEL: @test_vcmlaq_rot180_lane_f16(
322
- // CHECK: [[CPLX:%.*]] = bitcast <4 x half> %rhs to <2 x i32>
323
- // CHECK: [[DUP:%.*]] = shufflevector <2 x i32> [[CPLX]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
324
- // CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
325
- // CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
318
+ // CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
319
+ // CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
326
320
// CHECK: ret <8 x half> [[RES]]
327
321
float16x8_t test_vcmlaq_rot180_lane_f16 (float16x8_t acc , float16x8_t lhs , float16x4_t rhs ) {
328
322
return vcmlaq_rot180_lane_f16 (acc , lhs , rhs , 1 );
@@ -395,10 +389,8 @@ float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float1
395
389
}
396
390
397
391
// CHECK-LABEL: @test_vcmlaq_rot270_lane_f16(
398
- // CHECK: [[CPLX:%.*]] = bitcast <4 x half> %rhs to <2 x i32>
399
- // CHECK: [[DUP:%.*]] = shufflevector <2 x i32> [[CPLX]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
400
- // CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
401
- // CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
392
+ // CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
393
+ // CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
402
394
// CHECK: ret <8 x half> [[RES]]
403
395
float16x8_t test_vcmlaq_rot270_lane_f16 (float16x8_t acc , float16x8_t lhs , float16x4_t rhs ) {
404
396
return vcmlaq_rot270_lane_f16 (acc , lhs , rhs , 1 );
0 commit comments