@@ -2534,4 +2534,49 @@ entry:
2534
2534
ret i128 %vmull3.i.i
2535
2535
}
2536
2536
2537
-
2537
; Codegen test: a vector %a is multiplied by a packed i64 scale whose two
; lowest i16 lanes are used separately. The "complex multiply" reading comes
; from the function name only. The assertions below pin the expected lowering:
; one rev32 for the lane swap, sqneg + tbl to build the alternating-sign
; operand, by-element sqdmull/sqdmull2 for the first products, sqdmlal/sqdmlal2
; for the accumulated second products, and uzp2 for the final lane selection.
define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coerce) {
; CHECK-LABEL: cmplx_mul_combined_re_im:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: lsr x8, x0, #16
; CHECK-NEXT: fmov d4, x0
; CHECK-NEXT: rev32 v5.8h, v0.8h
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: adrp x8, .LCPI196_0
; CHECK-NEXT: dup v1.8h, v1.h[0]
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI196_0]
; CHECK-NEXT: sqneg v2.8h, v1.8h
; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-NEXT: sqdmull v2.4s, v0.4h, v4.h[0]
; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v4.h[0]
; CHECK-NEXT: sqdmlal v2.4s, v5.4h, v1.4h
; CHECK-NEXT: sqdmlal2 v0.4s, v5.8h, v1.8h
; CHECK-NEXT: uzp2 v0.8h, v2.8h, v0.8h
; CHECK-NEXT: ret
entry:
  ; Shift the packed scale right by 16 so its second i16 becomes lane 0 below.
  %scale.hi = lshr i64 %scale.coerce, 16
  ; Swap each adjacent pair of i16 lanes of %a.
  %a.swapped = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %scale.vec = bitcast i64 %scale.coerce to <4 x i16>
  %scale.hi.vec = bitcast i64 %scale.hi to <4 x i16>
  ; Broadcast lane 0 of the shifted scale to all eight lanes, then negate it
  ; with saturation.
  %hi.splat = shufflevector <4 x i16> %scale.hi.vec, <4 x i16> poison, <8 x i32> zeroinitializer
  %hi.splat.neg = tail call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %hi.splat)
  ; Even lanes take the negated value, odd lanes the original one.
  %hi.alt = shufflevector <8 x i16> %hi.splat.neg, <8 x i16> %hi.splat, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ; First products: each half of %a times lane 0 of the unshifted scale.
  %a.lo = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %lo.splat = shufflevector <4 x i16> %scale.vec, <4 x i16> poison, <4 x i32> zeroinitializer
  %mul.lo = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a.lo, <4 x i16> %lo.splat)
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %mul.hi = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a.hi, <4 x i16> %lo.splat)
  ; Second products: lane-swapped %a times the alternating-sign vector, added
  ; with saturation onto the first products (low half, then high half).
  %swapped.lo = shufflevector <8 x i16> %a.swapped, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %alt.lo = shufflevector <8 x i16> %hi.alt, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cross.lo = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %swapped.lo, <4 x i16> %alt.lo)
  %acc.lo = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %mul.lo, <4 x i32> %cross.lo)
  %swapped.hi = shufflevector <8 x i16> %a.swapped, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %alt.hi = shufflevector <8 x i16> %hi.alt, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %cross.hi = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %swapped.hi, <4 x i16> %alt.hi)
  %acc.hi = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %mul.hi, <4 x i32> %cross.hi)
  ; View both accumulators as i16 lanes and keep only the odd-numbered lanes.
  %acc.lo.h = bitcast <4 x i32> %acc.lo to <8 x i16>
  %acc.hi.h = bitcast <4 x i32> %acc.hi to <8 x i16>
  %result = shufflevector <8 x i16> %acc.lo.h, <8 x i16> %acc.hi.h, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <8 x i16> %result
}
2581
+
2582
+ declare <8 x i16 > @llvm.aarch64.neon.sqneg.v8i16 (<8 x i16 >)
0 commit comments