Skip to content

Commit 4efcea9

Browse files
committed
[ARM][AArch64] Some additional tests for bitcast splats. NFC
1 parent 84c3c69 commit 4efcea9

File tree

3 files changed

+524
-79
lines changed

3 files changed

+524
-79
lines changed

llvm/test/CodeGen/AArch64/arm64-dup.ll

Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -504,3 +504,89 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
504504
%tmp5 = xor <4 x i32> %tmp3, %tmp4
505505
ret <4 x i32> %tmp5
506506
}
507+
508+
; Splat test: the i64 argument is reinterpreted as <4 x i16> and lane 0 is
; broadcast to all eight lanes of the <8 x i16> result (all-zero shuffle mask).
; The expected output is a move from x0 into d0 followed by one lane-0 dup.8h.
define <8 x i16> @bitcast_i64_v8i16(i64 %a) {
509+
; CHECK-LABEL: bitcast_i64_v8i16:
510+
; CHECK: // %bb.0:
511+
; CHECK-NEXT: fmov d0, x0
512+
; CHECK-NEXT: dup.8h v0, v0[0]
513+
; CHECK-NEXT: ret
514+
%b = bitcast i64 %a to <4 x i16>
515+
%r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
516+
ret <8 x i16> %r
517+
}
518+
519+
; Same shape as the lane-0 i64 -> <4 x i16> splat, but the shuffle mask is all
; ones, so lane 1 of the reinterpreted value is broadcast to the <8 x i16>
; result. The expected output keeps the x0 -> d0 move and uses dup.8h on
; element 1.
define <8 x i16> @bitcast_i64_v8i16_lane1(i64 %a) {
520+
; CHECK-LABEL: bitcast_i64_v8i16_lane1:
521+
; CHECK: // %bb.0:
522+
; CHECK-NEXT: fmov d0, x0
523+
; CHECK-NEXT: dup.8h v0, v0[1]
524+
; CHECK-NEXT: ret
525+
%b = bitcast i64 %a to <4 x i16>
526+
%r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
527+
ret <8 x i16> %r
528+
}
529+
530+
; Splat test with a floating-point scalar source: the double argument is
; reinterpreted as <4 x i16> and lane 0 is broadcast to <8 x i16>.
; Unlike the i64-argument variants, the expected output contains no fmov; it
; is only a register-liveness note on d0/q0 followed by the lane-0 dup.8h.
define <8 x i16> @bitcast_f64_v8i16(double %a) {
531+
; CHECK-LABEL: bitcast_f64_v8i16:
532+
; CHECK: // %bb.0:
533+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
534+
; CHECK-NEXT: dup.8h v0, v0[0]
535+
; CHECK-NEXT: ret
536+
%b = bitcast double %a to <4 x i16>
537+
%r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
538+
ret <8 x i16> %r
539+
}
540+
541+
; Splat test with a half-precision element type: the i64 argument is
; reinterpreted as <4 x half> and lane 0 is broadcast to <8 x half>.
; The expected output matches the integer i16 case: x0 -> d0 move, then a
; lane-0 dup.8h.
define <8 x half> @bitcast_i64_v8f16(i64 %a) {
542+
; CHECK-LABEL: bitcast_i64_v8f16:
543+
; CHECK: // %bb.0:
544+
; CHECK-NEXT: fmov d0, x0
545+
; CHECK-NEXT: dup.8h v0, v0[0]
546+
; CHECK-NEXT: ret
547+
%b = bitcast i64 %a to <4 x half>
548+
%r = shufflevector <4 x half> %b, <4 x half> poison, <8 x i32> zeroinitializer
549+
ret <8 x half> %r
550+
}
551+
552+
; Splat test for 64-bit elements: the i64 argument is bitcast to <1 x i64> and
; its single lane is broadcast to <2 x i64>. The expected output is an
; x0 -> d0 move followed by a lane-0 dup.2d.
; NOTE(review): the function name says "v2f64", but every type in the body is
; an integer vector (<1 x i64> / <2 x i64>); no double type appears. Confirm
; whether a <2 x double> result was intended or the name is simply loose.
define <2 x i64> @bitcast_i64_v2f64(i64 %a) {
553+
; CHECK-LABEL: bitcast_i64_v2f64:
554+
; CHECK: // %bb.0:
555+
; CHECK-NEXT: fmov d0, x0
556+
; CHECK-NEXT: dup.2d v0, v0[0]
557+
; CHECK-NEXT: ret
558+
%b = bitcast i64 %a to <1 x i64>
559+
%r = shufflevector <1 x i64> %b, <1 x i64> poison, <2 x i32> zeroinitializer
560+
ret <2 x i64> %r
561+
}
562+
563+
; Vector-to-vector case with equal element width: <2 x double> is bitcast to
; <2 x i64> and lane 0 is broadcast to both result lanes.
; The expected output is a single lane-0 dup.2d on the incoming register.
define <2 x i64> @bitcast_v2f64_v2i64(<2 x double> %a) {
564+
; CHECK-LABEL: bitcast_v2f64_v2i64:
565+
; CHECK: // %bb.0:
566+
; CHECK-NEXT: dup.2d v0, v0[0]
567+
; CHECK-NEXT: ret
568+
%b = bitcast <2 x double> %a to <2 x i64>
569+
%r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
570+
ret <2 x i64> %r
571+
}
572+
573+
; Vector-to-vector case where the bitcast widens the element type: <8 x i16>
; is reinterpreted as <2 x i64> and lane 0 of that view is broadcast to both
; result lanes. The expected output is a single lane-0 dup.2d.
define <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a) {
574+
; CHECK-LABEL: bitcast_v8i16_v2i64:
575+
; CHECK: // %bb.0:
576+
; CHECK-NEXT: dup.2d v0, v0[0]
577+
; CHECK-NEXT: ret
578+
%b = bitcast <8 x i16> %a to <2 x i64>
579+
%r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
580+
ret <2 x i64> %r
581+
}
582+
583+
; Vector-to-vector case where the bitcast narrows the element type: the
; 128-bit argument is reinterpreted as <8 x i16> and lane 0 of that view is
; broadcast to all eight result lanes. The expected output is a single lane-0
; dup.8h.
; NOTE(review): the function name says "v2f64", but the parameter is declared
; as <2 x i64>, not <2 x double>. Confirm which source type was intended.
define <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
584+
; CHECK-LABEL: bitcast_v2f64_v8i16:
585+
; CHECK: // %bb.0:
586+
; CHECK-NEXT: dup.8h v0, v0[0]
587+
; CHECK-NEXT: ret
588+
%b = bitcast <2 x i64> %a to <8 x i16>
589+
%r = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
590+
ret <8 x i16> %r
591+
}
592+

llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll

Lines changed: 46 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2534,4 +2534,49 @@ entry:
25342534
ret i128 %vmull3.i.i
25352535
}
25362536

2537-
2537+
; Larger splat-from-bitcast test built from saturating NEON intrinsics
; (judging by the name, a complex multiply by a packed scale; TODO confirm).
; Data flow in the IR body:
;   1. %scale.coerce and (%scale.coerce >> 16) are each bitcast to <4 x i16>.
;   2. Lane 0 of the shifted value is splatted to <8 x i16>, negated with
;      sqneg, and interleaved: even lanes take the negated splat, odd lanes
;      take the plain splat.
;   3. The low and high halves of %a are multiplied (sqdmull) by a lane-0
;      splat of the unshifted scale.
;   4. The pair-swapped copy of %a (lanes 1,0,3,2,...) is multiplied
;      (sqdmull) by the interleaved vector, and the products are combined
;      with the step-3 results via sqadd.
;   5. Both <4 x i32> sums are viewed as <8 x i16> and the odd-numbered i16
;      lanes of their concatenation form the result.
; The expected output does the interleave as a tbl with a constant-pool mask,
; uses by-element sqdmull/sqdmull2 on v4.h[0], folds the multiply+add pairs
; into sqdmlal/sqdmlal2, and ends with uzp2.
; NOTE(review): the constant-pool label embeds a number (196), presumably tied
; to this function's position in the file; verify the expectations are
; regenerated if functions are added before this one.
define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coerce) {
2538+
; CHECK-LABEL: cmplx_mul_combined_re_im:
2539+
; CHECK: // %bb.0: // %entry
2540+
; CHECK-NEXT: lsr x8, x0, #16
2541+
; CHECK-NEXT: fmov d4, x0
2542+
; CHECK-NEXT: rev32 v5.8h, v0.8h
2543+
; CHECK-NEXT: fmov d1, x8
2544+
; CHECK-NEXT: adrp x8, .LCPI196_0
2545+
; CHECK-NEXT: dup v1.8h, v1.h[0]
2546+
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI196_0]
2547+
; CHECK-NEXT: sqneg v2.8h, v1.8h
2548+
; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b
2549+
; CHECK-NEXT: sqdmull v2.4s, v0.4h, v4.h[0]
2550+
; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v4.h[0]
2551+
; CHECK-NEXT: sqdmlal v2.4s, v5.4h, v1.4h
2552+
; CHECK-NEXT: sqdmlal2 v0.4s, v5.8h, v1.8h
2553+
; CHECK-NEXT: uzp2 v0.8h, v2.8h, v0.8h
2554+
; CHECK-NEXT: ret
2555+
entry:
2556+
%scale.sroa.2.0.extract.shift23 = lshr i64 %scale.coerce, 16
2557+
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
2558+
%vec.scale.coerce = bitcast i64 %scale.coerce to <4 x i16>
2559+
%vec.scale.sroa.2.0.extract.shift23 = bitcast i64 %scale.sroa.2.0.extract.shift23 to <4 x i16>
2560+
%vecinit7.i25 = shufflevector <4 x i16> %vec.scale.sroa.2.0.extract.shift23, <4 x i16> poison, <8 x i32> zeroinitializer
2561+
%vqnegq_v1.i = tail call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %vecinit7.i25)
2562+
%0 = shufflevector <8 x i16> %vqnegq_v1.i, <8 x i16> %vecinit7.i25, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
2563+
%shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2564+
%shuffle.i3.i = shufflevector <4 x i16> %vec.scale.coerce, <4 x i16> poison, <4 x i32> zeroinitializer
2565+
%vqdmull_v2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
2566+
%shuffle.i.i26 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2567+
%vqdmull_v2.i.i28 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i26, <4 x i16> %shuffle.i3.i)
2568+
%shuffle.i.i29 = shufflevector <8 x i16> %shuffle.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2569+
%shuffle.i3.i30 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2570+
%vqdmlal2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i29, <4 x i16> %shuffle.i3.i30)
2571+
%vqdmlal_v3.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %vqdmull_v2.i.i, <4 x i32> %vqdmlal2.i.i)
2572+
%shuffle.i.i31 = shufflevector <8 x i16> %shuffle.i, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2573+
%shuffle.i3.i32 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2574+
%vqdmlal2.i.i33 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i31, <4 x i16> %shuffle.i3.i32)
2575+
%vqdmlal_v3.i.i34 = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %vqdmull_v2.i.i28, <4 x i32> %vqdmlal2.i.i33)
2576+
%1 = bitcast <4 x i32> %vqdmlal_v3.i.i to <8 x i16>
2577+
%2 = bitcast <4 x i32> %vqdmlal_v3.i.i34 to <8 x i16>
2578+
%shuffle.i35 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
2579+
ret <8 x i16> %shuffle.i35
2580+
}
2581+
2582+
declare <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16>)

0 commit comments

Comments
 (0)