Skip to content

Commit ffc2233

Browse files
authored
[AArch64][SVE2] Add pattern for constructive EXT instruction. (#115047)
rdar://137214338
1 parent 6ccbf1d commit ffc2233

File tree

2 files changed

+92
-32
lines changed

2 files changed

+92
-32
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3845,6 +3845,10 @@ let Predicates = [HasSVE2orSME] in {
38453845

38463846
// SVE2 extract vector (immediate offset, constructive)
38473847
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
3848+
let AddedComplexity = 2 in {
3849+
def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32 imm0_255:$imm))),
3850+
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1), imm0_255:$imm)>;
3851+
}
38483852
} // End HasSVE2orSME
38493853

38503854
let Predicates = [HasSVE2] in {

llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll

Lines changed: 88 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -628,87 +628,143 @@ define dso_local <vscale x 8 x half> @dupq_f16_repeat_complex(half %x, half %y)
628628
}
629629

630630
define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
631-
; CHECK-LABEL: ext_i8:
632-
; CHECK: // %bb.0:
633-
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #255
634-
; CHECK-NEXT: ret
631+
; SVE-LABEL: ext_i8:
632+
; SVE: // %bb.0:
633+
; SVE-NEXT: ext z0.b, z0.b, z1.b, #255
634+
; SVE-NEXT: ret
635+
;
636+
; SVE2-LABEL: ext_i8:
637+
; SVE2: // %bb.0:
638+
; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
639+
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
640+
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #255
641+
; SVE2-NEXT: ret
635642
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a,
636643
<vscale x 16 x i8> %b,
637644
i32 255)
638645
ret <vscale x 16 x i8> %out
639646
}
640647

641648
define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
642-
; CHECK-LABEL: ext_i16:
643-
; CHECK: // %bb.0:
644-
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
645-
; CHECK-NEXT: ret
649+
; SVE-LABEL: ext_i16:
650+
; SVE: // %bb.0:
651+
; SVE-NEXT: ext z0.b, z0.b, z1.b, #0
652+
; SVE-NEXT: ret
653+
;
654+
; SVE2-LABEL: ext_i16:
655+
; SVE2: // %bb.0:
656+
; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
657+
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
658+
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #0
659+
; SVE2-NEXT: ret
646660
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a,
647661
<vscale x 8 x i16> %b,
648662
i32 0)
649663
ret <vscale x 8 x i16> %out
650664
}
651665

652666
define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
653-
; CHECK-LABEL: ext_i32:
654-
; CHECK: // %bb.0:
655-
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #4
656-
; CHECK-NEXT: ret
667+
; SVE-LABEL: ext_i32:
668+
; SVE: // %bb.0:
669+
; SVE-NEXT: ext z0.b, z0.b, z1.b, #4
670+
; SVE-NEXT: ret
671+
;
672+
; SVE2-LABEL: ext_i32:
673+
; SVE2: // %bb.0:
674+
; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
675+
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
676+
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #4
677+
; SVE2-NEXT: ret
657678
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a,
658679
<vscale x 4 x i32> %b,
659680
i32 1)
660681
ret <vscale x 4 x i32> %out
661682
}
662683

663684
define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
664-
; CHECK-LABEL: ext_i64:
665-
; CHECK: // %bb.0:
666-
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #16
667-
; CHECK-NEXT: ret
685+
; SVE-LABEL: ext_i64:
686+
; SVE: // %bb.0:
687+
; SVE-NEXT: ext z0.b, z0.b, z1.b, #16
688+
; SVE-NEXT: ret
689+
;
690+
; SVE2-LABEL: ext_i64:
691+
; SVE2: // %bb.0:
692+
; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
693+
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
694+
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #16
695+
; SVE2-NEXT: ret
668696
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a,
669697
<vscale x 2 x i64> %b,
670698
i32 2)
671699
ret <vscale x 2 x i64> %out
672700
}
673701

674702
define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
675-
; CHECK-LABEL: ext_bf16:
676-
; CHECK: // %bb.0:
677-
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #6
678-
; CHECK-NEXT: ret
703+
; SVE-LABEL: ext_bf16:
704+
; SVE: // %bb.0:
705+
; SVE-NEXT: ext z0.b, z0.b, z1.b, #6
706+
; SVE-NEXT: ret
707+
;
708+
; SVE2-LABEL: ext_bf16:
709+
; SVE2: // %bb.0:
710+
; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
711+
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
712+
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #6
713+
; SVE2-NEXT: ret
679714
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a,
680715
<vscale x 8 x bfloat> %b,
681716
i32 3)
682717
ret <vscale x 8 x bfloat> %out
683718
}
684719

685720
define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
686-
; CHECK-LABEL: ext_f16:
687-
; CHECK: // %bb.0:
688-
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #6
689-
; CHECK-NEXT: ret
721+
; SVE-LABEL: ext_f16:
722+
; SVE: // %bb.0:
723+
; SVE-NEXT: ext z0.b, z0.b, z1.b, #6
724+
; SVE-NEXT: ret
725+
;
726+
; SVE2-LABEL: ext_f16:
727+
; SVE2: // %bb.0:
728+
; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
729+
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
730+
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #6
731+
; SVE2-NEXT: ret
690732
%out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a,
691733
<vscale x 8 x half> %b,
692734
i32 3)
693735
ret <vscale x 8 x half> %out
694736
}
695737

696738
define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
697-
; CHECK-LABEL: ext_f32:
698-
; CHECK: // %bb.0:
699-
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #16
700-
; CHECK-NEXT: ret
739+
; SVE-LABEL: ext_f32:
740+
; SVE: // %bb.0:
741+
; SVE-NEXT: ext z0.b, z0.b, z1.b, #16
742+
; SVE-NEXT: ret
743+
;
744+
; SVE2-LABEL: ext_f32:
745+
; SVE2: // %bb.0:
746+
; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
747+
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
748+
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #16
749+
; SVE2-NEXT: ret
701750
%out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a,
702751
<vscale x 4 x float> %b,
703752
i32 4)
704753
ret <vscale x 4 x float> %out
705754
}
706755

707756
define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
708-
; CHECK-LABEL: ext_f64:
709-
; CHECK: // %bb.0:
710-
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #40
711-
; CHECK-NEXT: ret
757+
; SVE-LABEL: ext_f64:
758+
; SVE: // %bb.0:
759+
; SVE-NEXT: ext z0.b, z0.b, z1.b, #40
760+
; SVE-NEXT: ret
761+
;
762+
; SVE2-LABEL: ext_f64:
763+
; SVE2: // %bb.0:
764+
; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
765+
; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
766+
; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #40
767+
; SVE2-NEXT: ret
712768
%out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a,
713769
<vscale x 2 x double> %b,
714770
i32 5)

0 commit comments

Comments
 (0)