Skip to content

Commit d019d05

Browse files
[LLVM][CodeGen][AArch64] Add isel for i8/i16 sve.compact intrinsics. (#143139)
The i8/i16 instruction variants are available for:
* normal functions using sve2p2
* streaming functions using sme2p2
1 parent acc43db commit d019d05

File tree

3 files changed

+59
-2
lines changed

3 files changed

+59
-2
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4499,7 +4499,7 @@ let Predicates = [HasNonStreamingSVE2p2_or_SME2p2] in {
44994499
// SVE2 EXPAND
45004500
defm EXPAND_ZPZ : sve2_int_perm_expand<"expand">;
45014501
// SVE COMPACT - byte and halfword
4502-
defm COMPACT_ZPZ : sve_int_perm_compact_bh<"compact">;
4502+
defm COMPACT_ZPZ : sve_int_perm_compact_bh<"compact", int_aarch64_sve_compact>;
45034503
}
45044504

45054505
//===----------------------------------------------------------------------===//

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7884,9 +7884,14 @@ multiclass sve_int_perm_compact_sd<string asm, SDPatternOperator op> {
78847884
def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
78857885
}
78867886

7887-
multiclass sve_int_perm_compact_bh<string asm> {
7887+
multiclass sve_int_perm_compact_bh<string asm, SDPatternOperator op> {
78887888
def _B : sve_int_perm_compact<0b00, asm, ZPR8>;
78897889
def _H : sve_int_perm_compact<0b01, asm, ZPR16>;
7890+
7891+
def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
7892+
def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
7893+
def : SVE_2_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
7894+
def : SVE_2_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>;
78907895
}
78917896

78927897
//===----------------------------------------------------------------------===//
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p2 < %s | FileCheck %s --check-prefixes=CHECK
3+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p2 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
4+
5+
;
6+
; COMPACT
7+
;
8+
9+
define <vscale x 16 x i8> @compact_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
10+
; CHECK-LABEL: compact_i8:
11+
; CHECK: // %bb.0:
12+
; CHECK-NEXT: compact z0.b, p0, z0.b
13+
; CHECK-NEXT: ret
14+
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> %pg,
15+
<vscale x 16 x i8> %a)
16+
ret <vscale x 16 x i8> %out
17+
}
18+
19+
define <vscale x 8 x i16> @compact_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
20+
; CHECK-LABEL: compact_i16:
21+
; CHECK: // %bb.0:
22+
; CHECK-NEXT: compact z0.h, p0, z0.h
23+
; CHECK-NEXT: ret
24+
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> %pg,
25+
<vscale x 8 x i16> %a)
26+
ret <vscale x 8 x i16> %out
27+
}
28+
29+
define <vscale x 8 x half> @compact_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
30+
; CHECK-LABEL: compact_f16:
31+
; CHECK: // %bb.0:
32+
; CHECK-NEXT: compact z0.h, p0, z0.h
33+
; CHECK-NEXT: ret
34+
%out = call <vscale x 8 x half> @llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> %pg,
35+
<vscale x 8 x half> %a)
36+
ret <vscale x 8 x half> %out
37+
}
38+
39+
define <vscale x 8 x bfloat> @compact_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) {
40+
; CHECK-LABEL: compact_bf16:
41+
; CHECK: // %bb.0:
42+
; CHECK-NEXT: compact z0.h, p0, z0.h
43+
; CHECK-NEXT: ret
44+
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> %pg,
45+
<vscale x 8 x bfloat> %a)
46+
ret <vscale x 8 x bfloat> %out
47+
}
48+
49+
declare <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
50+
declare <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
51+
declare <vscale x 8 x half> @llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
52+
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)

0 commit comments

Comments
 (0)