Skip to content

Commit e4ee970

Browse files
authored
[AArch64] Implement intrinsics for F1CVTL/F2CVTL and BF1CVTL/BF2CVTL (#116959)
This patch implements the following intrinsics: 8-bit floating-point convert to deinterleaved half-precision or BFloat16. ``` c // Variant is also available for: _bf16[_mf8]_x2 svfloat16x2_t svcvtl1_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) __arm_streaming; svfloat16x2_t svcvtl2_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) __arm_streaming; ``` Defined in ARM-software/acle#323 Co-authored-by: Caroline Concatto [email protected] Co-authored-by: Marian Lukac [email protected]
1 parent 12ccb62 commit e4ee970

File tree

11 files changed

+213
-7
lines changed

11 files changed

+213
-7
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ namespace clang {
336336
bool isTupleSet() const { return Flags & IsTupleSet; }
337337
bool isReadZA() const { return Flags & IsReadZA; }
338338
bool isWriteZA() const { return Flags & IsWriteZA; }
339+
bool setsFPMR() const { return Flags & SetsFPMR; }
339340
bool isReductionQV() const { return Flags & IsReductionQV; }
340341
uint64_t getBits() const { return Flags; }
341342
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }

clang/include/clang/Basic/arm_sve.td

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2422,14 +2422,16 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
24222422
def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>;
24232423
}
24242424

2425-
//
2426-
// Multi-vector scaling
2427-
//
2428-
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
2425+
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
2426+
// Multi-vector scaling
24292427
def FSCALE_SINGLE_X2 : Inst<"svscale[_single_{d}_x2]", "22x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x2", [IsStreaming],[]>;
24302428
def FSCALE_SINGLE_X4 : Inst<"svscale[_single_{d}_x4]", "44x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x4", [IsStreaming],[]>;
24312429
def FSCALE_X2 : Inst<"svscale[_{d}_x2]", "222.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x2", [IsStreaming],[]>;
24322430
def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x4", [IsStreaming],[]>;
2431+
2432+
// Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector
2433+
def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl1_x2", [IsStreaming, SetsFPMR], []>;
2434+
def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl2_x2", [IsStreaming, SetsFPMR], []>;
24332435
}
24342436

24352437
let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {

clang/include/clang/Basic/arm_sve_sme_incl.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ include "arm_immcheck_incl.td"
9494
// l: int64_t
9595
// m: uint32_t
9696
// n: uint64_t
97+
// >: fpm_t
9798

9899
// [: svuint8_t
99100
// t: svint32_t
@@ -103,6 +104,7 @@ include "arm_immcheck_incl.td"
103104
// M: svfloat32_t
104105
// N: svfloat64_t
105106
// $: svbfloat16_t
107+
// ~: svmfloat8_t
106108

107109
// J: Prefetch type (sv_prfop)
108110

@@ -235,6 +237,7 @@ def IsInOutZA : FlagType<0x200000000000>;
235237
def IsInZT0 : FlagType<0x400000000000>;
236238
def IsOutZT0 : FlagType<0x800000000000>;
237239
def IsInOutZT0 : FlagType<0x1000000000000>;
240+
def SetsFPMR : FlagType<0x2000000000000>;
238241

239242
defvar InvalidMode = "";
240243

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10859,6 +10859,10 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
1085910859
else if (TypeFlags.isUndef())
1086010860
return UndefValue::get(Ty);
1086110861
else if (Builtin->LLVMIntrinsic != 0) {
10862+
// Emit set FPMR for intrinsics that require it
10863+
if (TypeFlags.setsFPMR())
10864+
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
10865+
Ops.pop_back_val());
1086210866
if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
1086310867
InsertExplicitZeroOperand(Builder, Ty, Ops);
1086410868

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
8+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
9+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
10+
11+
#include <arm_sve.h>
12+
13+
#ifdef SVE_OVERLOADED_FORMS
14+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
15+
#else
16+
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
17+
#endif
18+
19+
// CHECK-LABEL: @test_cvtl1_f16_x2(
20+
// CHECK-NEXT: entry:
21+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
22+
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]])
23+
// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
24+
//
25+
// CPP-CHECK-LABEL: @_Z17test_cvtl1_f16_x2u13__SVMfloat8_tm(
26+
// CPP-CHECK-NEXT: entry:
27+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
28+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]])
29+
// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
30+
//
31+
svfloat16x2_t test_cvtl1_f16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
32+
return SVE_ACLE_FUNC(svcvtl1_f16,_mf8,_x2_fpm)(zn, fpmr);
33+
}
34+
35+
// CHECK-LABEL: @test_cvtl2_f16_x2(
36+
// CHECK-NEXT: entry:
37+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
38+
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]])
39+
// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
40+
//
41+
// CPP-CHECK-LABEL: @_Z17test_cvtl2_f16_x2u13__SVMfloat8_tm(
42+
// CPP-CHECK-NEXT: entry:
43+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
44+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]])
45+
// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
46+
//
47+
svfloat16x2_t test_cvtl2_f16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
48+
return SVE_ACLE_FUNC(svcvtl2_f16,_mf8,_x2_fpm)(zn, fpmr);
49+
}
50+
51+
// CHECK-LABEL: @test_cvtl1_bf16_x2(
52+
// CHECK-NEXT: entry:
53+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
54+
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]])
55+
// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
56+
//
57+
// CPP-CHECK-LABEL: @_Z18test_cvtl1_bf16_x2u13__SVMfloat8_tm(
58+
// CPP-CHECK-NEXT: entry:
59+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
60+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]])
61+
// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
62+
//
63+
svbfloat16x2_t test_cvtl1_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
64+
return SVE_ACLE_FUNC(svcvtl1_bf16,_mf8,_x2_fpm)(zn, fpmr);
65+
}
66+
67+
// CHECK-LABEL: @test_cvtl2_bf16_x2(
68+
// CHECK-NEXT: entry:
69+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
70+
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]])
71+
// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
72+
//
73+
// CPP-CHECK-LABEL: @_Z18test_cvtl2_bf16_x2u13__SVMfloat8_tm(
74+
// CPP-CHECK-NEXT: entry:
75+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
76+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]])
77+
// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
78+
//
79+
svbfloat16x2_t test_cvtl2_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
80+
return SVE_ACLE_FUNC(svcvtl2_bf16,_mf8,_x2_fpm)(zn, fpmr);
81+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -verify -emit-llvm-only %s
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
#include <arm_sve.h>
6+
7+
8+
void test_features_sme2_fp8(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
9+
// expected-error@+1 {{'svcvtl1_f16_mf8_x2_fpm' needs target feature sme,sme2,fp8}}
10+
svcvtl1_f16_mf8_x2_fpm(zn, fpmr);
11+
// expected-error@+1 {{'svcvtl2_f16_mf8_x2_fpm' needs target feature sme,sme2,fp8}}
12+
svcvtl2_f16_mf8_x2_fpm(zn, fpmr);
13+
// expected-error@+1 {{'svcvtl1_bf16_mf8_x2_fpm' needs target feature sme,sme2,fp8}}
14+
svcvtl1_bf16_mf8_x2_fpm(zn, fpmr);
15+
// expected-error@+1 {{'svcvtl2_bf16_mf8_x2_fpm' needs target feature sme,sme2,fp8}}
16+
svcvtl2_bf16_mf8_x2_fpm(zn, fpmr);
17+
}

clang/utils/TableGen/SveEmitter.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ namespace {
5252
class SVEType {
5353
bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat, MFloat;
5454
bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp,
55-
Svcount;
55+
Svcount, Fpm;
5656
unsigned Bitwidth, ElementBitwidth, NumVectors;
5757

5858
public:
@@ -62,7 +62,7 @@ class SVEType {
6262
: Float(false), Signed(true), Immediate(false), Void(false),
6363
Constant(false), Pointer(false), BFloat(false), MFloat(false),
6464
DefaultType(false), IsScalable(true), Predicate(false),
65-
PredicatePattern(false), PrefetchOp(false), Svcount(false),
65+
PredicatePattern(false), PrefetchOp(false), Svcount(false), Fpm(false),
6666
Bitwidth(128), ElementBitwidth(~0U), NumVectors(NumVectors) {
6767
if (!TS.empty())
6868
applyTypespec(TS);
@@ -101,6 +101,7 @@ class SVEType {
101101
bool isPrefetchOp() const { return PrefetchOp; }
102102
bool isSvcount() const { return Svcount; }
103103
bool isConstant() const { return Constant; }
104+
bool isFpm() const { return Fpm; }
104105
unsigned getElementSizeInBits() const { return ElementBitwidth; }
105106
unsigned getNumVectors() const { return NumVectors; }
106107

@@ -497,6 +498,9 @@ std::string SVEType::str() const {
497498
if (isPrefetchOp())
498499
return "enum svprfop";
499500

501+
if (isFpm())
502+
return "fpm_t";
503+
500504
std::string S;
501505
if (Void)
502506
S += "void";
@@ -752,6 +756,9 @@ void SVEType::applyModifier(char Mod) {
752756
ElementBitwidth = Bitwidth = 32;
753757
NumVectors = 0;
754758
break;
759+
case '>':
760+
Fpm = true;
761+
[[fallthrough]];
755762
case 'n':
756763
Predicate = false;
757764
Svcount = false;
@@ -926,6 +933,12 @@ void SVEType::applyModifier(char Mod) {
926933
Float = false;
927934
BFloat = false;
928935
break;
936+
case '~':
937+
Float = false;
938+
BFloat = false;
939+
MFloat = true;
940+
ElementBitwidth = 8;
941+
break;
929942
case '.':
930943
llvm_unreachable(". is never a type in itself");
931944
break;

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3813,6 +3813,15 @@ let TargetPrefix = "aarch64" in {
38133813
LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
38143814
[IntrNoMem]>;
38153815

3816+
class SME2_FP8_CVT_X2_Single_Intrinsic
3817+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3818+
[llvm_nxv16i8_ty],
3819+
[IntrReadMem, IntrInaccessibleMemOnly]>;
3820+
//
3821+
// CVT from FP8 to deinterleaved half-precision/BFloat16 multi-vector
3822+
//
3823+
def int_aarch64_sve_fp8_cvtl1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic;
3824+
def int_aarch64_sve_fp8_cvtl2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic;
38163825
}
38173826

38183827
// SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
383383
void SelectPExtPair(SDNode *N, unsigned Opc);
384384
void SelectWhilePair(SDNode *N, unsigned Opc);
385385
void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
386+
void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
386387
void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
387388
void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
388389
bool IsTupleInput, unsigned Opc);
@@ -1866,6 +1867,27 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
18661867
CurDAG->RemoveDeadNode(N);
18671868
}
18681869

1870+
void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1871+
unsigned Opcode) {
1872+
SDLoc DL(N);
1873+
EVT VT = N->getValueType(0);
1874+
SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1875+
Ops.push_back(/*Chain*/ N->getOperand(0));
1876+
1877+
SDNode *Instruction =
1878+
CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1879+
SDValue SuperReg = SDValue(Instruction, 0);
1880+
1881+
for (unsigned i = 0; i < NumVecs; ++i)
1882+
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1883+
AArch64::zsub0 + i, DL, VT, SuperReg));
1884+
1885+
// Copy chain
1886+
unsigned ChainIdx = NumVecs;
1887+
ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1888+
CurDAG->RemoveDeadNode(N);
1889+
}
1890+
18691891
void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
18701892
unsigned NumVecs,
18711893
bool IsZmMulti,
@@ -5547,6 +5569,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
55475569
SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
55485570
return;
55495571
}
5572+
case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5573+
if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5574+
Node->getValueType(0),
5575+
{AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5576+
SelectCVTIntrinsicFP8(Node, 2, Opc);
5577+
return;
5578+
case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5579+
if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5580+
Node->getValueType(0),
5581+
{AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5582+
SelectCVTIntrinsicFP8(Node, 2, Opc);
5583+
return;
55505584
}
55515585
} break;
55525586
case ISD::INTRINSIC_WO_CHAIN: {

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2412,7 +2412,7 @@ multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
24122412

24132413
// SME2 multi-vec FP8 up convert two registers
24142414
multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
2415-
def _NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>{
2415+
def NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>{
24162416
let Uses = [FPMR, FPCR];
24172417
}
24182418
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+fp8 -verify-machineinstrs -force-streaming < %s | FileCheck %s
3+
4+
; F1CVTL / F2CVTL
5+
6+
define { <vscale x 8 x half>, <vscale x 8 x half> } @f1cvtl(<vscale x 16 x i8> %zm) {
7+
; CHECK-LABEL: f1cvtl:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: f1cvtl { z0.h, z1.h }, z0.b
10+
; CHECK-NEXT: ret
11+
%res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8f16(<vscale x 16 x i8> %zm)
12+
ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
13+
}
14+
15+
define { <vscale x 8 x half>, <vscale x 8 x half> } @f2cvtl(<vscale x 16 x i8> %zm) {
16+
; CHECK-LABEL: f2cvtl:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: f2cvtl { z0.h, z1.h }, z0.b
19+
; CHECK-NEXT: ret
20+
%res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxvbf16(<vscale x 16 x i8> %zm)
21+
ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
22+
}
23+
24+
; BF1CVTL / BF2CVTL
25+
26+
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @bf1cvtl(<vscale x 16 x i8> %zm) {
27+
; CHECK-LABEL: bf1cvtl:
28+
; CHECK: // %bb.0:
29+
; CHECK-NEXT: bf1cvtl { z0.h, z1.h }, z0.b
30+
; CHECK-NEXT: ret
31+
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8bf16(<vscale x 16 x i8> %zm)
32+
ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
33+
}
34+
35+
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @bf2cvtl( <vscale x 16 x i8> %zm) {
36+
; CHECK-LABEL: bf2cvtl:
37+
; CHECK: // %bb.0:
38+
; CHECK-NEXT: bf2cvtl { z0.h, z1.h }, z0.b
39+
; CHECK-NEXT: ret
40+
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8bf16(<vscale x 16 x i8> %zm)
41+
ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
42+
}

0 commit comments

Comments
 (0)