Skip to content

Commit 33c8d5c

Browse files
[Clang][AArch64] Add FP8 variants of Neon store intrinsics (#145346)
Adds FP8 variants for existing VST1, VST2, VST3 & VST4 intrinsics.
1 parent a72a0f4 commit 33c8d5c

File tree

3 files changed: 288 additions and 12 deletions.

clang/include/clang/Basic/arm_neon.td

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -466,15 +466,15 @@ def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I",
466466
def VLD1_DUP : WInst<"vld1_dup", ".(c*!)",
467467
"QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
468468
def VST1 : WInst<"vst1", "v*(.!)",
469-
"QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
469+
"QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
470470
def VST1_X2 : WInst<"vst1_x2", "v*(2!)",
471-
"cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
471+
"cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPsmQm">;
472472
def VST1_X3 : WInst<"vst1_x3", "v*(3!)",
473-
"cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
473+
"cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPsmQm">;
474474
def VST1_X4 : WInst<"vst1_x4", "v*(4!)",
475-
"cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
475+
"cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPsmQm">;
476476
def VST1_LANE : WInst<"vst1_lane", "v*(.!)I",
477-
"QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs",
477+
"QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPsmQm",
478478
[ImmCheck<2, ImmCheckLaneIndex, 1>]>;
479479

480480
let ArchGuard = "(__ARM_FP & 2)" in {
@@ -510,14 +510,14 @@ def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPsmQm
510510
[ImmCheck<5, ImmCheckLaneIndex, 1>]>;
511511
def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPsmQm",
512512
[ImmCheck<6, ImmCheckLaneIndex, 1>]>;
513-
def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
514-
def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
515-
def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
516-
def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
513+
def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
514+
def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
515+
def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
516+
def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPsmQm",
517517
[ImmCheck<3, ImmCheckLaneIndex, 1>]>;
518-
def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
518+
def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPsmQm",
519519
[ImmCheck<4, ImmCheckLaneIndex, 1>]>;
520-
def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
520+
def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPsmQm",
521521
[ImmCheck<5, ImmCheckLaneIndex, 1>]>;
522522
let ArchGuard = "(__ARM_FP & 2)" in {
523523
def VLD2_F16 : WInst<"vld2", "2(c*!)", "hQh">;
@@ -2194,4 +2194,4 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
21942194
// fscale
21952195
def FSCALE_V128 : WInst<"vscale", "..(.S)", "QdQfQh">;
21962196
def FSCALE_V64 : WInst<"vscale", "(.q)(.q)(.qS)", "fh">;
2197-
}
2197+
}

clang/test/CodeGen/AArch64/neon-intrinsics.c

Lines changed: 176 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14848,6 +14848,16 @@ void test_vst1q_s64(int64_t *a, int64x2_t b) {
1484814848
vst1q_s64(a, b);
1484914849
}
1485014850

14851+
// CHECK-LABEL: define dso_local void @test_vst1q_mf8(
14852+
// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> [[VAL:%.*]]) #[[ATTR0]] {
14853+
// CHECK-NEXT: [[ENTRY:.*:]]
14854+
// CHECK-NEXT: store <16 x i8> [[VAL]], ptr [[A]], align 1
14855+
// CHECK-NEXT: ret void
14856+
//
14857+
void test_vst1q_mf8(mfloat8_t *a, mfloat8x16_t val) {
14858+
vst1q_mf8(a, val);
14859+
}
14860+
1485114861
// CHECK-LABEL: define dso_local void @test_vst1q_f16(
1485214862
// CHECK-SAME: ptr noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
1485314863
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -15001,6 +15011,16 @@ void test_vst1_s64(int64_t *a, int64x1_t b) {
1500115011
vst1_s64(a, b);
1500215012
}
1500315013

15014+
// CHECK-LABEL: define dso_local void @test_vst1_mf8(
15015+
// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> [[VAL:%.*]]) #[[ATTR0]] {
15016+
// CHECK-NEXT: [[ENTRY:.*:]]
15017+
// CHECK-NEXT: store <8 x i8> [[VAL]], ptr [[A]], align 1
15018+
// CHECK-NEXT: ret void
15019+
//
15020+
void test_vst1_mf8(mfloat8_t *a, mfloat8x8_t val) {
15021+
vst1_mf8(a, val);
15022+
}
15023+
1500415024
// CHECK-LABEL: define dso_local void @test_vst1_f16(
1500515025
// CHECK-SAME: ptr noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
1500615026
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -15183,6 +15203,18 @@ void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
1518315203
vst2q_s64(a, b);
1518415204
}
1518515205

15206+
// CHECK-LABEL: define dso_local void @test_vst2q_mf8(
15207+
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
15208+
// CHECK-NEXT: [[ENTRY:.*:]]
15209+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
15210+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
15211+
// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
15212+
// CHECK-NEXT: ret void
15213+
//
15214+
void test_vst2q_mf8(mfloat8_t *a, mfloat8x16x2_t b) {
15215+
vst2q_mf8(a, b);
15216+
}
15217+
1518615218
// CHECK-LABEL: define dso_local void @test_vst2q_f16(
1518715219
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
1518815220
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -15385,6 +15417,18 @@ void test_vst2_s64(int64_t *a, int64x1x2_t b) {
1538515417
vst2_s64(a, b);
1538615418
}
1538715419

15420+
// CHECK-LABEL: define dso_local void @test_vst2_mf8(
15421+
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
15422+
// CHECK-NEXT: [[ENTRY:.*:]]
15423+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
15424+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
15425+
// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
15426+
// CHECK-NEXT: ret void
15427+
//
15428+
void test_vst2_mf8(mfloat8_t *a, mfloat8x8x2_t b) {
15429+
vst2_mf8(a, b);
15430+
}
15431+
1538815432
// CHECK-LABEL: define dso_local void @test_vst2_f16(
1538915433
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
1539015434
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -15609,6 +15653,19 @@ void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
1560915653
vst3q_s64(a, b);
1561015654
}
1561115655

15656+
// CHECK-LABEL: define dso_local void @test_vst3q_mf8(
15657+
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
15658+
// CHECK-NEXT: [[ENTRY:.*:]]
15659+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
15660+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
15661+
// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
15662+
// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
15663+
// CHECK-NEXT: ret void
15664+
//
15665+
void test_vst3q_mf8(mfloat8_t *a, mfloat8x16x3_t b) {
15666+
vst3q_mf8(a, b);
15667+
}
15668+
1561215669
// CHECK-LABEL: define dso_local void @test_vst3q_f16(
1561315670
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
1561415671
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -15847,6 +15904,19 @@ void test_vst3_s64(int64_t *a, int64x1x3_t b) {
1584715904
vst3_s64(a, b);
1584815905
}
1584915906

15907+
// CHECK-LABEL: define dso_local void @test_vst3_mf8(
15908+
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
15909+
// CHECK-NEXT: [[ENTRY:.*:]]
15910+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
15911+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
15912+
// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
15913+
// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
15914+
// CHECK-NEXT: ret void
15915+
//
15916+
void test_vst3_mf8(mfloat8_t *a, mfloat8x8x3_t b) {
15917+
vst3_mf8(a, b);
15918+
}
15919+
1585015920
// CHECK-LABEL: define dso_local void @test_vst3_f16(
1585115921
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
1585215922
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16108,6 +16178,20 @@ void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
1610816178
vst4q_s64(a, b);
1610916179
}
1611016180

16181+
// CHECK-LABEL: define dso_local void @test_vst4q_mf8(
16182+
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
16183+
// CHECK-NEXT: [[ENTRY:.*:]]
16184+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
16185+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
16186+
// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
16187+
// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
16188+
// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
16189+
// CHECK-NEXT: ret void
16190+
//
16191+
void test_vst4q_mf8(mfloat8_t *a, mfloat8x16x4_t b) {
16192+
vst4q_mf8(a, b);
16193+
}
16194+
1611116195
// CHECK-LABEL: define dso_local void @test_vst4q_f16(
1611216196
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
1611316197
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16382,6 +16466,20 @@ void test_vst4_s64(int64_t *a, int64x1x4_t b) {
1638216466
vst4_s64(a, b);
1638316467
}
1638416468

16469+
// CHECK-LABEL: define dso_local void @test_vst4_mf8(
16470+
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
16471+
// CHECK-NEXT: [[ENTRY:.*:]]
16472+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
16473+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
16474+
// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
16475+
// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
16476+
// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
16477+
// CHECK-NEXT: ret void
16478+
//
16479+
void test_vst4_mf8(mfloat8_t *a, mfloat8x8x4_t b) {
16480+
vst4_mf8(a, b);
16481+
}
16482+
1638516483
// CHECK-LABEL: define dso_local void @test_vst4_f16(
1638616484
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
1638716485
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16789,6 +16887,18 @@ poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
1678916887
return vld1_p64_x4(a);
1679016888
}
1679116889

16890+
// CHECK-LABEL: define dso_local void @test_vst1q_mf8_x2(
16891+
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
16892+
// CHECK-NEXT: [[ENTRY:.*:]]
16893+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
16894+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
16895+
// CHECK-NEXT: call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
16896+
// CHECK-NEXT: ret void
16897+
//
16898+
void test_vst1q_mf8_x2(mfloat8_t *a, mfloat8x16x2_t b) {
16899+
vst1q_mf8_x2(a, b);
16900+
}
16901+
1679216902
// CHECK-LABEL: define dso_local void @test_vst1q_f64_x2(
1679316903
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
1679416904
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16823,6 +16933,18 @@ void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
1682316933
vst1q_p64_x2(a, b);
1682416934
}
1682516935

16936+
// CHECK-LABEL: define dso_local void @test_vst1_mf8_x2(
16937+
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
16938+
// CHECK-NEXT: [[ENTRY:.*:]]
16939+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
16940+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
16941+
// CHECK-NEXT: call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
16942+
// CHECK-NEXT: ret void
16943+
//
16944+
void test_vst1_mf8_x2(mfloat8_t *a, mfloat8x8x2_t b) {
16945+
vst1_mf8_x2(a, b);
16946+
}
16947+
1682616948
// CHECK-LABEL: define dso_local void @test_vst1_f64_x2(
1682716949
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
1682816950
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16859,6 +16981,19 @@ void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
1685916981
vst1_p64_x2(a, b);
1686016982
}
1686116983

16984+
// CHECK-LABEL: define dso_local void @test_vst1q_mf8_x3(
16985+
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
16986+
// CHECK-NEXT: [[ENTRY:.*:]]
16987+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
16988+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
16989+
// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
16990+
// CHECK-NEXT: call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
16991+
// CHECK-NEXT: ret void
16992+
//
16993+
void test_vst1q_mf8_x3(mfloat8_t *a, mfloat8x16x3_t b) {
16994+
vst1q_mf8_x3(a, b);
16995+
}
16996+
1686216997
// CHECK-LABEL: define dso_local void @test_vst1q_f64_x3(
1686316998
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
1686416999
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16900,6 +17035,19 @@ void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
1690017035
vst1q_p64_x3(a, b);
1690117036
}
1690217037

17038+
// CHECK-LABEL: define dso_local void @test_vst1_mf8_x3(
17039+
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
17040+
// CHECK-NEXT: [[ENTRY:.*:]]
17041+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
17042+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
17043+
// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
17044+
// CHECK-NEXT: call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
17045+
// CHECK-NEXT: ret void
17046+
//
17047+
void test_vst1_mf8_x3(mfloat8_t *a, mfloat8x8x3_t b) {
17048+
vst1_mf8_x3(a, b);
17049+
}
17050+
1690317051
// CHECK-LABEL: define dso_local void @test_vst1_f64_x3(
1690417052
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
1690517053
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16944,6 +17092,20 @@ void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
1694417092
vst1_p64_x3(a, b);
1694517093
}
1694617094

17095+
// CHECK-LABEL: define dso_local void @test_vst1q_mf8_x4(
17096+
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
17097+
// CHECK-NEXT: [[ENTRY:.*:]]
17098+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
17099+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
17100+
// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
17101+
// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
17102+
// CHECK-NEXT: call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
17103+
// CHECK-NEXT: ret void
17104+
//
17105+
void test_vst1q_mf8_x4(mfloat8_t *a, mfloat8x16x4_t b) {
17106+
vst1q_mf8_x4(a, b);
17107+
}
17108+
1694717109
// CHECK-LABEL: define dso_local void @test_vst1q_f64_x4(
1694817110
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
1694917111
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16992,6 +17154,20 @@ void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
1699217154
vst1q_p64_x4(a, b);
1699317155
}
1699417156

17157+
// CHECK-LABEL: define dso_local void @test_vst1_mf8_x4(
17158+
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
17159+
// CHECK-NEXT: [[ENTRY:.*:]]
17160+
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
17161+
// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
17162+
// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
17163+
// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
17164+
// CHECK-NEXT: call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
17165+
// CHECK-NEXT: ret void
17166+
//
17167+
void test_vst1_mf8_x4(mfloat8_t *a, mfloat8x8x4_t b) {
17168+
vst1_mf8_x4(a, b);
17169+
}
17170+
1699517171
// CHECK-LABEL: define dso_local void @test_vst1_f64_x4(
1699617172
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
1699717173
// CHECK-NEXT: [[ENTRY:.*:]]

0 commit comments

Comments
 (0)