[AArch64][SME2] Add PEXT, PSEL builtins for SME2 #72827

dtemirbulatov · 2023-11-20T07:07:19Z

This change enables PEXT, PSEL builtins for SME2 target.

llvmbot · 2023-11-20T07:07:52Z

@llvm/pr-subscribers-clang

Author: Dinar Temirbulatov (dtemirbulatov)

Changes

This change enables PEXT, PSEL builtins for SME2 target.

Patch is 25.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72827.diff

3 Files Affected:

(modified) clang/include/clang/Basic/arm_sve.td (+17-13)
(modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c (+196-11)
(modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c (+5)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f4e..96c5ac366574bf2 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1859,19 +1859,28 @@ def SVBGRP   : SInst<"svbgrp[_{d}]",   "ddd", "UcUsUiUl", MergeNone, "aarch64_sv
 def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">;
 }
 
+let TargetGuard = "sve2p1|sme" in {
+def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [IsStreamingCompatible], []>;
+def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [IsStreamingCompatible], []>;
+def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [IsStreamingCompatible], []>;
+def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [IsStreamingCompatible], []>;
+}
+
+let TargetGuard = "sve2p1|sme2" in {
+def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, "", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [IsStreamingCompatible], []>;
+
+def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
+def SVPEXT_X2     : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
+}
+
 let TargetGuard = "sve2p1" in {
 def SVFCLAMP   : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
 def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>;
 
-def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
-def SVPEXT_X2     : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
-
-def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, "", [], []>;
-def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [], []>;
-def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [], []>;
-def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [], []>;
-
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILELE_COUNT  : SInst<"svwhilele_{d}",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -1971,11 +1980,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil",     MergeNone, "aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
 
-def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
-def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
-def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
-def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
-
 def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
index fe15d5a9db81f2f..76603e384b99ca3 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
@@ -1,10 +1,17 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve2p1 -S -DIGNORE_STREAMING_ATTR -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#ifdef IGNORE_STREAMING_ATTR
+#define __attribute__(...)
+#endif
+
 // CHECK-LABEL: @test_svpext_lane_c8_0(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
@@ -15,7 +22,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svpext_lane_c8_0(svcount_t c) {
+svbool_t test_svpext_lane_c8_0(svcount_t c) __arm_streaming {
   return svpext_lane_c8(c, 0);
 }
 
@@ -29,7 +36,7 @@ svbool_t test_svpext_lane_c8_0(svcount_t c) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svpext_lane_c8_3(svcount_t c) {
+svbool_t test_svpext_lane_c8_3(svcount_t c) __arm_streaming {
   return svpext_lane_c8(c, 3);
 }
 
@@ -45,7 +52,7 @@ svbool_t test_svpext_lane_c8_3(svcount_t c) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpext_lane_c16_0(svcount_t c) {
+svbool_t test_svpext_lane_c16_0(svcount_t c) __arm_streaming {
   return svpext_lane_c16(c, 0);
 }
 
@@ -61,7 +68,7 @@ svbool_t test_svpext_lane_c16_0(svcount_t c) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpext_lane_c16_3(svcount_t c) {
+svbool_t test_svpext_lane_c16_3(svcount_t c) __arm_streaming {
   return svpext_lane_c16(c, 3);
 }
 
@@ -77,7 +84,7 @@ svbool_t test_svpext_lane_c16_3(svcount_t c) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpext_lane_c32_0(svcount_t c) {
+svbool_t test_svpext_lane_c32_0(svcount_t c) __arm_streaming {
   return svpext_lane_c32(c, 0);
 }
 
@@ -93,7 +100,7 @@ svbool_t test_svpext_lane_c32_0(svcount_t c) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpext_lane_c32_3(svcount_t c) {
+svbool_t test_svpext_lane_c32_3(svcount_t c) __arm_streaming {
   return svpext_lane_c32(c, 3);
 }
 
@@ -109,7 +116,7 @@ svbool_t test_svpext_lane_c32_3(svcount_t c) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpext_lane_c64_0(svcount_t c) {
+svbool_t test_svpext_lane_c64_0(svcount_t c) __arm_streaming {
   return svpext_lane_c64(c, 0);
 }
 
@@ -125,7 +132,7 @@ svbool_t test_svpext_lane_c64_0(svcount_t c) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpext_lane_c64_3(svcount_t c) {
+svbool_t test_svpext_lane_c64_3(svcount_t c) __arm_streaming {
   return svpext_lane_c64(c, 3);
 }
 
@@ -147,6 +154,184 @@ svbool_t test_svpext_lane_c64_3(svcount_t c) {
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
 //
-svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) {
+svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) __arm_streaming {
   return svpext_lane_c8_x2(c, 0);
 }
+
+// CHECK-LABEL: @test_svpext_lane_c8_x2_1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_1u11__SVCount_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+//
+svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) __arm_streaming {
+  return svpext_lane_c8_x2(c, 1);
+}
+
+// CHECK-LABEL: @test_svpext_lane_c16_x2_0(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_0u11__SVCount_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) __arm_streaming {
+  return svpext_lane_c16_x2(c, 0);
+}
+
+// CHECK-LABEL: @test_svpext_lane_c16_x2_1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_1u11__SVCount_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) __arm_streaming {
+  return svpext_lane_c16_x2(c, 1);
+}
+
+// CHECK-LABEL: @test_svpext_lane_c32_x2_0(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: @_Z25test_svpext_lane_c32_x2_0u11__SVCount_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) __arm_streaming {
+  return svpext_lane_c32_x2(c, 0);
+}
+
+// CHECK-LABEL: @test_svpext_lane_c32_x2_1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+//
+// CPP-CHECK-LABEL: @_Z25test_svpext_lane_c32_x2_1u11__SVCount_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:...
[truncated]

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c

clang/include/clang/Basic/arm_sve.td

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c

clang/include/clang/Basic/arm_sve.td

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c

sdesmalen-arm · 2023-12-08T13:50:33Z

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c

+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -DTEST_SME2 -target-feature +sve -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -DTEST_SME2 -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s


nit: I think it's sufficient to test these two RUN lines with only +sve2p1.

This change enables PEXT, PSEL builtins for SME2 target.

… target.

…xt.c test.

dtemirbulatov requested review from SamTebbs33, sdesmalen-arm, CarolineConcatto, kmclaughlin-arm and MDevereau November 20, 2023 07:07

llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Nov 20, 2023

CarolineConcatto reviewed Nov 21, 2023

View reviewed changes

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c Show resolved Hide resolved

MDevereau reviewed Nov 24, 2023

View reviewed changes

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c Outdated Show resolved Hide resolved

clang/include/clang/Basic/arm_sve.td Show resolved Hide resolved

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c Show resolved Hide resolved

sdesmalen-arm reviewed Nov 27, 2023

View reviewed changes

sdesmalen-arm approved these changes Dec 8, 2023

View reviewed changes

Dinar Temirbulatov added 5 commits December 11, 2023 14:05

[AArch64][SME2] Add PEXT, PSEL builtins for SME2

ae1b183

This change enables PEXT, PSEL builtins for SME2 target.

Fixed remarks.

fe41261

Temporory removed IsStreamingOrSVE2p1.

34f1c31

Added __arm_streaming attribute in acle_sve2p1_pext.c test for sve2p1…

3f13dd2

… target.

Adjusting clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pe…

d90683d

…xt.c test.

dtemirbulatov force-pushed the sme2-psel-pext branch from f220cb4 to d90683d Compare December 11, 2023 14:09

dtemirbulatov merged commit 05b68d5 into llvm:main Dec 11, 2023

kmclaughlin-arm mentioned this pull request Dec 14, 2023

[Clang][SVE2.1] Update names of the svwhileXX builtins with predicate-as-counter #75200

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AArch64][SME2] Add PEXT, PSEL builtins for SME2 #72827

[AArch64][SME2] Add PEXT, PSEL builtins for SME2 #72827

Uh oh!

dtemirbulatov commented Nov 20, 2023

Uh oh!

llvmbot commented Nov 20, 2023

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

sdesmalen-arm Dec 8, 2023

Uh oh!

Uh oh!

		// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -DTEST_SME2 -target-feature +sve -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - -x c++ %s \| FileCheck %s -check-prefix=CPP-CHECK
		// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -DTEST_SME2 -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

[AArch64][SME2] Add PEXT, PSEL builtins for SME2 #72827

[AArch64][SME2] Add PEXT, PSEL builtins for SME2 #72827

Uh oh!

Conversation

dtemirbulatov commented Nov 20, 2023

Uh oh!

llvmbot commented Nov 20, 2023

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

sdesmalen-arm Dec 8, 2023

Choose a reason for hiding this comment

Uh oh!

Uh oh!