-
Notifications
You must be signed in to change notification settings - Fork 14.3k
allow prefer 256 bit attribute target #117092
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
allow prefer 256 bit attribute target #117092
Conversation
Rework the attr-target-x86 test so the CHECK lines for the attributes are next to their corresponding `__attribute__`.
Note that this has the test changes from #117091 included. |
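@llvm/pr-subscribers-clang Author: Matthias Braun (MatzeB) Changes: It would be useful for us to have `__attribute__((target("prefer-256-bit")))` / `__attribute__((target("no-prefer-256-bit")))` to create variants of a function with 256/512-bit vector sizes within the same application. Full diff: https://github.com/llvm/llvm-project/pull/117092.diff 2 Files Affected: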
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5993257e27d5a9..e903e16032bf02 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1162,6 +1162,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("pconfig", true)
.Case("pku", true)
.Case("popcnt", true)
+ .Case("prefer-256-bit", true)
.Case("prefetchi", true)
.Case("prfchw", true)
.Case("ptwrite", true)
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index 2033a8b4c335f9..e9264efaa85c4f 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -1,80 +1,115 @@
// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -tune-cpu i686 -emit-llvm %s -o - | FileCheck %s
-int baz(int a) { return 4; }
+// CHECK: define {{.*}}@f_default({{.*}} [[f_default:#[0-9]+]]
+// CHECK: define {{.*}}@f_avx_sse4_2_ivybridge({{.*}} [[f_avx_sse4_2_ivybridge:#[0-9]+]]
+// CHECK: define {{.*}}@f_fpmath_387({{.*}} [[f_default]]
+// CHECK: define {{.*}}@f_no_sse2({{.*}} [[f_no_sse2:#[0-9]+]]
+// CHECK: define {{.*}}@f_sse4({{.*}} [[f_sse4:#[0-9]+]]
+// CHECK: define {{.*}}@f_no_sse4({{.*}} [[f_no_sse4:#[0-9]+]]
+// CHECK: define {{.*}}@f_default2({{.*}} [[f_default]]
+// CHECK: define {{.*}}@f_avx_sse4_2_ivybridge_2({{.*}} [[f_avx_sse4_2_ivybridge]]
+// CHECK: define {{.*}}@f_no_aes_ivybridge({{.*}} [[f_no_aes_ivybridge:#[0-9]+]]
+// CHECK: define {{.*}}@f_no_mmx({{.*}} [[f_no_mmx:#[0-9]+]]
+// CHECK: define {{.*}}@f_lakemont_mmx({{.*}} [[f_lakemont_mmx:#[0-9]+]]
+// CHECK: define {{.*}}@f_use_before_def({{.*}} [[f_lakemont_mmx]]
+// CHECK: define {{.*}}@f_tune_sandybridge({{.*}} [[f_tune_sandybridge:#[0-9]+]]
+// CHECK: define {{.*}}@f_x86_64_v2({{.*}} [[f_x86_64_v2:#[0-9]+]]
+// CHECK: define {{.*}}@f_x86_64_v3({{.*}} [[f_x86_64_v3:#[0-9]+]]
+// CHECK: define {{.*}}@f_x86_64_v4({{.*}} [[f_x86_64_v4:#[0-9]+]]
+// CHECK: define {{.*}}@f_avx10_1_256{{.*}} [[f_avx10_1_256:#[0-9]+]]
+// CHECK: define {{.*}}@f_avx10_1_512{{.*}} [[f_avx10_1_512:#[0-9]+]]
+// CHECK: define {{.*}}@f_prefer_256_bit({{.*}} [[f_prefer_256_bit:#[0-9]+]]
+// CHECK: define {{.*}}@f_no_prefer_256_bit({{.*}} [[f_no_prefer_256_bit:#[0-9]+]]
+
+// CHECK: [[f_default]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
+void f_default(void) {}
+
+// CHECK: [[f_avx_sse4_2_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
+__attribute__((target("avx,sse4.2,arch=ivybridge")))
+void f_avx_sse4_2_ivybridge(void) {}
+
+// We're currently ignoring the fpmath attribute. So checked above that
+// attributes are identical to f_default.
+__attribute__((target("fpmath=387")))
+void f_fpmath_387(void) {}
-int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo(int a) { return 4; }
-
-int __attribute__((target("fpmath=387"))) koala(int a) { return 4; }
-
-int __attribute__((target("no-sse2"))) echidna(int a) { return 4; }
-
-int __attribute__((target("sse4"))) panda(int a) { return 4; }
-int __attribute__((target("no-sse4"))) narwhal(int a) { return 4; }
+// CHECK-NOT: tune-cpu
+// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+__attribute__((target("no-sse2")))
+void f_no_sse2(void) {}
+
+// CHECK: [[f_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
+__attribute__((target("sse4")))
+void f_sse4(void) {}
+
+// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+__attribute__((target("no-sse4")))
+void f_no_sse4(void) {}
+
+// checked above that attributes are identical to f_default
+void f_default2(void) {
+ f_avx_sse4_2_ivybridge();
+ return f_default();
+}
-int bar(int a) { return baz(a) + foo(a); }
+// Checked above to have same attributes as f_avx_sse4_2_ivybridge
+__attribute__((target("avx, sse4.2, arch= ivybridge")))
+void f_avx_sse4_2_ivybridge_2(void) {}
-int __attribute__((target("avx, sse4.2, arch= ivybridge"))) qux(int a) { return 4; }
-int __attribute__((target("no-aes, arch=ivybridge"))) qax(int a) { return 4; }
+// CHECK: [[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
+__attribute__((target("no-aes, arch=ivybridge")))
+void f_no_aes_ivybridge(void) {}
-int __attribute__((target("no-mmx"))) qq(int a) { return 40; }
+// CHECK-NOT: tune-cpu
+// CHECK: [[f_no_mmx]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"
+__attribute__((target("no-mmx")))
+void f_no_mmx(void) {}
-int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; }
+// CHECK: [[f_lakemont_mmx]] = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
+// Adding the attribute to a definition does update it in IR.
+__attribute__((target("arch=lakemont,mmx")))
+void f_lakemont_mmx(void) {}
-int use_before_def(void);
-int useage(void){
- return use_before_def();
+void f_use_before_def(void);
+void usage(void){
+ f_use_before_def();
}
-// Adding the attribute to a definition does update it in IR.
-int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
- return 5;
-}
+// Checked above to have same attributes as f_lakemont_mmx
+__attribute__((target("arch=lakemont,mmx")))
+void f_use_before_def(void) {}
-int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; }
-
-void __attribute__((target("arch=x86-64-v2"))) x86_64_v2(void) {}
-void __attribute__((target("arch=x86-64-v3"))) x86_64_v3(void) {}
-void __attribute__((target("arch=x86-64-v4"))) x86_64_v4(void) {}
-
-void __attribute__((target("avx10.1-256"))) avx10_1_256(void) {}
-void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
-
-// Check that we emit the additional subtarget and cpu features for foo and not for baz or bar.
-// CHECK: baz{{.*}} #0
-// CHECK: foo{{.*}} #1
-// We're currently ignoring the fpmath attribute so koala should be identical to baz and bar.
-// CHECK: koala{{.*}} #0
-// CHECK: echidna{{.*}} #2
-// CHECK: panda{{.*}} #3
-// CHECK: narwhal{{.*}} #4
-// CHECK: bar{{.*}} #0
-// CHECK: qux{{.*}} #1
-// CHECK: qax{{.*}} #5
-// CHECK: qq{{.*}} #6
-// CHECK: lake{{.*}} #7
-// CHECK: use_before_def{{.*}} #7
-// CHECK: walrus{{.*}} #8
-// CHECK: avx10_1_256{{.*}} #12
-// CHECK: avx10_1_512{{.*}} #13
-// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
-// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK-NOT: tune-cpu
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
-// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
-// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
-// CHECK-NOT: tune-cpu
-// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"
-// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
-// CHECK-NOT: tune-cpu
-// CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge"
+// CHECK: [[f_tune_sandybridge]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge"
+__attribute__((target("tune=sandybridge")))
+void f_tune_sandybridge(void) {}
-// CHECK: "target-cpu"="x86-64-v2"
+// CHECK: [[f_x86_64_v2]] ={{.*}}"target-cpu"="x86-64-v2"
// CHECK-SAME: "target-features"="+cmov,+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: "target-cpu"="x86-64-v3"
+__attribute__((target("arch=x86-64-v2")))
+void f_x86_64_v2(void) {}
+
+// CHECK: [[f_x86_64_v3]] = {{.*}}"target-cpu"="x86-64-v3"
// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: "target-cpu"="x86-64-v4"
+__attribute__((target("arch=x86-64-v3")))
+void f_x86_64_v3(void) {}
+
+// CHECK: [[f_x86_64_v4]] = {{.*}}"target-cpu"="x86-64-v4"
// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+__attribute__((target("arch=x86-64-v4")))
+void f_x86_64_v4(void) {}
+
+// CHECK: [[f_avx10_1_256]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512"
+__attribute__((target("avx10.1-256")))
+void f_avx10_1_256(void) {}
+
+// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
+__attribute__((target("avx10.1-512")))
+void f_avx10_1_512(void) {}
+
+// CHECK: [[f_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}+prefer-256-bit
+__attribute__((target("prefer-256-bit")))
+void f_prefer_256_bit(void) {}
-// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512"
-// CHECK: #13 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
+// CHECK: [[f_no_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}-prefer-256-bit
+__attribute__((target("no-prefer-256-bit")))
+void f_no_prefer_256_bit(void) {}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Note, `prefer-256-bit` just prefers. If you want 256-bit guaranteed, you need `no-evex512`.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/9167
Here is the relevant piece of the build log for reference:
|
It would be useful for us to have `__attribute__((target("prefer-256-bit")))` / `__attribute__((target("no-prefer-256-bit")))` to create variants of a function with 256/512-bit vector sizes within the same application.