Skip to content

Commit 6f013db

Browse files
authored
[AArch64][FMV] Add missing feature dependencies and detect at runtime. (#119231)
i8mm -> simd, fp16fml -> simd, frintts -> fp, bf16 -> simd, sme -> fp16. Approved in ACLE as ARM-software/acle#368.
1 parent fbe3919 commit 6f013db

15 files changed

+179
-211
lines changed

clang/test/CodeGen/AArch64/cpu-supports.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,17 +18,17 @@
1818
// CHECK-NEXT: br label [[RETURN:%.*]]
1919
// CHECK: if.end:
2020
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
21-
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17867063951360
22-
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17867063951360
21+
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17936857268992
22+
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17936857268992
2323
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
2424
// CHECK-NEXT: br i1 [[TMP7]], label [[IF_THEN1:%.*]], label [[IF_END2:%.*]]
2525
// CHECK: if.then1:
2626
// CHECK-NEXT: store i32 2, ptr [[RETVAL]], align 4
2727
// CHECK-NEXT: br label [[RETURN]]
2828
// CHECK: if.end2:
2929
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
30-
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 171136785840078848
31-
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 171136785840078848
30+
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 171141184020873984
31+
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 171141184020873984
3232
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
3333
// CHECK-NEXT: br i1 [[TMP11]], label [[IF_THEN3:%.*]], label [[IF_END4:%.*]]
3434
// CHECK: if.then3:

clang/test/CodeGen/AArch64/fmv-dependencies.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -183,10 +183,10 @@ int caller() {
183183
// CHECK: attributes #[[sha2]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sha2,+v8a"
184184
// CHECK: attributes #[[sha3]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sha2,+sha3,+v8a"
185185
// CHECK: attributes #[[sm4]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sm4,+v8a"
186-
// CHECK: attributes #[[sme]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+v8a"
187-
// CHECK: attributes #[[sme_f64f64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme-f64f64,+v8a"
188-
// CHECK: attributes #[[sme_i16i64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme-i16i64,+v8a"
189-
// CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme2,+v8a"
186+
// CHECK: attributes #[[sme]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+v8a"
187+
// CHECK: attributes #[[sme_f64f64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme-f64f64,+v8a"
188+
// CHECK: attributes #[[sme_i16i64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme-i16i64,+v8a"
189+
// CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme2,+v8a"
190190
// CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a"
191191
// CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a"
192192
// CHECK: attributes #[[sve2]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+v8a"

clang/test/CodeGen/AArch64/mixed-target-attributes.c

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -66,24 +66,24 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void
6666
// CHECK-NEXT: resolver_entry:
6767
// CHECK-NEXT: call void @__init_cpu_features_resolver()
6868
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
69-
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576
70-
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576
69+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832
70+
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832
7171
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
7272
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
7373
// CHECK: resolver_return:
7474
// CHECK-NEXT: ret ptr @explicit_default._Mjscvt
7575
// CHECK: resolver_else:
7676
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
77-
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64
78-
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64
77+
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832
78+
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832
7979
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
8080
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
8181
// CHECK: resolver_return1:
8282
// CHECK-NEXT: ret ptr @explicit_default._Mrdm
8383
// CHECK: resolver_else2:
8484
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
85-
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16
86-
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16
85+
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 784
86+
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 784
8787
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
8888
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
8989
// CHECK: resolver_return3:
@@ -140,24 +140,24 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void
140140
// CHECK-NEXT: resolver_entry:
141141
// CHECK-NEXT: call void @__init_cpu_features_resolver()
142142
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
143-
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576
144-
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576
143+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832
144+
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832
145145
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
146146
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
147147
// CHECK: resolver_return:
148148
// CHECK-NEXT: ret ptr @implicit_default._Mjscvt
149149
// CHECK: resolver_else:
150150
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
151-
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64
152-
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64
151+
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832
152+
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832
153153
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
154154
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
155155
// CHECK: resolver_return1:
156156
// CHECK-NEXT: ret ptr @implicit_default._Mrdm
157157
// CHECK: resolver_else2:
158158
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
159-
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16
160-
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16
159+
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 784
160+
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 784
161161
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
162162
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
163163
// CHECK: resolver_return3:
@@ -207,16 +207,16 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void
207207
// CHECK-NEXT: resolver_entry:
208208
// CHECK-NEXT: call void @__init_cpu_features_resolver()
209209
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
210-
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576
211-
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576
210+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832
211+
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832
212212
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
213213
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
214214
// CHECK: resolver_return:
215215
// CHECK-NEXT: ret ptr @default_def_with_version_decls._Mjscvt
216216
// CHECK: resolver_else:
217217
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
218-
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16
219-
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16
218+
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 784
219+
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 784
220220
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
221221
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
222222
// CHECK: resolver_return1:

0 commit comments

Comments
 (0)