Skip to content

Commit 8e65940

Browse files
authored
[FMV][AArch64] Simplify version selection according to ACLE. (#121921)
Currently, the more features a version has, the higher its priority. We are changing the ACLE (ARM-software/acle#370) so that priority is decided as follows: "Among any two versions, the higher priority version is determined by identifying the highest priority feature that is specified in exactly one of the versions, and selecting that version."
1 parent 983a957 commit 8e65940

File tree

16 files changed

+270
-152
lines changed

16 files changed

+270
-152
lines changed

clang/include/clang/Basic/TargetInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1531,7 +1531,7 @@ class TargetInfo : public TransferrableTargetInfo,
15311531

15321532
// Return the target-specific priority for features/cpus/vendors so
15331533
// that they can be properly sorted for checking.
1534-
virtual unsigned getFMVPriority(ArrayRef<StringRef> Features) const {
1534+
virtual uint64_t getFMVPriority(ArrayRef<StringRef> Features) const {
15351535
return 0;
15361536
}
15371537

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,7 @@ AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
714714
return std::nullopt;
715715
}
716716

717-
unsigned AArch64TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
717+
uint64_t AArch64TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
718718
return llvm::AArch64::getFMVPriority(Features);
719719
}
720720

clang/lib/Basic/Targets/AArch64.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
137137
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
138138
bool setCPU(const std::string &Name) override;
139139

140-
unsigned getFMVPriority(ArrayRef<StringRef> Features) const override;
140+
uint64_t getFMVPriority(ArrayRef<StringRef> Features) const override;
141141

142142
bool useFP16ConversionIntrinsics() const override {
143143
return false;

clang/lib/Basic/Targets/RISCV.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ ParsedTargetAttr RISCVTargetInfo::parseTargetAttr(StringRef Features) const {
489489
return Ret;
490490
}
491491

492-
unsigned RISCVTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
492+
uint64_t RISCVTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
493493
// Priority is explicitly specified on RISC-V unlike on other targets, where
494494
// it is derived by all the features of a specific version. Therefore if a
495495
// feature contains the priority string, then return it immediately.
@@ -501,7 +501,7 @@ unsigned RISCVTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
501501
Feature = RHS;
502502
else
503503
continue;
504-
unsigned Priority;
504+
uint64_t Priority;
505505
if (!Feature.getAsInteger(0, Priority))
506506
return Priority;
507507
}

clang/lib/Basic/Targets/RISCV.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ class RISCVTargetInfo : public TargetInfo {
122122
void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const override;
123123
bool supportsTargetAttributeTune() const override { return true; }
124124
ParsedTargetAttr parseTargetAttr(StringRef Str) const override;
125-
unsigned getFMVPriority(ArrayRef<StringRef> Features) const override;
125+
uint64_t getFMVPriority(ArrayRef<StringRef> Features) const override;
126126

127127
std::pair<unsigned, unsigned> hardwareInterferenceSizes() const override {
128128
return std::make_pair(32, 32);

clang/lib/Basic/Targets/X86.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,8 +1357,8 @@ static llvm::X86::ProcessorFeatures getFeature(StringRef Name) {
13571357
// correct, so it asserts if the value is out of range.
13581358
}
13591359

1360-
unsigned X86TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
1361-
auto getPriority = [](StringRef Feature) -> unsigned {
1360+
uint64_t X86TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
1361+
auto getPriority = [](StringRef Feature) -> uint64_t {
13621362
// Valid CPUs have a 'key feature'; a CPU compares just better than its key
13631363
// feature.
13641364
using namespace llvm::X86;
@@ -1372,7 +1372,7 @@ unsigned X86TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
13721372
return getFeaturePriority(getFeature(Feature)) << 1;
13731373
};
13741374

1375-
unsigned Priority = 0;
1375+
uint64_t Priority = 0;
13761376
for (StringRef Feature : Features)
13771377
if (!Feature.empty())
13781378
Priority = std::max(Priority, getPriority(Feature));

clang/lib/Basic/Targets/X86.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
384384
return CPU != llvm::X86::CK_None;
385385
}
386386

387-
unsigned getFMVPriority(ArrayRef<StringRef> Features) const override;
387+
uint64_t getFMVPriority(ArrayRef<StringRef> Features) const override;
388388

389389
bool setFPMath(StringRef Name) override;
390390

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4241,7 +4241,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
42414241
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
42424242
llvm::Function *NewFn);
42434243

4244-
static unsigned getFMVPriority(const TargetInfo &TI,
4244+
static uint64_t getFMVPriority(const TargetInfo &TI,
42454245
const CodeGenFunction::FMVResolverOption &RO) {
42464246
llvm::SmallVector<StringRef, 8> Features{RO.Features};
42474247
if (RO.Architecture)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
2+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s
3+
4+
// Priority bitmasks after feature dependency expansion:
5+
//
6+
// MSB LSB
7+
//
8+
// sme2 | ls64 | sme | bf16 | | | fp16 | simd | fp
9+
// -----+------+-----+------+-------+------+------+------+---
10+
// sme2 | | sme | bf16 | rcpc2 | rcpc | fp16 | simd | fp
11+
//
12+
// Dependencies should not affect priorities, since a
13+
// feature can only depend on lower priority features:
14+
// https://github.com/ARM-software/acle/pull/376
15+
16+
__attribute__((target_version("sme2+ls64"))) int fn(void);
17+
__attribute__((target_version("sme2+rcpc2"))) int fn(void);
18+
__attribute__((target_version("default"))) int fn(void) { return 0; }
19+
20+
int call() { return fn(); }
21+
22+
// CHECK-LABEL: define dso_local i32 @fn.default(
23+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
24+
// CHECK-NEXT: [[ENTRY:.*:]]
25+
// CHECK-NEXT: ret i32 0
26+
//
27+
//
28+
// CHECK-LABEL: define dso_local i32 @call(
29+
// CHECK-SAME: ) #[[ATTR0]] {
30+
// CHECK-NEXT: [[ENTRY:.*:]]
31+
// CHECK-NEXT: [[CALL:%.*]] = call i32 @fn()
32+
// CHECK-NEXT: ret i32 [[CALL]]
33+
//
34+
//
35+
// CHECK-LABEL: define weak_odr ptr @fn.resolver() comdat {
36+
// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]]
37+
// CHECK-NEXT: call void @__init_cpu_features_resolver()
38+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
39+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 153126785511392000
40+
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 153126785511392000
41+
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
42+
// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
43+
// CHECK: [[RESOLVER_RETURN]]:
44+
// CHECK-NEXT: ret ptr @fn._Mls64Msme2
45+
// CHECK: [[RESOLVER_ELSE]]:
46+
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
47+
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 144119586269233920
48+
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 144119586269233920
49+
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
50+
// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
51+
// CHECK: [[RESOLVER_RETURN1]]:
52+
// CHECK-NEXT: ret ptr @fn._Mrcpc2Msme2
53+
// CHECK: [[RESOLVER_ELSE2]]:
54+
// CHECK-NEXT: ret ptr @fn.default
55+
//

clang/test/CodeGen/attr-target-clones-aarch64.c

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -64,20 +64,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
6464
// CHECK-NEXT: resolver_entry:
6565
// CHECK-NEXT: call void @__init_cpu_features_resolver()
6666
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
67-
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664
68-
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664
67+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 69793284352
68+
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 69793284352
6969
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
7070
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
7171
// CHECK: resolver_return:
72-
// CHECK-NEXT: ret ptr @ftc._MaesMlse
72+
// CHECK-NEXT: ret ptr @ftc._Msve2
7373
// CHECK: resolver_else:
7474
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
75-
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352
76-
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352
75+
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 33664
76+
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 33664
7777
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
7878
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
7979
// CHECK: resolver_return1:
80-
// CHECK-NEXT: ret ptr @ftc._Msve2
80+
// CHECK-NEXT: ret ptr @ftc._MaesMlse
8181
// CHECK: resolver_else2:
8282
// CHECK-NEXT: ret ptr @ftc.default
8383
//
@@ -411,20 +411,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
411411
// CHECK-NEXT: resolver_entry:
412412
// CHECK-NEXT: call void @__init_cpu_features_resolver()
413413
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
414-
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817985280
415-
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817985280
414+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1125899906842624
415+
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1125899906842624
416416
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
417417
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
418418
// CHECK: resolver_return:
419-
// CHECK-NEXT: ret ptr @ftc_inline3._MsbMsve
419+
// CHECK-NEXT: ret ptr @ftc_inline3._Mbti
420420
// CHECK: resolver_else:
421421
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
422-
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624
423-
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624
422+
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 70369817985280
423+
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 70369817985280
424424
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
425425
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
426426
// CHECK: resolver_return1:
427-
// CHECK-NEXT: ret ptr @ftc_inline3._Mbti
427+
// CHECK-NEXT: ret ptr @ftc_inline3._MsbMsve
428428
// CHECK: resolver_else2:
429429
// CHECK-NEXT: ret ptr @ftc_inline3.default
430430
//
@@ -521,20 +521,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
521521
// CHECK-MTE-BTI-NEXT: resolver_entry:
522522
// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver()
523523
// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
524-
// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664
525-
// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664
524+
// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 69793284352
525+
// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 69793284352
526526
// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
527527
// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
528528
// CHECK-MTE-BTI: resolver_return:
529-
// CHECK-MTE-BTI-NEXT: ret ptr @ftc._MaesMlse
529+
// CHECK-MTE-BTI-NEXT: ret ptr @ftc._Msve2
530530
// CHECK-MTE-BTI: resolver_else:
531531
// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
532-
// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352
533-
// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352
532+
// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 33664
533+
// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 33664
534534
// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
535535
// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
536536
// CHECK-MTE-BTI: resolver_return1:
537-
// CHECK-MTE-BTI-NEXT: ret ptr @ftc._Msve2
537+
// CHECK-MTE-BTI-NEXT: ret ptr @ftc._MaesMlse
538538
// CHECK-MTE-BTI: resolver_else2:
539539
// CHECK-MTE-BTI-NEXT: ret ptr @ftc.default
540540
//
@@ -868,20 +868,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
868868
// CHECK-MTE-BTI-NEXT: resolver_entry:
869869
// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver()
870870
// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
871-
// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817985280
872-
// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817985280
871+
// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1125899906842624
872+
// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1125899906842624
873873
// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
874874
// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
875875
// CHECK-MTE-BTI: resolver_return:
876-
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._MsbMsve
876+
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._Mbti
877877
// CHECK-MTE-BTI: resolver_else:
878878
// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
879-
// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624
880-
// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624
879+
// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 70369817985280
880+
// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 70369817985280
881881
// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
882882
// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
883883
// CHECK-MTE-BTI: resolver_return1:
884-
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._Mbti
884+
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._MsbMsve
885885
// CHECK-MTE-BTI: resolver_else2:
886886
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3.default
887887
//

0 commit comments

Comments
 (0)