
Commit 4eaa070

SC llvm team authored and committed
Merged main:a4fd3dba6e285734bc635b0651a30dfeffedeada into amd-gfx:7dad81725749
Local branch amd-gfx 7dad817: Merged main:5d4a0d54b5269bad1410e6db957836fe98634069 into amd-gfx:2577f0f72239
Remote branch main a4fd3db: [AMDGPU] Use wider loop lowering type for LowerMemIntrinsics (llvm#112332)
2 parents 7dad817 + a4fd3db commit 4eaa070

35 files changed: +16820 -415 lines

clang/test/Driver/print-supported-extensions-riscv.c

Lines changed: 1 addition & 0 deletions

@@ -110,6 +110,7 @@
 // CHECK-NEXT: zvl8192b 1.0 'Zvl' (Minimum Vector Length) 8192
 // CHECK-NEXT: zhinx 1.0 'Zhinx' (Half Float in Integer)
 // CHECK-NEXT: zhinxmin 1.0 'Zhinxmin' (Half Float in Integer Minimal)
+// CHECK-NEXT: sha 1.0 'Sha' (Augmented Hypervisor)
 // CHECK-NEXT: shcounterenw 1.0 'Shcounterenw' (Support writeable hcounteren enable bit for any hpmcounter that is not read-only zero)
 // CHECK-NEXT: shgatpa 1.0 'Sgatpa' (SvNNx4 mode supported for all modes supported by satp, as well as Bare)
 // CHECK-NEXT: shtvala 1.0 'Shtvala' (htval provides all needed values)

clang/test/Preprocessor/riscv-target-features.c

Lines changed: 9 additions & 0 deletions

@@ -20,6 +20,7 @@
 // CHECK-NOT: __riscv_m {{.*$}}
 // CHECK-NOT: __riscv_mul {{.*$}}
 // CHECK-NOT: __riscv_muldiv {{.*$}}
+// CHECK-NOT: __riscv_sha {{.*$}}
 // CHECK-NOT: __riscv_shcounterenw {{.*$}}
 // CHECK-NOT: __riscv_shgatpa {{.*$}}
 // CHECK-NOT: __riscv_shtvala {{.*$}}
@@ -323,6 +324,14 @@
 // CHECK-M-EXT: __riscv_mul 1
 // CHECK-M-EXT: __riscv_muldiv 1
 
+// RUN: %clang --target=riscv32-unknown-linux-gnu \
+// RUN: -march=rv32isha -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-SHCOUNTERENW-EXT %s
+// RUN: %clang --target=riscv64-unknown-linux-gnu \
+// RUN: -march=rv64isha -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-SHCOUNTERENW-EXT %s
+// CHECK-SHA-EXT: __riscv_sha 1000000{{$}}
+
 // RUN: %clang --target=riscv32-unknown-linux-gnu \
 // RUN: -march=rv32ishcounterenw -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-SHCOUNTERENW-EXT %s

llvm/docs/RISCVUsage.rst

Lines changed: 1 addition & 0 deletions

@@ -119,6 +119,7 @@ on support follow.
 ``E`` Supported (`See note <#riscv-rve-note>`__)
 ``H`` Assembly Support
 ``M`` Supported
+``Sha`` Supported
 ``Shcounterenw`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
 ``Shgatpa`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
 ``Shtvala`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__)

llvm/docs/ReleaseNotes.md

Lines changed: 1 addition & 0 deletions

@@ -183,6 +183,7 @@ Changes to the RISC-V Backend
 * The `Zacas` extension is no longer marked as experimental.
 * The `Smmpm`, `Smnpm`, `Ssnpm`, `Supm`, and `Sspm` pointer masking extensions
 are no longer marked as experimental.
+* The `Sha` extension is now supported.
 
 Changes to the WebAssembly Backend
 ----------------------------------

llvm/include/llvm/ADT/ArrayRef.h

Lines changed: 9 additions & 6 deletions

@@ -70,15 +70,16 @@ namespace llvm {
 /*implicit*/ ArrayRef(std::nullopt_t) {}
 
 /// Construct an ArrayRef from a single element.
-/*implicit*/ ArrayRef(const T &OneElt)
-: Data(&OneElt), Length(1) {}
+/*implicit*/ ArrayRef(const T &OneElt LLVM_LIFETIME_BOUND)
+: Data(&OneElt), Length(1) {}
 
 /// Construct an ArrayRef from a pointer and length.
-constexpr /*implicit*/ ArrayRef(const T *data, size_t length)
+constexpr /*implicit*/ ArrayRef(const T *data LLVM_LIFETIME_BOUND,
+size_t length)
 : Data(data), Length(length) {}
 
 /// Construct an ArrayRef from a range.
-constexpr ArrayRef(const T *begin, const T *end)
+constexpr ArrayRef(const T *begin LLVM_LIFETIME_BOUND, const T *end)
 : Data(begin), Length(end - begin) {
 assert(begin <= end);
 }
@@ -103,7 +104,8 @@ namespace llvm {
 
 /// Construct an ArrayRef from a C array.
 template <size_t N>
-/*implicit*/ constexpr ArrayRef(const T (&Arr)[N]) : Data(Arr), Length(N) {}
+/*implicit*/ constexpr ArrayRef(const T (&Arr LLVM_LIFETIME_BOUND)[N])
+: Data(Arr), Length(N) {}
 
 /// Construct an ArrayRef from a std::initializer_list.
 #if LLVM_GNUC_PREREQ(9, 0, 0)
@@ -113,7 +115,8 @@ namespace llvm {
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Winit-list-lifetime"
 #endif
-constexpr /*implicit*/ ArrayRef(std::initializer_list<T> Vec)
+constexpr /*implicit*/ ArrayRef(
+std::initializer_list<T> Vec LLVM_LIFETIME_BOUND)
 : Data(Vec.begin() == Vec.end() ? (T *)nullptr : Vec.begin()),
 Length(Vec.size()) {}
 #if LLVM_GNUC_PREREQ(9, 0, 0)

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 516212
+#define LLVM_MAIN_REVISION 516218
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/Support/Compiler.h

Lines changed: 6 additions & 0 deletions

@@ -413,6 +413,12 @@
 #define LLVM_GSL_POINTER
 #endif
 
+#if LLVM_HAS_CPP_ATTRIBUTE(clang::lifetimebound)
+#define LLVM_LIFETIME_BOUND [[clang::lifetimebound]]
+#else
+#define LLVM_LIFETIME_BOUND
+#endif
+
 #if LLVM_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L
 #define LLVM_CTOR_NODISCARD [[nodiscard]]
 #else
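
For context, a minimal sketch (not part of this commit; the function name and the exact warning flags mentioned in the comments are illustrative assumptions) of the kind of bug that LLVM_LIFETIME_BOUND on the ArrayRef constructors above lets clang diagnose:

// Minimal sketch, assuming LLVM headers are available: with
// [[clang::lifetimebound]] on ArrayRef's single-element constructor, clang
// can warn that the returned ArrayRef refers to a local that is about to go
// out of scope (e.g. -Wreturn-stack-address / -Wdangling style diagnostics).
#include "llvm/ADT/ArrayRef.h"

llvm::ArrayRef<int> makeDanglingRef() {
  int Local = 42;
  // ArrayRef(const T &OneElt LLVM_LIFETIME_BOUND) ties the ArrayRef's
  // lifetime to 'Local', so returning it escapes a dead stack slot.
  return llvm::ArrayRef<int>(Local);
}

int main() { return makeDanglingRef().empty() ? 0 : 1; }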

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 29 additions & 9 deletions

@@ -75,6 +75,13 @@ static cl::opt<size_t> InlineMaxBB(
 cl::desc("Maximum number of BBs allowed in a function after inlining"
 " (compile time constraint)"));
 
+// This default unroll factor is based on microbenchmarks on gfx1030.
+static cl::opt<unsigned> MemcpyLoopUnroll(
+"amdgpu-memcpy-loop-unroll",
+cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory "
+"operations when lowering memcpy as a loop"),
+cl::init(16), cl::Hidden);
+
 static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
 unsigned Depth = 0) {
 const Instruction *I = dyn_cast<Instruction>(Cond);
@@ -409,13 +416,8 @@ int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const {
 return 1024;
 }
 
-// FIXME: Really we would like to issue multiple 128-bit loads and stores per
-// iteration. Should we report a larger size and let it legalize?
-//
 // FIXME: Should we use narrower types for local/region, or account for when
 // unaligned access is legal?
-//
-// FIXME: This could use fine tuning and microbenchmarks.
 Type *GCNTTIImpl::getMemcpyLoopLoweringType(
 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
@@ -442,17 +444,29 @@
 return FixedVectorType::get(Type::getInt32Ty(Context), 2);
 }
 
-// Global memory works best with 16-byte accesses. Private memory will also
-// hit this, although they'll be decomposed.
-return FixedVectorType::get(Type::getInt32Ty(Context), 4);
+// Global memory works best with 16-byte accesses.
+// If the operation has a fixed known length that is large enough, it is
+// worthwhile to return an even wider type and let legalization lower it into
+// multiple accesses, effectively unrolling the memcpy loop. Private memory
+// also hits this, although accesses may be decomposed.
+//
+// Don't unroll if Length is not a constant, since unrolling leads to worse
+// performance for length values that are smaller or slightly larger than the
+// total size of the type returned here. Mitigating that would require a more
+// complex lowering for variable-length memcpy and memmove.
+unsigned I32EltsInVector = 4;
+if (MemcpyLoopUnroll > 0 && isa<ConstantInt>(Length))
+return FixedVectorType::get(Type::getInt32Ty(Context),
+MemcpyLoopUnroll * I32EltsInVector);
+
+return FixedVectorType::get(Type::getInt32Ty(Context), I32EltsInVector);
 }
 
 void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
 Align SrcAlign, Align DestAlign,
 std::optional<uint32_t> AtomicCpySize) const {
-assert(RemainingBytes < 16);
 
 if (AtomicCpySize)
 BaseT::getMemcpyLoopResidualLoweringType(
@@ -462,6 +476,12 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
 Align MinAlign = std::min(SrcAlign, DestAlign);
 
 if (MinAlign != Align(2)) {
+Type *I32x4Ty = FixedVectorType::get(Type::getInt32Ty(Context), 4);
+while (RemainingBytes >= 16) {
+OpsOut.push_back(I32x4Ty);
+RemainingBytes -= 16;
+}
+
 Type *I64Ty = Type::getInt64Ty(Context);
 while (RemainingBytes >= 8) {
 OpsOut.push_back(I64Ty);
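
To make the effect of the new default concrete, here is a small standalone sketch (plain C++, not LLVM API; the variable names simply mirror the cl::opt default of 16 and the 4 x i32 base vector in the diff above) of the arithmetic behind the widened loop lowering type:

// Minimal sketch: with the default unroll factor of 16 and a 4 x i32
// (16-byte) base vector, a memcpy with a constant length gets a <64 x i32>
// loop lowering type, i.e. 256 bytes per loop iteration; non-constant
// lengths keep the plain <4 x i32> type.
#include <cstdio>

int main() {
  const unsigned MemcpyLoopUnroll = 16; // default of -amdgpu-memcpy-loop-unroll
  const unsigned I32EltsInVector = 4;   // 16-byte accesses suit global memory
  const unsigned BytesPerElt = 4;

  const unsigned EltsPerIter = MemcpyLoopUnroll * I32EltsInVector;
  std::printf("loop lowering type: <%u x i32>, %u bytes per iteration\n",
              EltsPerIter, EltsPerIter * BytesPerElt);
  return 0;
}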

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 7 additions & 0 deletions

@@ -1029,6 +1029,13 @@ def FeatureStdExtSvpbmt
 : RISCVExtension<"svpbmt", 1, 0,
 "'Svpbmt' (Page-Based Memory Types)">;
 
+def FeatureStdExtSha
+: RISCVExtension<"sha", 1, 0,
+"'Sha' (Augmented Hypervisor)",
+[FeatureStdExtH, FeatureStdExtSsstateen, FeatureStdExtShcounterenw,
+FeatureStdExtShvstvala, FeatureStdExtShtvala, FeatureStdExtShvstvecd,
+FeatureStdExtShvsatpa, FeatureStdExtShgatpa]>;
+
 // Pointer Masking extensions
 
 // A supervisor-level extension that provides pointer masking for the next lower

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 0 deletions

@@ -2393,6 +2393,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
 for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
 setF16Action(VT, Expand);
+if (!Subtarget.hasBF16())
+setOperationAction(ISD::VSELECT, VT, Custom);
 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 3 additions & 1 deletion

@@ -3753,7 +3753,9 @@ Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
 }
 
 // Replace all dominated uses of the condition with true/false
-if (BI.getSuccessor(0) != BI.getSuccessor(1)) {
+// Ignore constant expressions to avoid iterating over uses on other
+// functions.
+if (!isa<Constant>(Cond) && BI.getSuccessor(0) != BI.getSuccessor(1)) {
 for (auto &U : make_early_inc_range(Cond->uses())) {
 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(0));
 if (DT.dominates(Edge0, U)) {
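
For context, a small sketch against LLVM's C++ API (an assumption of this note, not code from the commit) of why the new isa<Constant> guard matters: constants are uniqued per LLVMContext, so the use list of "i1 true" reaches branch instructions in every function that uses it, and walking it from visitBranchInst would touch instructions outside the current function.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <cstdio>

using namespace llvm;

// Build a void function whose entry block conditionally branches on the
// context-wide "i1 true" constant.
static void makeFunc(Module &M, StringRef Name) {
  LLVMContext &Ctx = M.getContext();
  auto *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, Name, M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *A = BasicBlock::Create(Ctx, "a", F);
  BasicBlock *B = BasicBlock::Create(Ctx, "b", F);
  IRBuilder<> Builder(Entry);
  Builder.CreateCondBr(ConstantInt::getTrue(Ctx), A, B);
  Builder.SetInsertPoint(A);
  Builder.CreateRetVoid();
  Builder.SetInsertPoint(B);
  Builder.CreateRetVoid();
}

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  makeFunc(M, "f");
  makeFunc(M, "g");
  // The single uniqued i1 true is used by the branch in *both* functions,
  // so iterating its uses from inside one function would visit the other.
  std::printf("uses of i1 true: %u\n",
              ConstantInt::getTrue(Ctx)->getNumUses());
  return 0;
}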

llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll

Lines changed: 16 additions & 16 deletions

@@ -131,7 +131,7 @@ define double @t1_strict(double %x) #0 {
 ; CHECK-NEXT: ret
 entry:
 %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") #0
-%conv1 = call double @llvm.experimental.constrained.sitofp.i64.f64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%conv1 = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret double %conv1
 }
 
@@ -143,7 +143,7 @@ define float @t2_strict(float %x) #0 {
 ; CHECK-NEXT: ret
 entry:
 %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0
-%conv1 = call float @llvm.experimental.constrained.sitofp.i32.f32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%conv1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret float %conv1
 }
 
@@ -155,7 +155,7 @@ define half @t3_strict(half %x) #0 {
 ; CHECK-NEXT: ret
 entry:
 %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0
-%conv1 = call half @llvm.experimental.constrained.sitofp.i32.f16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%conv1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret half %conv1
 }
 
@@ -167,7 +167,7 @@ define double @t4_strict(double %x) #0 {
 ; CHECK-NEXT: ret
 entry:
 %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0
-%conv1 = call double @llvm.experimental.constrained.uitofp.i64.f64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%conv1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret double %conv1
 }
 
@@ -179,7 +179,7 @@ define float @t5_strict(float %x) #0 {
 ; CHECK-NEXT: ret
 entry:
 %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
-%conv1 = call float @llvm.experimental.constrained.uitofp.i32.f32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%conv1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret float %conv1
 }
 
@@ -191,7 +191,7 @@ define half @t6_strict(half %x) #0 {
 ; CHECK-NEXT: ret
 entry:
 %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
-%conv1 = call half @llvm.experimental.constrained.uitofp.i32.f16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%conv1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret half %conv1
 }
 
@@ -216,7 +216,7 @@ define bfloat @t7_strict(bfloat %x) #0 {
 ; CHECK-NEXT: ret
 entry:
 %conv = call i32 @llvm.experimental.constrained.fptosi.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
-%conv1 = call bfloat @llvm.experimental.constrained.sitofp.i32.bf16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%conv1 = call bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret bfloat %conv1
 }
 
@@ -241,7 +241,7 @@ define bfloat @t8_strict(bfloat %x) #0 {
 ; CHECK-NEXT: ret
 entry:
 %conv = call i32 @llvm.experimental.constrained.fptoui.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
-%conv1 = call bfloat @llvm.experimental.constrained.uitofp.i32.bf16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%conv1 = call bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret bfloat %conv1
 }
 
@@ -255,11 +255,11 @@ declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata)
 declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata)
 declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata)
-declare bfloat @llvm.experimental.constrained.sitofp.i32.bf16(i32, metadata, metadata)
-declare bfloat @llvm.experimental.constrained.uitofp.i32.bf16(i32, metadata, metadata)
-declare half @llvm.experimental.constrained.sitofp.i32.f16(i32, metadata, metadata)
-declare half @llvm.experimental.constrained.uitofp.i32.f16(i32, metadata, metadata)
-declare float @llvm.experimental.constrained.sitofp.i32.f32(i32, metadata, metadata)
-declare float @llvm.experimental.constrained.uitofp.i32.f32(i32, metadata, metadata)
-declare double @llvm.experimental.constrained.sitofp.i64.f64(i64, metadata, metadata)
-declare double @llvm.experimental.constrained.uitofp.i64.f64(i64, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
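
The updates above (and in the next two test files) follow the mangling rule for overloaded constrained intrinsics: the overloaded result type is appended to the name before the overloaded operand type, so sitofp from i64 to double is @llvm.experimental.constrained.sitofp.f64.i64 and lround from f16 to i32 is @llvm.experimental.constrained.lround.i32.f16. A small sketch against LLVM's C++ API (an assumption of this note, not code from the commit) that prints the mangled name:

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include <cstdio>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // Overloaded types are passed in order of appearance: result, then operand.
  // Newer LLVM spells this Intrinsic::getOrInsertDeclaration.
  Function *F = Intrinsic::getDeclaration(
      &M, Intrinsic::experimental_constrained_sitofp,
      {Type::getDoubleTy(Ctx), Type::getInt64Ty(Ctx)});
  // Expected to print "llvm.experimental.constrained.sitofp.f64.i64".
  std::printf("%s\n", F->getName().str().c_str());
  return 0;
}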

llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll

Lines changed: 8 additions & 8 deletions

@@ -595,7 +595,7 @@ define i32 @lrint_f16(half %x) #0 {
 ; CHECK-FP16-NEXT: frintx h0, h0
 ; CHECK-FP16-NEXT: fcvtzs w0, h0
 ; CHECK-FP16-NEXT: ret
-%val = call i32 @llvm.experimental.constrained.lrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%val = call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret i32 %val
 }
 
@@ -612,7 +612,7 @@ define i64 @llrint_f16(half %x) #0 {
 ; CHECK-FP16-NEXT: frintx h0, h0
 ; CHECK-FP16-NEXT: fcvtzs x0, h0
 ; CHECK-FP16-NEXT: ret
-%val = call i64 @llvm.experimental.constrained.llrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+%val = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 ret i64 %val
 }
 
@@ -693,7 +693,7 @@ define i32 @lround_f16(half %x) #0 {
 ; CHECK-FP16: // %bb.0:
 ; CHECK-FP16-NEXT: fcvtas w0, h0
 ; CHECK-FP16-NEXT: ret
-%val = call i32 @llvm.experimental.constrained.lround.f16(half %x, metadata !"fpexcept.strict") #0
+%val = call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata !"fpexcept.strict") #0
 ret i32 %val
 }
 
@@ -708,7 +708,7 @@ define i64 @llround_f16(half %x) #0 {
 ; CHECK-FP16: // %bb.0:
 ; CHECK-FP16-NEXT: fcvtas x0, h0
 ; CHECK-FP16-NEXT: ret
-%val = call i64 @llvm.experimental.constrained.llround.f16(half %x, metadata !"fpexcept.strict") #0
+%val = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata !"fpexcept.strict") #0
 ret i64 %val
 }
 
@@ -1277,14 +1277,14 @@ declare half @llvm.experimental.constrained.exp.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.exp2.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata)
-declare i32 @llvm.experimental.constrained.lrint.f16(half, metadata, metadata)
-declare i64 @llvm.experimental.constrained.llrint.f16(half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f16(half, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.i64.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.maxnum.f16(half, half, metadata)
 declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata)
 declare half @llvm.experimental.constrained.ceil.f16(half, metadata)
 declare half @llvm.experimental.constrained.floor.f16(half, metadata)
-declare i32 @llvm.experimental.constrained.lround.f16(half, metadata)
-declare i64 @llvm.experimental.constrained.llround.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.llround.i64.f16(half, metadata)
 declare half @llvm.experimental.constrained.round.f16(half, metadata)
 declare half @llvm.experimental.constrained.roundeven.f16(half, metadata)
 declare half @llvm.experimental.constrained.trunc.f16(half, metadata)

llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll

Lines changed: 3 additions & 3 deletions

@@ -279,7 +279,7 @@ define <4 x i1> @fcmps_v4f32(<4 x float> %x, <4 x float> %y) #0 {
 ; CHECK-NEXT: xtn v0.4h, v4.4s
 ; CHECK-NEXT: ret
 entry:
-%val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict")
+%val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict")
 ret <4 x i1> %val
 }
 
@@ -825,8 +825,8 @@ declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, meta
 declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata)
 declare <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float>, metadata)
 declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata)
-declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x float>, <4 x float>, metadata, metadata)
-declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float>, <4 x float>, metadata, metadata)
 
 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
