Skip to content

Commit de07976

Browse files
committed
[AArch64][SME] Remove immediate argument restriction for svldr and svstr
The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction, so that the vnum argument can be a non-immediate value. If an appropriate immediate (a constant in the range 0-15) is passed to the builtin, then CGBuiltin passes it directly to the LLVM intrinsic; otherwise it modifies the base register, as is the existing behaviour.
1 parent b81bfea commit de07976

File tree

7 files changed

+166
-107
lines changed

7 files changed

+166
-107
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
97129712
return Store;
97139713
}
97149714

9715+
Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
9716+
llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
9717+
return Builder.CreateAdd(Base, CastOffset, "tileslice");
9718+
}
9719+
97159720
Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
97169721
SmallVectorImpl<Value *> &Ops,
97179722
unsigned IntID) {
@@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
97679772
Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
97689773
SmallVectorImpl<Value *> &Ops,
97699774
unsigned IntID) {
9770-
if (Ops.size() == 3) {
9771-
Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
9772-
llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
9773-
9774-
llvm::Value *VecNum = Ops[2];
9775-
llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
9776-
9777-
Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
9778-
Ops[0] = Builder.CreateAdd(
9779-
Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
9780-
Ops.erase(&Ops[2]);
9781-
}
9775+
if (Ops.size() == 2) {
9776+
// Intrinsics without a vecnum also use this function, so just provide 0
9777+
Ops.push_back(Ops[1]);
9778+
Ops[1] = Builder.getInt32(0);
9779+
} else {
9780+
int Imm = -1;
9781+
if (ConstantInt* C = dyn_cast<ConstantInt>(Ops[2]))
9782+
if (C->getZExtValue() <= 15)
9783+
Imm = C->getZExtValue();
9784+
9785+
if (Imm != -1) {
9786+
Ops[2] = Ops[1];
9787+
Ops[1] = Builder.getInt32(Imm);
9788+
} else {
9789+
Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
9790+
llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
9791+
9792+
llvm::Value *VecNum = Ops[2];
9793+
llvm::Value *MulVL = Builder.CreateMul(
9794+
CntsbCall,
9795+
VecNum,
9796+
"mulvl");
9797+
9798+
Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
9799+
Ops[1] = Builder.getInt32(0);
9800+
Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false);
9801+
}
9802+
}
97829803
Function *F = CGM.getIntrinsic(IntID, {});
97839804
return Builder.CreateCall(F, Ops);
97849805
}

clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c

Lines changed: 29 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,57 +6,46 @@
66

77
#include <arm_sme_draft_spec_subject_to_change.h>
88

9-
// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
10-
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
11-
// CHECK-C-NEXT: entry:
12-
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]])
13-
// CHECK-C-NEXT: ret void
14-
//
15-
// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
16-
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
17-
// CHECK-CXX-NEXT: entry:
18-
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]])
19-
// CHECK-CXX-NEXT: ret void
9+
// CHECK-C-LABEL: @test_svldr_vnum_za(
10+
// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPKv(
11+
// CHECK-NEXT: entry:
12+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], i32 0, ptr [[PTR]])
13+
// CHECK-NEXT: ret void
2014
//
2115
void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
2216
svldr_vnum_za(slice_base, ptr, 0);
2317
}
2418

25-
// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za_1(
26-
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
27-
// CHECK-C-NEXT: entry:
28-
// CHECK-C-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
29-
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], 15
30-
// CHECK-C-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
31-
// CHECK-C-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE]], 15
32-
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[TILESLICE]], ptr [[TMP0]])
33-
// CHECK-C-NEXT: ret void
34-
//
35-
// CHECK-CXX-LABEL: define dso_local void @_Z20test_svldr_vnum_za_1jPKv(
36-
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
37-
// CHECK-CXX-NEXT: entry:
38-
// CHECK-CXX-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
39-
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], 15
40-
// CHECK-CXX-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
41-
// CHECK-CXX-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE]], 15
42-
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[TILESLICE]], ptr [[TMP0]])
43-
// CHECK-CXX-NEXT: ret void
19+
// CHECK-C-LABEL: @test_svldr_vnum_za_1(
20+
// CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_1jPKv(
21+
// CHECK-NEXT: entry:
22+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], i32 15, ptr [[PTR]])
23+
// CHECK-NEXT: ret void
4424
//
4525
void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
4626
svldr_vnum_za(slice_base, ptr, 15);
4727
}
4828

49-
// CHECK-C-LABEL: define dso_local void @test_svldr_za(
50-
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
51-
// CHECK-C-NEXT: entry:
52-
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]])
53-
// CHECK-C-NEXT: ret void
29+
// CHECK-C-LABEL: @test_svldr_vnum_za_var(
30+
// CHECK-CXX-LABEL: @_Z22test_svldr_vnum_za_varjPKvm(
31+
// CHECK-NEXT: entry:
32+
// CHECK-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
33+
// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], [[VNUM]]
34+
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
35+
// CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[VNUM]] to i32
36+
// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], [[SLICE_BASE]]
37+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[TMP2]], i32 0, ptr [[TMP0]])
38+
// CHECK-NEXT: ret void
5439
//
55-
// CHECK-CXX-LABEL: define dso_local void @_Z13test_svldr_zajPKv(
56-
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
57-
// CHECK-CXX-NEXT: entry:
58-
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]])
59-
// CHECK-CXX-NEXT: ret void
40+
void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, uint64_t vnum) {
41+
svldr_vnum_za(slice_base, ptr, vnum);
42+
}
43+
44+
// CHECK-C-LABEL: @test_svldr_za(
45+
// CHECK-CXX-LABEL: @_Z13test_svldr_zajPKv(
46+
// CHECK-NEXT: entry:
47+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], i32 0, ptr [[PTR]])
48+
// CHECK-NEXT: ret void
6049
//
6150
void test_svldr_za(uint32_t slice_base, const void *ptr) {
6251
svldr_za(slice_base, ptr);
@@ -87,5 +76,3 @@ void test_svldr_za(uint32_t slice_base, const void *ptr) {
8776
void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) {
8877
svldr_vnum_za(slice_base, ptr, vnum);
8978
}
90-
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
91-
// CHECK: {{.*}}

clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c

Lines changed: 12 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,57 +6,32 @@
66

77
#include <arm_sme_draft_spec_subject_to_change.h>
88

9-
// CHECK-C-LABEL: define dso_local void @test_svstr_vnum_za(
10-
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
11-
// CHECK-C-NEXT: entry:
12-
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]])
13-
// CHECK-C-NEXT: ret void
14-
//
15-
// CHECK-CXX-LABEL: define dso_local void @_Z18test_svstr_vnum_zajPv(
16-
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
17-
// CHECK-CXX-NEXT: entry:
18-
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]])
19-
// CHECK-CXX-NEXT: ret void
9+
// CHECK-C-LABEL: @test_svstr_vnum_za(
10+
// CHECK-CXX-LABEL: @_Z18test_svstr_vnum_zajPv(
11+
// CHECK-NEXT: entry:
12+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], i32 0, ptr [[PTR]])
13+
// CHECK-NEXT: ret void
2014
//
2115
void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
2216
svstr_vnum_za(slice_base, ptr, 0);
2317
}
2418

2519
// CHECK-C-LABEL: define dso_local void @test_svstr_vnum_za_1(
26-
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
27-
// CHECK-C-NEXT: entry:
28-
// CHECK-C-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
29-
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], 15
30-
// CHECK-C-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
31-
// CHECK-C-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE]], 15
32-
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[TILESLICE]], ptr [[TMP0]])
33-
// CHECK-C-NEXT: ret void
34-
//
3520
// CHECK-CXX-LABEL: define dso_local void @_Z20test_svstr_vnum_za_1jPv(
36-
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
37-
// CHECK-CXX-NEXT: entry:
38-
// CHECK-CXX-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
39-
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], 15
40-
// CHECK-CXX-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
41-
// CHECK-CXX-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE]], 15
42-
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[TILESLICE]], ptr [[TMP0]])
43-
// CHECK-CXX-NEXT: ret void
21+
// CHECK-NEXT: entry:
22+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], i32 15, ptr [[PTR]])
23+
// CHECK-NEXT: ret void
4424
//
4525
void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
4626
svstr_vnum_za(slice_base, ptr, 15);
4727
}
4828

4929
// CHECK-C-LABEL: define dso_local void @test_svstr_za(
50-
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
51-
// CHECK-C-NEXT: entry:
52-
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]])
53-
// CHECK-C-NEXT: ret void
54-
//
5530
// CHECK-CXX-LABEL: define dso_local void @_Z13test_svstr_zajPv(
56-
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
57-
// CHECK-CXX-NEXT: entry:
58-
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]])
59-
// CHECK-CXX-NEXT: ret void
31+
// CHECK-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
32+
// CHECK-NEXT: entry:
33+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], i32 0, ptr [[PTR]])
34+
// CHECK-NEXT: ret void
6035
//
6136
void test_svstr_za(uint32_t slice_base, void *ptr) {
6237
svstr_za(slice_base, ptr);
@@ -87,5 +62,3 @@ void test_svstr_za(uint32_t slice_base, void *ptr) {
8762
void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) {
8863
svstr_vnum_za(slice_base, ptr, vnum);
8964
}
90-
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
91-
// CHECK: {{.*}}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2680,9 +2680,9 @@ let TargetPrefix = "aarch64" in {
26802680

26812681
// Spill + fill
26822682
def int_aarch64_sme_ldr : DefaultAttrsIntrinsic<
2683-
[], [llvm_i32_ty, llvm_ptr_ty]>;
2683+
[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<1>>]>;
26842684
def int_aarch64_sme_str : DefaultAttrsIntrinsic<
2685-
[], [llvm_i32_ty, llvm_ptr_ty]>;
2685+
[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<1>>]>;
26862686

26872687
class SME_TileToVector_Intrinsic
26882688
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -794,8 +794,8 @@ multiclass sme_spill<string opcodestr> {
794794
(!cast<Instruction>(NAME) MatrixOp:$ZAt,
795795
MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
796796
// base
797-
def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
798-
(!cast<Instruction>(NAME) ZA, $idx, 0, $base, 0)>;
797+
def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm, GPR64sp:$base),
798+
(!cast<Instruction>(NAME) ZA, $idx, $imm, $base, 0)>;
799799
}
800800

801801
multiclass sme_fill<string opcodestr> {
@@ -805,16 +805,16 @@ multiclass sme_fill<string opcodestr> {
805805
MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
806806
def NAME # _PSEUDO
807807
: Pseudo<(outs),
808-
(ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4,
808+
(ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4,
809809
GPR64sp:$base), []>,
810810
Sched<[]> {
811811
// Translated to actual instruction in AArch64ISelLowering.cpp
812812
let usesCustomInserter = 1;
813813
let mayLoad = 1;
814814
}
815815
// base
816-
def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
817-
(!cast<Instruction>(NAME # _PSEUDO) $idx, 0, $base)>;
816+
def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm, GPR64sp:$base),
817+
(!cast<Instruction>(NAME # _PSEUDO) $idx, $imm, $base)>;
818818
}
819819

820820
//===----------------------------------------------------------------------===//
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-NOVEC
3+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-FP
4+
5+
define float @f(ptr %x) {
6+
; CHECK-NOVEC-LABEL: 'f'
7+
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a.0.copyload = load float, ptr %x, align 1
8+
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %a.0.copyload
9+
;
10+
; CHECK-FP-LABEL: 'f'
11+
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a.0.copyload = load float, ptr %x, align 1
12+
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %a.0.copyload
13+
;
14+
entry:
15+
%a.0.copyload = load float, ptr %x, align 1
16+
ret float %a.0.copyload
17+
}
18+
19+
define float @ff(ptr %x, float %f) {
20+
; CHECK-NOVEC-LABEL: 'ff'
21+
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %f, ptr %x, align 1
22+
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
23+
;
24+
; CHECK-FP-LABEL: 'ff'
25+
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store float %f, ptr %x, align 1
26+
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
27+
;
28+
entry:
29+
store float %f, ptr %x, align 1
30+
ret float undef
31+
}
32+
33+
define double @d(ptr %x) {
34+
; CHECK-NOVEC-LABEL: 'd'
35+
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a.0.copyload = load double, ptr %x, align 1
36+
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %a.0.copyload
37+
;
38+
; CHECK-FP-LABEL: 'd'
39+
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a.0.copyload = load double, ptr %x, align 1
40+
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %a.0.copyload
41+
;
42+
entry:
43+
%a.0.copyload = load double, ptr %x, align 1
44+
ret double %a.0.copyload
45+
}
46+
47+
define double @dd(ptr %x, double %f) {
48+
; CHECK-NOVEC-LABEL: 'dd'
49+
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store double %f, ptr %x, align 1
50+
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
51+
;
52+
; CHECK-FP-LABEL: 'dd'
53+
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double %f, ptr %x, align 1
54+
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
55+
;
56+
entry:
57+
store double %f, ptr %x, align 1
58+
ret double undef
59+
}

0 commit comments

Comments
 (0)