
Commit ad78e21

[NFC][AArch64] Tests for guarding unrolling with scalable vec ins/ext (#81132)
1 parent 93db5b7 commit ad78e21

2 files changed: 120 additions, 0 deletions

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 57 additions & 0 deletions
@@ -30,6 +30,61 @@ declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>
 declare <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32>, i64)
 declare <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64)
 
+define void @vector_insert_extract_idxzero_128b() #1 {
+; CHECK-LABEL: 'vector_insert_extract_idxzero_128b'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_idxzero_128b'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+%insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
+%extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
+%insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
+%extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
+%extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
+ret void
+}
+declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
+declare <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double>, i64)
+declare <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1>, <vscale x 2 x i1>, i64)
+declare <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
+
+define void @vector_insert_extract_idxzero_256b() #2 {
+; CHECK-LABEL: 'vector_insert_extract_idxzero_256b'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_idxzero_256b'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+%insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
+%extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nx4f32(<vscale x 4 x float> undef, i64 0)
+%insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
+%extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
+%extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
+ret void
+}
+declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16>, <16 x i16>, i64)
+declare <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float>, i64)
 
 define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale x 4 x float> %v2, <vscale x 4 x double> %v3) {
 ; CHECK-LABEL: 'reductions'
@@ -864,3 +919,5 @@ declare void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x ptr> %ptrs, i32 %a
 declare void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 %align, <1 x i1> %masks)
 
 attributes #0 = { "target-features"="+sve,+bf16" }
+attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
+attributes #2 = { "target-features"="+sve" vscale_range(2, 16) }

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes=loop-unroll,simplifycfg -S -mtriple aarch64 %s | FileCheck %s
+
+;; This test contains IR similar to what would be generated when SVE ACLE
+;; routines are used with fixed-width vector types -- lots of subvector inserts
+;; and extracts that are effectively just bitcasts since the types are the
+;; same at a given SVE bit size. We want to make sure that they are not a
+;; barrier to unrolling simple loops with a fixed trip count which could be
+;; further optimized.
+
+define void @test_ins_ext_cost(ptr readonly %a, ptr readonly %b, ptr readonly %c, ptr noalias %d) #0 {
+; CHECK-LABEL: define void @test_ins_ext_cost(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr readonly [[C:%.*]], ptr noalias [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[EXIT_COND:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[FOR_BODY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 1, [[FOR_BODY]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds <8 x float>, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[LOAD_A:%.*]] = load <8 x float>, ptr [[GEP_A]], align 16
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds <8 x float>, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[LOAD_B:%.*]] = load <8 x float>, ptr [[GEP_B]], align 16
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds <8 x float>, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: [[LOAD_C:%.*]] = load <8 x float>, ptr [[GEP_C]], align 16
+; CHECK-NEXT: [[CAST_SCALABLE_B:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_B]], i64 0)
+; CHECK-NEXT: [[CAST_SCALABLE_C:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_C]], i64 0)
+; CHECK-NEXT: [[ADD:%.*]] = fadd <vscale x 4 x float> [[CAST_SCALABLE_B]], [[CAST_SCALABLE_C]]
+; CHECK-NEXT: [[CAST_SCALABLE_A:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_A]], i64 0)
+; CHECK-NEXT: [[MUL:%.*]] = fmul <vscale x 4 x float> [[CAST_SCALABLE_A]], [[ADD]]
+; CHECK-NEXT: [[CAST_FIXED_D:%.*]] = tail call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[MUL]], i64 0)
+; CHECK-NEXT: [[GEP_D:%.*]] = getelementptr inbounds <8 x float>, ptr [[D]], i64 0, i64 [[IV]]
+; CHECK-NEXT: store <8 x float> [[CAST_FIXED_D]], ptr [[GEP_D]], align 16
+; CHECK-NEXT: br i1 [[EXIT_COND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+br label %for.body
+
+for.body:
+%exit.cond = phi i1 [ true, %entry ], [ false, %for.body ]
+%iv = phi i64 [ 0, %entry ], [ 1, %for.body ]
+%gep.a = getelementptr inbounds <8 x float>, ptr %a, i64 %iv
+%load.a = load <8 x float>, ptr %gep.a, align 16
+%gep.b = getelementptr inbounds <8 x float>, ptr %b, i64 %iv
+%load.b = load <8 x float>, ptr %gep.b, align 16
+%gep.c = getelementptr inbounds <8 x float>, ptr %c, i64 %iv
+%load.c = load <8 x float>, ptr %gep.c, align 16
+%cast.scalable.b = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> %load.b, i64 0)
+%cast.scalable.c = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> %load.c, i64 0)
+%add = fadd <vscale x 4 x float> %cast.scalable.b, %cast.scalable.c
+%cast.scalable.a = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> %load.a, i64 0)
+%mul = fmul <vscale x 4 x float> %cast.scalable.a, %add
+%cast.fixed.d = tail call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> %mul, i64 0)
+%gep.d = getelementptr inbounds <8 x float>, ptr %d, i64 0, i64 %iv
+store <8 x float> %cast.fixed.d, ptr %gep.d, align 16
+br i1 %exit.cond, label %for.body, label %exit
+
+exit:
+ret void
+}
+
+attributes #0 = { "target-features"="+sve" vscale_range(2, 16) }
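
For context on the new unrolling test: its header comment describes IR like what Clang emits when SVE ACLE intrinsics are mixed with fixed-width vector types. The C sketch below is a hypothetical illustration of that source pattern, not part of this commit; the type and function names are invented, and it assumes Clang's fixed-length SVE extension (arm_sve_vector_bits with a matching -msve-vector-bits setting). Passing the fixed-length vectors to the scalable intrinsics is what introduces the index-0 llvm.vector.insert/llvm.vector.extract conversions seen in the test body.

// Hypothetical C sketch (not from this commit); names are invented.
// Assumes Clang with SVE enabled and a fixed vector width, e.g.:
//   clang -O2 -march=armv8-a+sve -msve-vector-bits=256 -S example.c
#include <arm_sve.h>

// Fixed-length SVE vector type: at 256 bits this holds 8 floats, matching
// the <8 x float> loads and stores in the test.
typedef svfloat32_t fixed_f32x8_t __attribute__((arm_sve_vector_bits(256)));

// d[i] = a[i] * (b[i] + c[i]) over a small, fixed trip count. The implicit
// conversions between fixed_f32x8_t and the sizeless svfloat32_t intrinsic
// arguments are what lower to llvm.vector.insert/llvm.vector.extract at
// index 0 -- effectively bitcasts, as the test's header comment notes.
void mul_add(const fixed_f32x8_t *a, const fixed_f32x8_t *b,
             const fixed_f32x8_t *c, fixed_f32x8_t *restrict d) {
  svbool_t all = svptrue_b32();                       // all-lanes predicate
  for (int i = 0; i < 2; ++i) {                       // fixed trip count
    svfloat32_t sum  = svadd_f32_x(all, b[i], c[i]);  // fixed -> scalable
    svfloat32_t prod = svmul_f32_x(all, a[i], sum);
    d[i] = prod;                                      // scalable -> fixed
  }
}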
