@@ -9,17 +9,25 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform, i6
; CHECK-LABEL: define void @test_uniform
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
- ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
- ; CHECK: for.body:
- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
- ; CHECK-NEXT: [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
- ; CHECK-NEXT: [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
- ; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[UNIFORM]]) #[[ATTR1:[0-9]+]]
- ; CHECK-NEXT: [[GEPDST:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDVARS_IV]]
- ; CHECK-NEXT: store double [[CALL]], ptr [[GEPDST]], align 8
- ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
- ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+ ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1
+ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP1]])
+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+ ; CHECK: vector.body:
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
+ ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP3]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
+ ; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
+ ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP4]], ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+ ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP2]])
+ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
+ ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
@@ -41,6 +49,50 @@ for.cond.cleanup:
ret void
}

+ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i32 %uniform, i64 %n) #0 {
+ ; CHECK-LABEL: define void @test_uniform_smaller_scalar
+ ; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1
+ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP1]])
+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+ ; CHECK: vector.body:
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
+ ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP3]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
+ ; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
+ ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP4]], ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+ ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP2]])
+ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
+ ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+ ; CHECK: for.cond.cleanup:
+ ; CHECK-NEXT: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepsrc = getelementptr double, ptr %src, i64 %indvars.iv
+ %data = load double, ptr %gepsrc, align 8
+ %call = call double @bar(double %data, i32 %uniform) #2
+ %gepdst = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
+ store double %call, ptr %gepdst
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+ for.cond.cleanup:
+ ret void
+ }
+
; If the parameter is not uniform, then we can't use the vector variant.
define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 %n) #0 {
; CHECK-LABEL: define void @test_uniform_not_invariant
@@ -51,7 +103,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
- ; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR1]]
+ ; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: [[GEPDST:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[GEPDST]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -80,11 +132,14 @@ for.cond.cleanup:

; Scalar functions
declare double @foo(double, i64)
+ declare double @bar(double, i32)

; Vector variants
declare <vscale x 2 x double> @foo_uniform(<vscale x 2 x double>, i64, <vscale x 2 x i1>)
+ declare <vscale x 2 x double> @bar_uniform(<vscale x 2 x double>, i32, <vscale x 2 x i1>)

attributes #0 = { "target-features"="+sve" }

; Mappings
- attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_Mxvu_foo(foo_uniform)" }
+ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxvu_foo(foo_uniform)" }
+ attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsMxvu_bar(bar_uniform)" }