Skip to content

Commit 1b13849

Browse files
committed
[AArch64] Add bf16 broadcast and transpose costs
These costs depend only on the size of the element, not on its type (although the codegen does still need to account for the type).
1 parent ccaf69a commit 1b13849

File tree

3 files changed

+48
-8
lines changed

3 files changed

+48
-8
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5607,6 +5607,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
56075607
{TTI::SK_Broadcast, MVT::v2i64, 1},
56085608
{TTI::SK_Broadcast, MVT::v4f16, 1},
56095609
{TTI::SK_Broadcast, MVT::v8f16, 1},
5610+
{TTI::SK_Broadcast, MVT::v4bf16, 1},
5611+
{TTI::SK_Broadcast, MVT::v8bf16, 1},
56105612
{TTI::SK_Broadcast, MVT::v2f32, 1},
56115613
{TTI::SK_Broadcast, MVT::v4f32, 1},
56125614
{TTI::SK_Broadcast, MVT::v2f64, 1},
@@ -5621,6 +5623,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
56215623
{TTI::SK_Transpose, MVT::v2i64, 1},
56225624
{TTI::SK_Transpose, MVT::v4f16, 1},
56235625
{TTI::SK_Transpose, MVT::v8f16, 1},
5626+
{TTI::SK_Transpose, MVT::v4bf16, 1},
5627+
{TTI::SK_Transpose, MVT::v8bf16, 1},
56245628
{TTI::SK_Transpose, MVT::v2f32, 1},
56255629
{TTI::SK_Transpose, MVT::v4f32, 1},
56265630
{TTI::SK_Transpose, MVT::v2f64, 1},

llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ define void @broadcast() {
3131
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer
3232
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer
3333
; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1bf16 = shufflevector <1 x bfloat> undef, <1 x bfloat> undef, <1 x i32> zeroinitializer
34-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <2 x i32> zeroinitializer
34+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <2 x i32> zeroinitializer
3535
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> zeroinitializer
36-
; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:14 SizeLat:14 for: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> zeroinitializer
37-
; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:14 Lat:28 SizeLat:28 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> zeroinitializer
36+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> zeroinitializer
37+
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> zeroinitializer
3838
; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1f32 = shufflevector <1 x float> undef, <1 x float> undef, <1 x i32> zeroinitializer
3939
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
4040
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
@@ -116,8 +116,8 @@ define void @broadcast_half() {
116116
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <8 x i32> zeroinitializer
117117
; CHECK-NEXT: Cost Model: Found costs of 0 for: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <1 x i32> zeroinitializer
118118
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <2 x i32> zeroinitializer
119-
; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:14 SizeLat:14 for: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <4 x i32> zeroinitializer
120-
; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:14 SizeLat:14 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <8 x i32> zeroinitializer
119+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <4 x i32> zeroinitializer
120+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <8 x i32> zeroinitializer
121121
; CHECK-NEXT: Cost Model: Found costs of 0 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <1 x i32> zeroinitializer
122122
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <2 x i32> zeroinitializer
123123
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <4 x i32> zeroinitializer
@@ -194,9 +194,9 @@ define void @broadcast_double() {
194194
; CHECK-NEXT: Cost Model: Found costs of 4 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <32 x i32> zeroinitializer
195195
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v1bf16 = shufflevector <1 x bfloat> undef, <1 x bfloat> undef, <2 x i32> zeroinitializer
196196
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <4 x i32> zeroinitializer
197-
; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:14 SizeLat:14 for: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <8 x i32> zeroinitializer
198-
; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:14 Lat:28 SizeLat:28 for: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <16 x i32> zeroinitializer
199-
; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:28 Lat:56 SizeLat:56 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <32 x i32> zeroinitializer
197+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <8 x i32> zeroinitializer
198+
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <16 x i32> zeroinitializer
199+
; CHECK-NEXT: Cost Model: Found costs of 4 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <32 x i32> zeroinitializer
200200
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v1f32 = shufflevector <1 x float> undef, <1 x float> undef, <2 x i32> zeroinitializer
201201
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> zeroinitializer
202202
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> zeroinitializer

llvm/test/Analysis/CostModel/AArch64/shuffle-transpose.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,3 +218,39 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
218218
%tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
219219
ret <8 x half> %tmp0
220220
}
221+
222+
; Cost-model test for a two-input shuffle of <4 x bfloat> with mask <0, 4, 2, 6>,
; i.e. result = { v0[0], v1[0], v0[2], v1[2] } (the even lanes of both inputs).
; The CHECK lines below expect a reported cost of 1 for the shufflevector.
define <4 x bfloat> @trn1.v4bf16(<4 x bfloat> %v0, <4 x bfloat> %v1) {
223+
; CHECK-LABEL: 'trn1.v4bf16'
224+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
225+
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %tmp0
226+
;
227+
%tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
228+
ret <4 x bfloat> %tmp0
229+
}
230+
231+
; Cost-model test for a two-input shuffle of <4 x bfloat> with mask <1, 5, 3, 7>,
; i.e. result = { v0[1], v1[1], v0[3], v1[3] } (the odd lanes of both inputs).
; The CHECK lines below expect a reported cost of 1 for the shufflevector.
define <4 x bfloat> @trn2.v4bf16(<4 x bfloat> %v0, <4 x bfloat> %v1) {
232+
; CHECK-LABEL: 'trn2.v4bf16'
233+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
234+
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %tmp0
235+
;
236+
%tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
237+
ret <4 x bfloat> %tmp0
238+
}
239+
240+
; Cost-model test for a two-input shuffle of <8 x bfloat> with mask
; <0, 8, 2, 10, 4, 12, 6, 14>: the even lanes of %v0 and %v1, interleaved.
; The CHECK lines below expect a reported cost of 1 for the shufflevector.
define <8 x bfloat> @trn1.v8bf16(<8 x bfloat> %v0, <8 x bfloat> %v1) {
241+
; CHECK-LABEL: 'trn1.v8bf16'
242+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
243+
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %tmp0
244+
;
245+
%tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
246+
ret <8 x bfloat> %tmp0
247+
}
248+
249+
; Cost-model test for a two-input shuffle of <8 x bfloat> with mask
; <1, 9, 3, 11, 5, 13, 7, 15>: the odd lanes of %v0 and %v1, interleaved.
; The CHECK lines below expect a reported cost of 1 for the shufflevector.
define <8 x bfloat> @trn2.v8bf16(<8 x bfloat> %v0, <8 x bfloat> %v1) {
250+
; CHECK-LABEL: 'trn2.v8bf16'
251+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
252+
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %tmp0
253+
;
254+
%tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
255+
ret <8 x bfloat> %tmp0
256+
}

0 commit comments

Comments
 (0)