Skip to content

Commit 06c8210

Browse files
authored
Update P7 32-bit partial vector load cost (#108261)
Update the cost model to reflect the codegen change from #104507, which uses lfiwzx for 32-bit partial vector loads on pwr7.
1 parent 487686b commit 06c8210

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -800,13 +800,19 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
800800
// PPCTargetLowering can't compute the cost appropriately. So here we
801801
// explicitly check this case. There are also corresponding store
802802
// instructions.
803-
unsigned MemBytes = Src->getPrimitiveSizeInBits();
804-
if (ST->hasVSX() && IsAltivecType &&
805-
(MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
806-
return 1;
803+
unsigned MemBits = Src->getPrimitiveSizeInBits();
804+
unsigned SrcBytes = LT.second.getStoreSize();
805+
if (ST->hasVSX() && IsAltivecType) {
806+
if (MemBits == 64 || (ST->hasP8Vector() && MemBits == 32))
807+
return 1;
808+
809+
// Use lfiwax/xxspltw
810+
Align AlignBytes = Alignment ? *Alignment : Align(1);
811+
if (Opcode == Instruction::Load && MemBits == 32 && AlignBytes < SrcBytes)
812+
return 2;
813+
}
807814

808815
// Aligned loads and stores are easy.
809-
unsigned SrcBytes = LT.second.getStoreSize();
810816
if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
811817
return Cost;
812818

llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
1-
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
1+
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck -DCOST32=1 %s
2+
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -DCOST32=2 %s
23
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
34
target triple = "powerpc64-unknown-linux-gnu"
45

56
define i32 @loads(i32 %arg) {
6-
; CHECK: cost of 1 {{.*}} load
7+
; CHECK: cost of [[COST32]] {{.*}} load
78
load <4 x i8>, ptr undef, align 1
89

910
; CHECK: cost of 1 {{.*}} load
1011
load <8 x i8>, ptr undef, align 1
1112

12-
; CHECK: cost of 1 {{.*}} load
13+
; CHECK: cost of [[COST32]] {{.*}} load
1314
load <2 x i16>, ptr undef, align 2
1415

1516
; CHECK: cost of 1 {{.*}} load

0 commit comments

Comments
 (0)