Skip to content

Commit a1c34a9

Browse files
committed
[ARM] Correct vector predicate type in MVE getCmpSelInstrCost
1 parent b2c7f06 commit a1c34a9

File tree

2 files changed

+29
-2
lines changed

2 files changed

+29
-2
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -943,7 +943,7 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
943943
BaseT::getScalarizationOverhead(VecCondTy, true, false) +
944944
VecValTy->getNumElements() *
945945
getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
946-
CondTy->getScalarType(), VecPred, CostKind,
946+
VecCondTy->getScalarType(), VecPred, CostKind,
947947
I);
948948
}
949949

llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -237,4 +237,31 @@ while.end: ; preds = %while.end.loopexit,
237237
ret void
238238
}
239239

240-
attributes #0 = { "target-features"="+mve.fp" }
240+
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp1 = fcmp
241+
; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: %cmp1 = fcmp
242+
; CHECK: LV: Found an estimated cost of 36 for VF 4 For instruction: %cmp1 = fcmp
243+
; Test input for the cost-model CHECK lines above, which match the reported
; cost of the `%cmp1 = fcmp` instruction at VF 1, 2 and 4.
;
; Control flow: `entry` skips the loop entirely when %blockSize is 0;
; otherwise `while.body` runs until the down-counting %dec reaches 0.
; Each iteration:
;   - loads one float through the current source pointer,
;   - compares it `olt` against 0.0 (%cmp1),
;   - selects 10.0 when the compare is true, else the loaded value (%cond),
;   - converts the selected float to i32 with fptosi and stores it through
;     the current destination pointer,
;   - advances both pointers by one element and decrements the counter.
define void @floatcmp(float* nocapture readonly %pSrc, i32* nocapture %pDst, i32 %blockSize) #0 {
244+
entry:
245+
%cmp.not7 = icmp eq i32 %blockSize, 0
246+
br i1 %cmp.not7, label %while.end, label %while.body
247+
248+
while.body: ; preds = %entry, %while.body
249+
%pSrc.addr.010 = phi float* [ %incdec.ptr2, %while.body ], [ %pSrc, %entry ]
250+
%blockSize.addr.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
251+
%pDst.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %pDst, %entry ]
252+
%0 = load float, float* %pSrc.addr.010, align 4
253+
%cmp1 = fcmp nnan ninf nsz olt float %0, 0.000000e+00
254+
%cond = select nnan ninf nsz i1 %cmp1, float 1.000000e+01, float %0
255+
%conv = fptosi float %cond to i32
256+
%incdec.ptr = getelementptr inbounds i32, i32* %pDst.addr.08, i32 1
257+
store i32 %conv, i32* %pDst.addr.08, align 4
258+
%incdec.ptr2 = getelementptr inbounds float, float* %pSrc.addr.010, i32 1
259+
%dec = add i32 %blockSize.addr.09, -1
260+
%cmp.not = icmp eq i32 %dec, 0
261+
br i1 %cmp.not, label %while.end, label %while.body
262+
263+
while.end: ; preds = %while.body, %entry
264+
ret void
265+
}
266+
267+
attributes #0 = { "target-features"="+mve" }

0 commit comments

Comments
 (0)