Skip to content

Commit f2e0f08

Browse files
committed
[AArch64] Add MATCH loops to LoopIdiomVectorizePass
This patch adds a new loop idiom to LoopIdiomVectorizePass, enabling it to recognise and use @llvm.experimental.vector.match to vectorise loops such as:

    char* find_first_of(char *first, char *last, char *s_first, char *s_last) {
      for (; first != last; ++first)
        for (char *it = s_first; it != s_last; ++it)
          if (*first == *it)
            return first;
      return last;
    }

These loops match the C++ standard library's std::find_first_of.
1 parent 46f43b6 commit f2e0f08

File tree

4 files changed

+559
-10
lines changed

4 files changed

+559
-10
lines changed

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,9 @@ class TargetTransformInfoImplBase {
775775
default:
776776
break;
777777
case Intrinsic::experimental_vector_histogram_add:
778-
// For now, we want explicit support from the target for histograms.
778+
case Intrinsic::experimental_vector_match:
779+
// For now, we want explicit support from the target for histograms and
780+
// matches.
779781
return InstructionCost::getInvalid();
780782
case Intrinsic::allow_runtime_check:
781783
case Intrinsic::allow_ubsan_check:

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,23 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
914914
}
915915
break;
916916
}
917+
case Intrinsic::experimental_vector_match: {
918+
EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
919+
unsigned SearchSize =
920+
cast<FixedVectorType>(ICA.getArgTypes()[1])->getNumElements();
921+
// If we can't lower to MATCH, return an invalid cost.
922+
if (getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))
923+
return InstructionCost::getInvalid();
924+
// Base cost for MATCH instructions. At least on the Neoverse V2 and
925+
// Neoverse V3 these are cheap operations with the same latency as a vector
926+
// ADD, though in most cases we also need to do an extra DUP.
927+
InstructionCost Cost = 4;
928+
// For fixed-length vectors we currently need an extra five--six
929+
// instructions besides the MATCH.
930+
if (isa<FixedVectorType>(RetTy))
931+
Cost += 6;
932+
return Cost;
933+
}
917934
default:
918935
break;
919936
}

0 commit comments

Comments
 (0)