Skip to content

[LV] Teach LoopVectorizationLegality about struct vector calls #119221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions llvm/include/llvm/IR/VectorTypeUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ Type *toScalarizedStructTy(StructType *StructTy);
/// are vectors of matching element count. This does not include empty structs.
bool isVectorizedStructTy(StructType *StructTy);

/// Returns true if `StructTy` is an unpacked literal struct where all elements
/// are scalars that can be used as vector element types. This does not include
/// empty structs.
bool canVectorizeStructTy(StructType *StructTy);

/// A helper for converting to vectorized types. For scalar types, this is
/// equivalent to calling `toVectorTy`. For struct types, this returns a new
/// struct where each element type has been widened to a vector type.
Expand Down Expand Up @@ -71,6 +75,18 @@ inline bool isVectorizedTy(Type *Ty) {
return Ty->isVectorTy();
}

/// Reports whether `Ty` is a candidate for widening. Accepted types are:
///  - void,
///  - any valid vector element type, and
///  - struct types for which canVectorizeStructTy() returns true.
/// Note: A type being vectorizable does not make widening valid in every
/// context. For example, a vectorized struct (as returned by toVectorizedTy)
/// does not perform (de)interleaving, so it can't be used for vectorizing
/// loads/stores.
inline bool canVectorizeTy(Type *Ty) {
  // Struct types are judged solely by the struct-specific predicate.
  if (auto *AsStruct = dyn_cast<StructType>(Ty))
    return canVectorizeStructTy(AsStruct);

  // Void is accepted alongside ordinary vector element types.
  if (Ty->isVoidTy())
    return true;
  return VectorType::isValidElementType(Ty);
}

/// Returns the types contained in `Ty`. For struct types, it returns the
/// elements, all other types are returned directly.
inline ArrayRef<Type *> getContainedTypes(Type *const &Ty) {
Expand Down
10 changes: 10 additions & 0 deletions llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,10 @@ class LoopVectorizationLegality {
/// has a vectorized variant available.
bool hasVectorCallVariants() const { return VecCallVariantsFound; }

/// Returns true if there is at least one function call in the loop which
/// returns a struct type and needs to be vectorized.
/// Vectorizing such calls is not supported yet, so this query exists to let
/// callers bail out; it mirrors the `StructVecCallFound` flag.
bool hasStructVectorCall() const { return StructVecCallFound; }

unsigned getNumStores() const { return LAI->getNumStores(); }
unsigned getNumLoads() const { return LAI->getNumLoads(); }

Expand Down Expand Up @@ -644,6 +648,12 @@ class LoopVectorizationLegality {
/// the use of those function variants.
bool VecCallVariantsFound = false;

/// If we find a call (to be vectorized) that returns a struct type, record
/// that so we can bail out until this is supported.
/// The flag starts out false and is exposed through hasStructVectorCall().
/// TODO: Remove this flag once vectorizing calls with struct returns is
/// supported.
bool StructVecCallFound = false;

/// Indicates whether this loop has an uncountable early exit, i.e. an
/// uncountable exiting block that is not the latch.
bool HasUncountableEarlyExit = false;
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/IR/VectorTypeUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,11 @@ bool llvm::isVectorizedStructTy(StructType *StructTy) {
return Ty->isVectorTy() && cast<VectorType>(Ty)->getElementCount() == VF;
});
}

/// Reports whether `StructTy` can be widened element-wise. This requires a
/// non-empty, unpacked literal struct in which every element is a valid
/// vector element type.
bool llvm::canVectorizeStructTy(StructType *StructTy) {
  const auto Members = StructTy->elements();

  // Empty structs are never vectorizable.
  if (Members.empty())
    return false;

  // Only unpacked literal structs are considered.
  if (!isUnpackedStructLiteral(StructTy))
    return false;

  // Every member must be usable as a vector element type.
  return all_of(Members, VectorType::isValidElementType);
}
34 changes: 32 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,18 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
return Scalarize;
}

/// Returns true if the call return type `Ty` can be widened by the loop
/// vectorizer.
///
/// Struct return types must (for now) contain homogeneous element types and
/// must satisfy canVectorizeTy(). Any other type is simply forwarded to
/// canVectorizeTy().
static bool canWidenCallReturnType(Type *Ty) {
  auto *StructTy = dyn_cast<StructType>(Ty);
  // TODO: Remove the homogeneous types restriction. This is just an initial
  // simplification. When we want to support things like the overflow intrinsics
  // we will have to lift this restriction.
  if (StructTy && !StructTy->containsHomogeneousTypes())
    return false;
  // Query the original type rather than the dyn_cast result: `StructTy` is
  // null whenever `Ty` is not a struct, and canVectorizeTy() performs a
  // dyn_cast on its argument, which must not be null.
  return canVectorizeTy(Ty);
}

bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *Header = TheLoop->getHeader();

Expand Down Expand Up @@ -942,11 +954,29 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI && !VFDatabase::getMappings(*CI).empty())
VecCallVariantsFound = true;

auto CanWidenInstructionTy = [this](Instruction const &Inst) {
Type *InstTy = Inst.getType();
if (!isa<StructType>(InstTy))
return canVectorizeTy(InstTy);

// For now, we only recognize struct values returned from calls where
// all users are extractvalue as vectorizable. All element types of the
// struct must be types that can be widened.
if (isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
all_of(Inst.users(), IsaPred<ExtractValueInst>)) {
// TODO: Remove the `StructVecCallFound` flag once vectorizing calls
// with struct returns is supported.
StructVecCallFound = true;
return true;
}

return false;
};

// Check that the instruction return type is vectorizable.
// We can't vectorize casts from vector type to scalar type.
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(I.getType()) &&
!I.getType()->isVoidTy()) ||
if (!CanWidenInstructionTy(I) ||
(isa<CastInst>(I) &&
!VectorType::isValidElementType(I.getOperand(0)->getType())) ||
isa<ExtractElementInst>(I)) {
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10348,6 +10348,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}

// Legality accepts calls that return struct types, but widening them is not
// implemented yet: report the failure and leave the loop untouched.
if (LVL.hasStructVectorCall()) {
reportVectorizationFailure("Auto-vectorization of calls that return struct "
"types is not yet supported",
"StructCallVectorizationUnsupported", ORE, L);
return false;
}

// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
// even evaluating whether vectorization is profitable. Since we cannot modify
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -pass-remarks-analysis=loop-vectorize 2>%t | FileCheck %s
; RUN: cat %t | FileCheck --check-prefix=CHECK-REMARKS %s

target triple = "aarch64-unknown-linux-gnu"

; Tests basic vectorization of scalable homogeneous struct literal returns.

; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
; Loop over 1024 floats: each element of %in is passed to @foo and the two
; fields of the returned { float, float } are stored to %out_a and %out_b.
; All pointer arguments are noalias.
define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f32_widen
; CHECK-NOT: vector.body:
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
%in_val = load float, ptr %arrayidx, align 4
; Attribute group #0 maps @foo to the masked scalable variant
; @scalable_vec_masked_foo.
%call = tail call { float, float } @foo(float %in_val) #0
; The struct result is only ever used by extractvalue.
%extract_a = extractvalue { float, float } %call, 0
%extract_b = extractvalue { float, float } %call, 1
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
store float %extract_a, ptr %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
store float %extract_b, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %for.body

exit:
ret void
}

; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
; Double-precision counterpart of the f32 test: each element of %in is passed
; to @bar and the two fields of the returned { double, double } are stored to
; %out_a and %out_b. All pointer arguments are noalias.
define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f64_widen
; CHECK-NOT: vector.body:
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
%in_val = load double, ptr %arrayidx, align 8
; Attribute group #1 maps @bar to the masked scalable variant
; @scalable_vec_masked_bar.
%call = tail call { double, double } @bar(double %in_val) #1
; The struct result is only ever used by extractvalue.
%extract_a = extractvalue { double, double } %call, 0
%extract_b = extractvalue { double, double } %call, 1
%arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv
store double %extract_a, ptr %arrayidx2, align 8
%arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv
store double %extract_b, ptr %arrayidx4, align 8
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %for.body

exit:
ret void
}

; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
; Same loop body as @struct_return_f32_widen, but the pointer arguments are
; not marked noalias. Going by the function name, this is presumably meant to
; cover the case where runtime alias checks would be required.
define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks
; CHECK-NOT: vector.body:
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
%in_val = load float, ptr %arrayidx, align 4
; Attribute group #0 maps @foo to the masked scalable variant
; @scalable_vec_masked_foo.
%call = tail call { float, float } @foo(float %in_val) #0
; The struct result is only ever used by extractvalue.
%extract_a = extractvalue { float, float } %call, 0
%extract_b = extractvalue { float, float } %call, 1
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
store float %extract_a, ptr %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
store float %extract_b, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %for.body

exit:
ret void
}

declare { float, float } @foo(float)
declare { double, double } @bar(double)

declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @scalable_vec_masked_bar(<vscale x 2 x double>, <vscale x 2 x i1>)


attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_bar(scalable_vec_masked_bar)" }
Loading
Loading