[LV] Check for vector-to-scalar casts in legalizer (#106244)

ErikHogeman · web-flow · commit 78e1e6ace6c9 · 2024-09-06T11:20:14.000+02:00
The code makes assumptions later on the operations and their inputs
being scalar in the loops that are processed, so we should make sure
this is the case in the legalizer.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -943,9 +943,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         VecCallVariantsFound = true;
 
       // Check that the instruction return type is vectorizable.
+      // We can't vectorize casts from vector type to scalar type.
       // Also, we can't vectorize extractelement instructions.
       if ((!VectorType::isValidElementType(I.getType()) &&
            !I.getType()->isVoidTy()) ||
+          (isa<CastInst>(I) &&
+           !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
           isa<ExtractElementInst>(I)) {
         reportVectorizationFailure("Found unvectorizable type",
             "instruction return type cannot be vectorized",
diff --git a/llvm/test/Transforms/LoopVectorize/vector-to-scalar-cast.ll b/llvm/test/Transforms/LoopVectorize/vector-to-scalar-cast.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; The test was crashing earlier due to a vectorization attempt; vector-to-scalar cast
+; is now forbidden in the legalizer, and the test isn't a vectorization candidate now.
+; RUN: opt -S -force-widen-divrem-via-safe-divisor=false -force-vector-width=4 --passes=loop-vectorize < %s | FileCheck %s
+
+define void @vector_to_scalar_cast(ptr %out) {
+; CHECK-LABEL: define void @vector_to_scalar_cast(
+; CHECK-SAME: ptr [[OUT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[VEC0:%.*]] = insertelement <2 x i16> undef, i16 0, i64 0
+; CHECK-NEXT:    [[VEC1:%.*]] = insertelement <2 x i16> [[VEC0]], i16 0, i64 1
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV]], 11
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[VEC_SCALAR_CAST:%.*]] = bitcast <2 x i16> [[VEC1]] to i32
+; CHECK-NEXT:    [[SREM:%.*]] = srem i32 0, [[VEC_SCALAR_CAST]]
+; CHECK-NEXT:    store i32 [[SREM]], ptr [[OUT]], align 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %vec0 = insertelement <2 x i16> undef, i16 0, i64 0
+  %vec1 = insertelement <2 x i16> %vec0, i16 0, i64 1
+  br label %loop
+
+loop:
+  %iv = phi i32 [ %iv.next, %loop ], [ 1, %entry ]
+  %exitcond = icmp ne i32 %iv, 11
+  %iv.next = add nuw nsw i32 %iv, 1
+  %vec.scalar.cast = bitcast <2 x i16> %vec1 to i32
+  %srem = srem i32 0, %vec.scalar.cast
+  store i32 %srem, ptr %out
+  br i1 %exitcond, label %loop, label %exit
+
+exit:
+  ret void
+}