Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 941b1f1

Browse files
committed
[LSV] Skip all non-byte sizes, not only less than eight bits
Summary: The code comments indicate that no effort has been spent on correctly handling loads/stores whose size isn't a multiple of the byte size. However, the code only avoided types smaller than 8 bits. So, for example, a load of an i28 could still be considered as a candidate for vectorization. This patch adjusts the code to behave according to the code comment. The test case used to hit the following assert when trying to "cast" an i32 to i28 using CreateBitOrPointerCast: opt: ../lib/IR/Instructions.cpp:2565: Assertion `castIsValid(op, S, Ty) && "Invalid cast!"' failed. #0 PrintStackTraceSignalHandler(void*) #1 SignalHandler(int) #2 __restore_rt #3 __GI_raise #4 __GI_abort #5 __GI___assert_fail #6 llvm::CastInst::Create(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*, llvm::Twine const&, llvm::Instruction*) #7 llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>::CreateBitOrPointerCast(llvm::Value*, llvm::Type*, llvm::Twine const&) #8 (anonymous namespace)::Vectorizer::vectorizeLoadChain(llvm::ArrayRef<llvm::Instruction*>, llvm::SmallPtrSet<llvm::Instruction*, 16u>*) Reviewers: arsenm Reviewed By: arsenm Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39295 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316663 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 975b1d7 commit 941b1f1

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
613613
// Skip weird non-byte sizes. They probably aren't worth the effort of
614614
// handling correctly.
615615
unsigned TySize = DL.getTypeSizeInBits(Ty);
616-
if (TySize < 8)
616+
if ((TySize % 8) != 0)
617617
continue;
618618

619619
Value *Ptr = LI->getPointerOperand();
@@ -649,12 +649,14 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
649649
// Skip weird non-byte sizes. They probably aren't worth the effort of
650650
// handling correctly.
651651
unsigned TySize = DL.getTypeSizeInBits(Ty);
652-
if (TySize < 8)
652+
if ((TySize % 8) != 0)
653653
continue;
654654

655655
Value *Ptr = SI->getPointerOperand();
656656
unsigned AS = Ptr->getType()->getPointerAddressSpace();
657657
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
658+
659+
// No point in looking at these if they're too big to vectorize.
658660
if (TySize > VecRegSize / 2)
659661
continue;
660662

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -load-store-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
%rec = type { i32, i28 }
5+
6+
; We currently do not optimize this scenario.
7+
; But we verify that we no longer crash when compiling this.
8+
define void @test1(%rec* %out, %rec* %in) {
9+
; CHECK-LABEL: @test1(
10+
; CHECK-NEXT: [[IN1:%.*]] = getelementptr [[REC:%.*]], %rec* [[IN:%.*]], i16 0, i32 0
11+
; CHECK-NEXT: [[IN2:%.*]] = getelementptr [[REC]], %rec* [[IN]], i16 0, i32 1
12+
; CHECK-NEXT: [[VAL1:%.*]] = load i32, i32* [[IN1]], align 8
13+
; CHECK-NEXT: [[VAL2:%.*]] = load i28, i28* [[IN2]]
14+
; CHECK-NEXT: [[OUT1:%.*]] = getelementptr [[REC]], %rec* [[OUT:%.*]], i16 0, i32 0
15+
; CHECK-NEXT: [[OUT2:%.*]] = getelementptr [[REC]], %rec* [[OUT]], i16 0, i32 1
16+
; CHECK-NEXT: store i32 [[VAL1]], i32* [[OUT1]], align 8
17+
; CHECK-NEXT: store i28 [[VAL2]], i28* [[OUT2]]
18+
; CHECK-NEXT: ret void
19+
;
20+
%in1 = getelementptr %rec, %rec* %in, i16 0, i32 0
21+
%in2 = getelementptr %rec, %rec* %in, i16 0, i32 1
22+
%val1 = load i32, i32* %in1, align 8
23+
%val2 = load i28, i28* %in2
24+
%out1 = getelementptr %rec, %rec* %out, i16 0, i32 0
25+
%out2 = getelementptr %rec, %rec* %out, i16 0, i32 1
26+
store i32 %val1, i32* %out1, align 8
27+
store i28 %val2, i28* %out2
28+
ret void
29+
}
30+

0 commit comments

Comments
 (0)