Skip to content

Commit 3b82397

Browse files
committed
[VectorCombine] Check for non-byte-sized element type
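We should check whether the element type is non-byte-sized, not the vector type. For types like <32 x i1> the vector as a whole is byte-sized (32 bits), but the individual i1 elements that we scalarize to are not, so a scalar load or store of a single element would access a full byte rather than a single bit. Fixes #67060.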
1 parent 95606a5 commit 3b82397

File tree

3 files changed

+7
-8
lines changed

3 files changed

+7
-8
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,7 +1206,7 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
12061206
// Don't optimize for atomic/volatile load or store. Ensure memory is not
12071207
// modified between, vector type matches store size, and index is inbounds.
12081208
if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
1209-
!DL.typeSizeEqualsStoreSize(Load->getType()) ||
1209+
!DL.typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
12101210
SrcAddr != SI->getPointerOperand()->stripPointerCasts())
12111211
return false;
12121212

@@ -1244,7 +1244,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
12441244
auto *VecTy = cast<VectorType>(I.getType());
12451245
auto *LI = cast<LoadInst>(&I);
12461246
const DataLayout &DL = I.getModule()->getDataLayout();
1247-
if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(VecTy))
1247+
if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(VecTy->getScalarType()))
12481248
return false;
12491249

12501250
InstructionCost OriginalCost =

llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -656,11 +656,10 @@ define i31 @load_with_non_power_of_2_element_type(ptr %x) {
656656
ret i31 %r
657657
}
658658

659-
; FIXME: This is a miscompile.
660659
define i1 @load_with_non_power_of_2_element_type_2(ptr %x) {
661660
; CHECK-LABEL: @load_with_non_power_of_2_element_type_2(
662-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i1>, ptr [[X:%.*]], i32 0, i32 1
663-
; CHECK-NEXT: [[R:%.*]] = load i1, ptr [[TMP1]], align 1
661+
; CHECK-NEXT: [[LV:%.*]] = load <8 x i1>, ptr [[X:%.*]], align 1
662+
; CHECK-NEXT: [[R:%.*]] = extractelement <8 x i1> [[LV]], i32 1
664663
; CHECK-NEXT: ret i1 [[R]]
665664
;
666665
%lv = load <8 x i1>, ptr %x

llvm/test/Transforms/VectorCombine/load-insert-store.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,11 @@ entry:
107107
ret void
108108
}
109109

110-
; FIXME: This is a miscompile.
111110
define void @insert_store_v32i1(ptr %p) {
112111
; CHECK-LABEL: @insert_store_v32i1(
113-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <32 x i1>, ptr [[P:%.*]], i64 0, i64 0
114-
; CHECK-NEXT: store i1 true, ptr [[TMP1]], align 4
112+
; CHECK-NEXT: [[VEC:%.*]] = load <32 x i1>, ptr [[P:%.*]], align 4
113+
; CHECK-NEXT: [[INS:%.*]] = insertelement <32 x i1> [[VEC]], i1 true, i64 0
114+
; CHECK-NEXT: store <32 x i1> [[INS]], ptr [[P]], align 4
115115
; CHECK-NEXT: ret void
116116
;
117117
%vec = load <32 x i1>, ptr %p

0 commit comments

Comments
 (0)