Skip to content

[GVN] Handle scalable vectors with the same size in VNCoercion #123984

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions llvm/lib/Transforms/Utils/VNCoercion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
if (StoredTy == LoadTy)
return true;

if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) &&
DL.getTypeSizeInBits(StoredTy) == DL.getTypeSizeInBits(LoadTy))
return true;

// If the loaded/stored value is a first class array/struct, or scalable type,
// don't try to transform them. We need to be able to bitcast to integer.
if (isFirstClassAggregateOrScalableType(LoadTy) ||
Expand Down Expand Up @@ -83,8 +87,8 @@ Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();

uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedValue();
uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedValue();
TypeSize StoredValSize = DL.getTypeSizeInBits(StoredValTy);
TypeSize LoadedValSize = DL.getTypeSizeInBits(LoadedTy);

// If the store and reload are the same size, we can always reuse it.
if (StoredValSize == LoadedValSize) {
Expand Down Expand Up @@ -118,7 +122,8 @@ Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
// If the loaded value is smaller than the available value, then we can
// extract out a piece from it. If the available value is too small, then we
// can't do anything.
assert(StoredValSize >= LoadedValSize &&
assert(!StoredValSize.isScalable() &&
TypeSize::isKnownGE(StoredValSize, LoadedValSize) &&
"canCoerceMustAliasedValueToLoad fail");

// Convert source pointers to integers, which can be manipulated.
Expand Down Expand Up @@ -303,6 +308,13 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
return SrcVal;
}

// Return scalable values directly to avoid needing to bitcast to integer
// types, as we do not support non-zero Offsets.
if (isa<ScalableVectorType>(LoadTy)) {
assert(Offset == 0 && "Expected a zero offset for scalable types");
return SrcVal;
}

uint64_t StoreSize =
(DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
Expand Down Expand Up @@ -333,11 +345,15 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,

Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
Instruction *InsertPt, const DataLayout &DL) {

#ifndef NDEBUG
unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
assert(Offset + LoadSize <= SrcValSize);
TypeSize SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
TypeSize LoadSize = DL.getTypeStoreSize(LoadTy);
assert(SrcValSize.isScalable() == LoadSize.isScalable());
assert((SrcValSize.isScalable() || Offset + LoadSize <= SrcValSize) &&
"Expected Offset + LoadSize <= SrcValSize");
assert(
(!SrcValSize.isScalable() || (Offset == 0 && LoadSize == SrcValSize)) &&
"Expected scalable type sizes to match");
#endif
IRBuilder<> Builder(InsertPt);
SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/Transforms/GVN/vscale.ll
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ if.else:
define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = bitcast <vscale x 4 x i32> [[X]] to <vscale x 16 x i8>
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
Expand All @@ -404,7 +404,7 @@ define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x
define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v4f32_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = bitcast <vscale x 4 x i32> [[X]] to <vscale x 4 x float>
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
Expand All @@ -415,7 +415,7 @@ define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale
define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale x 16 x i8> %x) {
; CHECK-LABEL: @load_v4f32_store_v16i8_forward_load(
; CHECK-NEXT: store <vscale x 16 x i8> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = bitcast <vscale x 16 x i8> [[X]] to <vscale x 4 x float>
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
;
store <vscale x 16 x i8> %x, ptr %p
Expand All @@ -426,7 +426,7 @@ define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale
define <vscale x 4 x i32> @load_v4i32_store_v4f32_forward_load(ptr %p, <vscale x 4 x float> %x) {
; CHECK-LABEL: @load_v4i32_store_v4f32_forward_load(
; CHECK-NEXT: store <vscale x 4 x float> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = bitcast <vscale x 4 x float> [[X]] to <vscale x 4 x i32>
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
;
store <vscale x 4 x float> %x, ptr %p
Expand Down Expand Up @@ -496,7 +496,8 @@ define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsetc(ptr %p, <
define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2p0_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[P]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <vscale x 4 x i32> [[X]] to <vscale x 2 x i64>
; CHECK-NEXT: [[LOAD:%.*]] = inttoptr <vscale x 2 x i64> [[TMP1]] to <vscale x 2 x ptr>
; CHECK-NEXT: ret <vscale x 2 x ptr> [[LOAD]]
;
store <vscale x 4 x i32> %x, ptr %p
Expand All @@ -507,7 +508,7 @@ define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x
define <vscale x 2 x i64> @load_v2i64_store_v2p0_forward_load(ptr %p, <vscale x 2 x ptr> %x) {
; CHECK-LABEL: @load_v2i64_store_v2p0_forward_load(
; CHECK-NEXT: store <vscale x 2 x ptr> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[P]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = ptrtoint <vscale x 2 x ptr> [[X]] to <vscale x 2 x i64>
; CHECK-NEXT: ret <vscale x 2 x i64> [[LOAD]]
;
store <vscale x 2 x ptr> %x, ptr %p
Expand Down Expand Up @@ -540,8 +541,7 @@ define <16 x i8> @load_v16i8_store_nxv4i32_forward_load(ptr %p, <vscale x 4 x i3
define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_constant(ptr %p) {
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_constant(
; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 4), ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
; CHECK-NEXT: ret <vscale x 16 x i8> bitcast (<vscale x 4 x i32> splat (i32 4) to <vscale x 16 x i8>)
;
store <vscale x 4 x i32> splat (i32 4), ptr %p
%load = load <vscale x 16 x i8>, ptr %p
Expand Down Expand Up @@ -590,13 +590,13 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK-NEXT: [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP5]]
; CHECK-NEXT: [[A_ELT6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 3
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16
; CHECK-NEXT: [[DOTUNPACK:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP]], align 16
; CHECK-NEXT: [[DOTUNPACK:%.*]] = bitcast <vscale x 4 x i32> [[A_ELT]] to <vscale x 16 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DOTUNPACK]], 0
; CHECK-NEXT: [[DOTUNPACK8:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK1]], align 16
; CHECK-NEXT: [[DOTUNPACK8:%.*]] = bitcast <vscale x 4 x i32> [[A_ELT2]] to <vscale x 16 x i8>
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[DOTUNPACK8]], 1
; CHECK-NEXT: [[DOTUNPACK10:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK3]], align 16
; CHECK-NEXT: [[DOTUNPACK10:%.*]] = bitcast <vscale x 4 x i32> [[A_ELT4]] to <vscale x 16 x i8>
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[DOTUNPACK10]], 2
; CHECK-NEXT: [[DOTUNPACK12:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK5]], align 16
; CHECK-NEXT: [[DOTUNPACK12:%.*]] = bitcast <vscale x 4 x i32> [[A_ELT6]] to <vscale x 16 x i8>
; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[DOTUNPACK12]], 3
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull [[REF_TMP]])
; CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]
Expand Down
Loading