Skip to content

Commit 208a985

Browse files
committed
[AArch64][GlobalISel] Avoid splitting loads of large vector types into individual element loads (#85042)
This patch fixes an issue with the legalization of G_LOAD where the presence of .lowerIfMemSizeNotByteSizePow2 before .clampMaxNumElements was causing issues for vectors which matched that condition. Such vectors would be lowered into per-element loads instead of being split up into 128-bit chunks.
1 parent 843a978 commit 208a985

File tree

3 files changed

+225
-1302
lines changed

3 files changed

+225
-1302
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
373373
.legalForTypesWithMemDesc(
374374
{{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
375375
.widenScalarToNextPow2(0, /* MinSize = */ 8)
376+
.clampMaxNumElements(0, s8, 16)
377+
.clampMaxNumElements(0, s16, 8)
378+
.clampMaxNumElements(0, s32, 4)
379+
.clampMaxNumElements(0, s64, 2)
380+
.clampMaxNumElements(0, p0, 2)
376381
.lowerIfMemSizeNotByteSizePow2()
377382
.clampScalar(0, s8, s64)
378383
.narrowScalarIf(
@@ -383,11 +388,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
383388
Query.Types[0].getSizeInBits() > 32;
384389
},
385390
changeTo(0, s32))
386-
.clampMaxNumElements(0, s8, 16)
387-
.clampMaxNumElements(0, s16, 8)
388-
.clampMaxNumElements(0, s32, 4)
389-
.clampMaxNumElements(0, s64, 2)
390-
.clampMaxNumElements(0, p0, 2)
391391
// TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
392392
.bitcastIf(typeInSet(0, {v4s8}),
393393
[=](const LegalityQuery &Query) {

llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -711,33 +711,24 @@ body: |
711711
; CHECK: liveins: $x0
712712
; CHECK-NEXT: {{ $}}
713713
; CHECK-NEXT: %ptr:_(p0) = COPY $x0
714-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD %ptr(p0) :: (load (p0), align 64)
715-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
714+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr(p0) :: (load (<2 x s64>), align 64)
715+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD]](<2 x s64>)
716+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
716717
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
717-
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD]](p0) :: (load (p0) from unknown-address + 8)
718-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
718+
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
719+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD1]](<2 x s64>)
720+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
719721
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64)
720-
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0) from unknown-address + 16, align 16)
721-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
722-
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64)
723-
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD2]](p0) :: (load (p0) from unknown-address + 24)
724-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
725-
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64)
726-
; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD3]](p0) :: (load (p0) from unknown-address + 32, align 32)
727-
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
728-
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64)
729-
; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD4]](p0) :: (load (p0) from unknown-address + 40)
730-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[LOAD]](p0), [[LOAD1]](p0)
731-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[LOAD2]](p0), [[LOAD3]](p0)
732-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[LOAD4]](p0), [[LOAD5]](p0)
733-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BUILD_VECTOR]](<2 x p0>)
734-
; CHECK-NEXT: G_STORE [[BITCAST]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>), align 64)
735-
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64)
736-
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BUILD_VECTOR1]](<2 x p0>)
737-
; CHECK-NEXT: G_STORE [[BITCAST1]](<2 x s64>), [[PTR_ADD5]](p0) :: (store (<2 x s64>) into unknown-address + 16)
738-
; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64)
739-
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BUILD_VECTOR2]](<2 x p0>)
740-
; CHECK-NEXT: G_STORE [[BITCAST2]](<2 x s64>), [[PTR_ADD6]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32)
722+
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<2 x s64>) from unknown-address + 32, align 32)
723+
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD2]](<2 x s64>)
724+
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BITCAST]](<2 x p0>)
725+
; CHECK-NEXT: G_STORE [[BITCAST3]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>), align 64)
726+
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
727+
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BITCAST1]](<2 x p0>)
728+
; CHECK-NEXT: G_STORE [[BITCAST4]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into unknown-address + 16)
729+
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64)
730+
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BITCAST2]](<2 x p0>)
731+
; CHECK-NEXT: G_STORE [[BITCAST5]](<2 x s64>), [[PTR_ADD3]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32)
741732
; CHECK-NEXT: RET_ReallyLR
742733
%ptr:_(p0) = COPY $x0
743734
%val:_(<6 x p0>) = G_LOAD %ptr(p0) :: (load (<6 x p0>))

0 commit comments

Comments
 (0)