Skip to content

Commit ec953a0

Browse files
committed
[AArch64][GlobalISel] Avoid splitting loads of large vector types into individual element loads
This patch adds custom legalization for G_LOAD where it splits loads of fixed-width vector types larger than 128 bits into loads of 128-bit vectors with the same element type. This is an improvement to what was being done before where loads would be split into individual loads for each element of the vector.
1 parent beec23e commit ec953a0

File tree

3 files changed

+225
-1302
lines changed

3 files changed

+225
-1302
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
373373
.legalForTypesWithMemDesc(
374374
{{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
375375
.widenScalarToNextPow2(0, /* MinSize = */ 8)
376+
.clampMaxNumElements(0, s8, 16)
377+
.clampMaxNumElements(0, s16, 8)
378+
.clampMaxNumElements(0, s32, 4)
379+
.clampMaxNumElements(0, s64, 2)
380+
.clampMaxNumElements(0, p0, 2)
376381
.lowerIfMemSizeNotByteSizePow2()
377382
.clampScalar(0, s8, s64)
378383
.narrowScalarIf(
@@ -383,11 +388,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
383388
Query.Types[0].getSizeInBits() > 32;
384389
},
385390
changeTo(0, s32))
386-
.clampMaxNumElements(0, s8, 16)
387-
.clampMaxNumElements(0, s16, 8)
388-
.clampMaxNumElements(0, s32, 4)
389-
.clampMaxNumElements(0, s64, 2)
390-
.clampMaxNumElements(0, p0, 2)
391391
// TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
392392
.bitcastIf(typeInSet(0, {v4s8}),
393393
[=](const LegalityQuery &Query) {

llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -711,33 +711,24 @@ body: |
711711
; CHECK: liveins: $x0
712712
; CHECK-NEXT: {{ $}}
713713
; CHECK-NEXT: %ptr:_(p0) = COPY $x0
714-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD %ptr(p0) :: (load (p0), align 64)
715-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
714+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr(p0) :: (load (<2 x s64>), align 64)
715+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD]](<2 x s64>)
716+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
716717
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
717-
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD]](p0) :: (load (p0) from unknown-address + 8)
718-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
718+
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
719+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD1]](<2 x s64>)
720+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
719721
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64)
720-
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0) from unknown-address + 16, align 16)
721-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
722-
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64)
723-
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD2]](p0) :: (load (p0) from unknown-address + 24)
724-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
725-
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64)
726-
; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD3]](p0) :: (load (p0) from unknown-address + 32, align 32)
727-
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
728-
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64)
729-
; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD4]](p0) :: (load (p0) from unknown-address + 40)
730-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[LOAD]](p0), [[LOAD1]](p0)
731-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[LOAD2]](p0), [[LOAD3]](p0)
732-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[LOAD4]](p0), [[LOAD5]](p0)
733-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BUILD_VECTOR]](<2 x p0>)
734-
; CHECK-NEXT: G_STORE [[BITCAST]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>), align 64)
735-
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64)
736-
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BUILD_VECTOR1]](<2 x p0>)
737-
; CHECK-NEXT: G_STORE [[BITCAST1]](<2 x s64>), [[PTR_ADD5]](p0) :: (store (<2 x s64>) into unknown-address + 16)
738-
; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64)
739-
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BUILD_VECTOR2]](<2 x p0>)
740-
; CHECK-NEXT: G_STORE [[BITCAST2]](<2 x s64>), [[PTR_ADD6]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32)
722+
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<2 x s64>) from unknown-address + 32, align 32)
723+
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD2]](<2 x s64>)
724+
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BITCAST]](<2 x p0>)
725+
; CHECK-NEXT: G_STORE [[BITCAST3]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>), align 64)
726+
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
727+
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BITCAST1]](<2 x p0>)
728+
; CHECK-NEXT: G_STORE [[BITCAST4]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into unknown-address + 16)
729+
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64)
730+
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BITCAST2]](<2 x p0>)
731+
; CHECK-NEXT: G_STORE [[BITCAST5]](<2 x s64>), [[PTR_ADD3]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32)
741732
; CHECK-NEXT: RET_ReallyLR
742733
%ptr:_(p0) = COPY $x0
743734
%val:_(<6 x p0>) = G_LOAD %ptr(p0) :: (load (<6 x p0>))

0 commit comments

Comments
 (0)