Skip to content

Commit 0160e7a

Browse files
committed
[AArch64] Set MaxInterleaving to 4 for Neoverse V2 and V3
This helps loop-based benchmarks quite a lot; SPEC INT is unaffected.
1 parent 1feef92 commit 0160e7a

File tree

3 files changed

+20
-2
lines changed

3 files changed

+20
-2
lines changed

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,12 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
233233
PrefLoopAlignment = Align(32);
234234
MaxBytesForLoopAlignment = 16;
235235
break;
236-
case NeoverseN2:
237-
case NeoverseN3:
238236
case NeoverseV2:
239237
case NeoverseV3:
238+
MaxInterleaveFactor = 4;
239+
LLVM_FALLTHROUGH;
240+
case NeoverseN2:
241+
case NeoverseN3:
240242
PrefFunctionAlignment = Align(16);
241243
PrefLoopAlignment = Align(32);
242244
MaxBytesForLoopAlignment = 16;

llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a14 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
66
; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a15 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
77
; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a16 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
8+
; RUN: opt -passes=loop-vectorize -mtriple=arm64 -mcpu=neoverse-v2 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
9+
; RUN: opt -passes=loop-vectorize -mtriple=arm64 -mcpu=neoverse-v3 -S %s | FileCheck --check-prefix=INTERLEAVE-4-VLA %s
810

911
; Tests for selecting interleave counts for loops with loads and stores.
1012

@@ -225,6 +227,12 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8
225227
; INTERLEAVE-2: exit:
226228
; INTERLEAVE-2-NEXT: ret void
227229
;
230+
; INTERLEAVE-4-VLA-LABEL: @interleave_single_load_store(
231+
; INTERLEAVE-4-VLA: call <vscale x 16 x i8> @llvm.smax.nxv16i8(
232+
; INTERLEAVE-4-VLA-NEXT: call <vscale x 16 x i8> @llvm.smax.nxv16i8(
233+
; INTERLEAVE-4-VLA-NEXT: call <vscale x 16 x i8> @llvm.smax.nxv16i8(
234+
; INTERLEAVE-4-VLA-NEXT: call <vscale x 16 x i8> @llvm.smax.nxv16i8(
235+
;
228236
entry:
229237
br label %loop
230238

llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a14 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
66
; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a15 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
77
; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-macos -mcpu=apple-a16 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
8+
; RUN: opt -passes=loop-vectorize -mtriple=arm64 -mcpu=neoverse-v2 -S %s | FileCheck --check-prefix=INTERLEAVE-4 %s
9+
; RUN: opt -passes=loop-vectorize -mtriple=arm64 -mcpu=neoverse-v3 -S %s | FileCheck --check-prefix=INTERLEAVE-4-VLA %s
810

911
; Tests for selecting the interleave count for loops with reductions.
1012

@@ -117,6 +119,12 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
117119
; INTERLEAVE-2-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[LOOP]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
118120
; INTERLEAVE-2-NEXT: ret i32 [[RED_NEXT_LCSSA]]
119121
;
122+
; INTERLEAVE-4-VLA-LABEL: @interleave_integer_reduction(
123+
; INTERLEAVE-4-VLA: add <vscale x 4 x i32>
124+
; INTERLEAVE-4-VLA-NEXT: add <vscale x 4 x i32>
125+
; INTERLEAVE-4-VLA-NEXT: add <vscale x 4 x i32>
126+
; INTERLEAVE-4-VLA-NEXT: add <vscale x 4 x i32>
127+
;
120128
entry:
121129
br label %loop
122130

0 commit comments

Comments
 (0)