Skip to content

Commit 064981f

Browse files
author
Anna Welker
committed
[ARM][MVE] Enable MVE gathers and scatters by default
Enable MVE gather/scatters by default, which requires some minor adaptations in some tests. Differential revision: https://reviews.llvm.org/D86776
1 parent 627e900 commit 064981f

26 files changed

+41
-36
lines changed

llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ using namespace llvm;
4747
#define DEBUG_TYPE "arm-mve-gather-scatter-lowering"
4848

4949
cl::opt<bool> EnableMaskedGatherScatters(
50-
"enable-arm-maskedgatscat", cl::Hidden, cl::init(false),
50+
"enable-arm-maskedgatscat", cl::Hidden, cl::init(true),
5151
cl::desc("Enable the generation of masked gathers and scatters"));
5252

5353
namespace {

llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2-
; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze -enable-arm-maskedgatscat | FileCheck %s
2+
; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s
33

44
define i32 @masked_gather() {
55
; CHECK-LABEL: 'masked_gather'

llvm/test/CodeGen/Thumb2/mve-gather-increment.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s
33

44
define arm_aapcs_vfpcc <4 x i32> @gather_inc_mini_4i32(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, <4 x i32> %offs) {
55
; CHECK-LABEL: gather_inc_mini_4i32:

llvm/test/CodeGen/Thumb2/mve-gather-ind16-scaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
33

44
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16(i16* %base, <8 x i16>* %offptr) {
55
; CHECK-LABEL: scaled_v8i16_i16:

llvm/test/CodeGen/Thumb2/mve-gather-ind16-unscaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
33

44
define arm_aapcs_vfpcc <8 x i16> @zext_unscaled_i8_i16(i8* %base, <8 x i16>* %offptr) {
55
; CHECK-LABEL: zext_unscaled_i8_i16:

llvm/test/CodeGen/Thumb2/mve-gather-ind32-scaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o 2>/dev/null - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o 2>/dev/null - | FileCheck %s
33

44
define arm_aapcs_vfpcc <4 x i32> @zext_scaled_i16_i32(i16* %base, <4 x i32>* %offptr) {
55
; CHECK-LABEL: zext_scaled_i16_i32:

llvm/test/CodeGen/Thumb2/mve-gather-ind32-unscaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o 2>/dev/null - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o 2>/dev/null - | FileCheck %s
33

44
define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i8_i32(i8* %base, <4 x i32>* %offptr) {
55
; CHECK-LABEL: zext_unscaled_i8_i32:

llvm/test/CodeGen/Thumb2/mve-gather-ind8-unscaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
33

44
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8(i8* %base, <16 x i8>* %offptr) {
55
; CHECK-LABEL: unscaled_v16i8_i8:

llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22

33

4-
; RUN: opt --arm-mve-gather-scatter-lowering -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -S -o 2>/dev/null - | FileCheck %s
4+
; RUN: opt --arm-mve-gather-scatter-lowering -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -S -o 2>/dev/null - | FileCheck %s
55

66
define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
77
; CHECK-LABEL: @push_out_add_sub_block(

llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s
33

44
; i32
55

llvm/test/CodeGen/Thumb2/mve-gather-scatter-opt.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o 2>/dev/null - | FileCheck --check-prefix NOGATSCAT %s
3-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=-mve -enable-arm-maskedgatscat %s -o 2>/dev/null - | FileCheck --check-prefix NOMVE %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat=false %s -o 2>/dev/null - | FileCheck --check-prefix NOGATSCAT %s
3+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=-mve %s -o 2>/dev/null - | FileCheck --check-prefix NOMVE %s
44

55
define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32_gather(i8* %base, <4 x i32>* %offptr) {
66
; NOGATSCAT-LABEL: unscaled_i32_i32_gather:
@@ -34,8 +34,6 @@ define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32_gather(i8* %base, <4 x i32>*
3434
; NOMVE-NEXT: mov r0, r12
3535
; NOMVE-NEXT: pop {r4, pc}
3636

37-
38-
3937
entry:
4038
%offs = load <4 x i32>, <4 x i32>* %offptr, align 4
4139
%byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
@@ -82,7 +80,6 @@ define arm_aapcs_vfpcc void @unscaled_i32_i8_scatter(i8* %base, <4 x i8>* %offpt
8280
; NOMVE-NEXT: str r2, [r0, r1]
8381
; NOMVE-NEXT: pop {r4, pc}
8482

85-
8683
entry:
8784
%offs = load <4 x i8>, <4 x i8>* %offptr, align 1
8885
%offs.zext = zext <4 x i8> %offs to <4 x i32>

llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22

3-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o 2>/dev/null - | FileCheck %s
3+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o 2>/dev/null - | FileCheck %s
44

55
!0 = !{i32 1, !"wchar_size", i32 4}
66
!1 = !{i32 1, !"min_enum_size", i32 4}

llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22

3-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o 2>/dev/null - | FileCheck %s
3+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o 2>/dev/null - | FileCheck %s
44

55
define void @ptr_iv_v4i32(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %y) {
66
; CHECK-LABEL: ptr_iv_v4i32:

llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -enable-mem-access-versioning=false -enable-arm-maskedgatscat -tail-predication=force-enabled %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -enable-mem-access-versioning=false -tail-predication=force-enabled %s -o - | FileCheck %s
33

44
define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) {
55
; CHECK-LABEL: mve_gather_qi_wb:

llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -enable-arm-maskedgatscat -tail-predication=force-enabled %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -tail-predication=force-enabled %s -o - | FileCheck %s
33

44
define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n) {
55
; CHECK-LABEL: gather_inc_v4i32_simple:

llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s
33

44

55
define arm_aapcs_vfpcc void @scatter_inc_minipred_4i32(<4 x i32> %data, i32* %dst, <4 x i32> %offs) {

llvm/test/CodeGen/Thumb2/mve-scatter-ind16-scaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
33

44
; VLDRH.16 Qd, [base, offs, uxtw #1]
55
define arm_aapcs_vfpcc void @scaled_v8i16_i16(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {

llvm/test/CodeGen/Thumb2/mve-scatter-ind16-unscaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
33

44
; VLDRB.u16 Qd, [base, offs]
55
define arm_aapcs_vfpcc void @ext_unscaled_i8_i16(i8* %base, <8 x i16>* %offptr, <8 x i16> %input) {

llvm/test/CodeGen/Thumb2/mve-scatter-ind32-scaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o 2>/dev/null - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o 2>/dev/null - | FileCheck %s
33

44
; VLDRH.u32 Qd, [base, offs, uxtw #1]
55
define arm_aapcs_vfpcc void @ext_scaled_i16_i32(i16* %base, <4 x i32>* %offptr, <4 x i32> %input) {

llvm/test/CodeGen/Thumb2/mve-scatter-ind32-unscaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o 2>/dev/null - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o 2>/dev/null - | FileCheck %s
33

44
; VLDRB.u32 Qd, [base, offs]
55
define arm_aapcs_vfpcc void @ext_unscaled_i8_i32(i8* %base, <4 x i32>* %offptr, <4 x i32> %input) {

llvm/test/CodeGen/Thumb2/mve-scatter-ind8-unscaled.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
33

44
; VLDRB.8
55
define arm_aapcs_vfpcc void @unscaled_v16i8_i8(i8* %base, <16 x i8>* %offptr, <16 x i8> %input) {

llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -enable-arm-maskedgatscat %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s
33

44
; i32
55

llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -loop-vectorize -force-vector-width=4 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat -tail-predication=force-enabled -S %s -o - | FileCheck %s
2+
; RUN: opt -loop-vectorize -force-vector-width=4 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -tail-predication=force-enabled -S %s -o - | FileCheck %s
33

44
define void @test_stride1_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n) {
55
; CHECK-LABEL: @test_stride1_4i32(

llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -417,12 +417,12 @@ entry:
417417
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
418418
; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
419419
; VF_4-LABEL: Checking a loop in "i32_factor_3"
420-
; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
420+
; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
421421
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
422422
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
423423
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
424424
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
425-
; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
425+
; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
426426
; VF_8-LABEL: Checking a loop in "i32_factor_3"
427427
; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
428428
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
@@ -573,12 +573,12 @@ entry:
573573
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
574574
; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4
575575
; VF_4-LABEL: Checking a loop in "f32_factor_3"
576-
; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load float, float* %tmp0, align 4
576+
; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, float* %tmp0, align 4
577577
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, float* %tmp1, align 4
578578
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp2, align 4
579579
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
580580
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
581-
; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4
581+
; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4
582582
; VF_8-LABEL: Checking a loop in "f32_factor_3"
583583
; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load float, float* %tmp0, align 4
584584
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, float* %tmp1, align 4
@@ -808,14 +808,14 @@ entry:
808808
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
809809
; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i32 0, i32* %tmp3, align 4
810810
; VF_4-LABEL: Checking a loop in "i32_factor_4"
811-
; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
811+
; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
812812
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
813813
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
814814
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
815815
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
816816
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
817817
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
818-
; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store i32 0, i32* %tmp3, align 4
818+
; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store i32 0, i32* %tmp3, align 4
819819
; VF_8-LABEL: Checking a loop in "i32_factor_4"
820820
; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
821821
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
@@ -997,14 +997,14 @@ entry:
997997
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4
998998
; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store float 0.000000e+00, float* %tmp3, align 4
999999
; VF_4-LABEL: Checking a loop in "f32_factor_4"
1000-
; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load float, float* %tmp0, align 4
1000+
; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, float* %tmp0, align 4
10011001
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp1, align 4
10021002
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, float* %tmp2, align 4
10031003
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load float, float* %tmp3, align 4
10041004
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
10051005
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
10061006
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4
1007-
; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store float 0.000000e+00, float* %tmp3, align 4
1007+
; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store float 0.000000e+00, float* %tmp3, align 4
10081008
; VF_8-LABEL: Checking a loop in "f32_factor_4"
10091009
; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load float, float* %tmp0, align 4
10101010
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp1, align 4

llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -loop-vectorize -S -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,mve1beat -dce -instcombine --simplifycfg -enable-arm-maskedgatscat < %s | FileCheck %s
2+
; RUN: opt -loop-vectorize -S -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,mve1beat -dce -instcombine --simplifycfg < %s | FileCheck %s
33

44
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
55
target triple = "thumbv8.1m.main-none-none-eabi"

0 commit comments

Comments
 (0)