[NFC] Fixup newly-added costmodel tests to actually test what they should

LebedevRI · LebedevRI · commit f92961d238ef · 2021-10-05T21:35:47.000+03:00
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll
@@ -13,34 +13,31 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: LV: Checking a loop in "test"
 ;
 ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 236 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 7 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 15 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 30 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 220 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 440 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 5 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 11 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 24 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 48 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 54 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 220 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 440 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 11 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 24 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 48 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 13 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 50 for VF 64 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
-; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction:   %v0 = load i32, i32* %in0, align 2
+; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction:   %v0 = load i32, i32* %in0, align 4
 
 define void @test() {
 entry:
@@ -62,7 +59,7 @@ for.body:
   %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
   store i8 %reduce.add.0.narrow, i8* %out
 
-  %iv.next = add nuw nsw i64 %iv.0, 2
+  %iv.next = add nuw nsw i64 %iv, 2
   %cmp = icmp ult i64 %iv.next, 1024
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll
@@ -13,32 +13,29 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: LV: Checking a loop in "test"
 ;
 ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 236 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 31 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 62 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 220 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 440 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 47 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 94 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 54 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 220 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 440 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 12 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 21 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 47 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 94 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 9 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 144 for VF 64 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction:   %v0 = load i32, i32* %in0, align 4
 
@@ -66,7 +63,7 @@ for.body:
   %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
   store i8 %reduce.add.1.narrow, i8* %out
 
-  %iv.next = add nuw nsw i64 %iv.0, 3
+  %iv.next = add nuw nsw i64 %iv, 3
   %cmp = icmp ult i64 %iv.next, 1024
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll
@@ -13,32 +13,31 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: LV: Checking a loop in "test"
 ;
 ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 236 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 8 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 17 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 34 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 220 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 440 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 7 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 11 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 25 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 50 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 54 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 220 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 440 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 11 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 25 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 50 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 3 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 21 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 78 for VF 64 For instruction:   %v0 = load i32, i32* %in0, align 4
+;
+; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction:   %v0 = load i32, i32* %in0, align 4
 
 define void @test() {
 entry:
@@ -61,7 +60,7 @@ for.body:
   %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
   store i8 %reduce.add.1.narrow, i8* %out
 
-  %iv.next = add nuw nsw i64 %iv.0, 3
+  %iv.next = add nuw nsw i64 %iv, 3
   %cmp = icmp ult i64 %iv.next, 1024
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll
@@ -13,29 +13,28 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: LV: Checking a loop in "test"
 ;
 ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 45 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 90 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 220 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 32 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 54 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 220 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 16 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 32 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 70 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 140 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 6 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 17 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 71 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction:   %v0 = load i32, i32* %in0, align 4
 
@@ -67,7 +66,7 @@ for.body:
   %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
   store i8 %reduce.add.2.narrow, i8* %out
 
-  %iv.next = add nuw nsw i64 %iv.0, 4
+  %iv.next = add nuw nsw i64 %iv, 4
   %cmp = icmp ult i64 %iv.next, 1024
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll