Skip to content

Commit db848fb

Browse files
committed
[NFC][LV][X86] Improve test coverage for masked mem ops
1 parent 0d9b478 commit db848fb

19 files changed

+925
-16
lines changed

llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ target triple = "x86_64-unknown-linux-gnu"
5757
; AVX512: LV: Found an estimated cost of 444 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
5858
; AVX512: LV: Found an estimated cost of 888 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2
5959
;
60-
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %valB = load i16, i16* %inB, align 4
60+
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %valB = load i16, i16* %inB, align 2
6161
define void @test() {
6262
entry:
6363
br label %for.body

llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ target triple = "x86_64-unknown-linux-gnu"
5757
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
5858
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %valB = load i64, i64* %inB, align 8
5959
;
60-
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %v0 = load i8, i8* %in0, align 2
60+
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %valB = load i64, i64* %inB, align 8
6161

6262
define void @test() {
6363
entry:

llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ target triple = "x86_64-unknown-linux-gnu"
5757
; AVX512: LV: Found an estimated cost of 442 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
5858
; AVX512: LV: Found an estimated cost of 884 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1
5959
;
60-
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %valB = load i8, i8* %inB, align 4
60+
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %valB = load i8, i8* %inB, align 1
6161
define void @test() {
6262
entry:
6363
br label %for.body

llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ target triple = "x86_64-unknown-linux-gnu"
5757
; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4
5858
; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: %valB.loaded = load i32, i32* %inB, align 4
5959
;
60-
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %valB = load i32, i32* %inB, align 4
60+
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %valB.loaded = load i32, i32* %inB, align 4
6161
define void @test() {
6262
entry:
6363
br label %for.body
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2
2+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse42 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE42
3+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1
4+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2-SLOWGATHER
5+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2-FASTGATHER
6+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512
7+
8+
; REQUIRES: asserts
9+
10+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
11+
target triple = "x86_64-unknown-linux-gnu"
12+
13+
@A = global [1024 x i8] zeroinitializer, align 128
14+
@B = global [1024 x i64] zeroinitializer, align 128
15+
@C = global [1024 x i64] zeroinitializer, align 128
16+
17+
; CHECK: LV: Checking a loop in "test"
18+
;
19+
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
20+
; SSE2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
21+
; SSE2: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
22+
; SSE2: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
23+
; SSE2: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
24+
;
25+
; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
26+
; SSE42: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
27+
; SSE42: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
28+
; SSE42: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
29+
; SSE42: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
30+
;
31+
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
32+
; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
33+
; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
34+
; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
35+
; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
36+
; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
37+
;
38+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
39+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
40+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
41+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
42+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
43+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
44+
;
45+
; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
46+
; AVX2-FASTGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
47+
; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
48+
; AVX2-FASTGATHER: LV: Found an estimated cost of 12 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
49+
; AVX2-FASTGATHER: LV: Found an estimated cost of 24 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
50+
; AVX2-FASTGATHER: LV: Found an estimated cost of 48 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
51+
;
52+
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
53+
; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
54+
; AVX512: LV: Found an estimated cost of 24 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
55+
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
56+
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
57+
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
58+
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %valB.loaded = load i64, i64* %inB, align 8
59+
;
60+
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %valB.loaded = load i64, i64* %inB, align 8
61+
; Test loop: for each %iv in [0, 1024), read the i8 at @A[%iv]. When it is
; non-zero, sign-extend it and use it as the index of an i64 load from @B;
; otherwise use 0. The selected i64 is stored to @C[%iv].
; The conditional load %valB.loaded is the instruction whose per-VF cost the
; expectation lines earlier in this file match. No alignment is written on
; the loads/store below; those expectation lines show the i64 load as align 8.
define void @test() {
62+
entry:
63+
br label %for.body
64+
; Loop header: %iv is 0 when coming from %entry and %iv.next when coming
; from %end.
65+
for.body:
66+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
67+
; Load this iteration's i8 from @A; it decides the branch and, in the
; %load block, also supplies the index into @B.
68+
%inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
69+
%valA = load i8, i8* %inA
70+
%canLoad = icmp ne i8 %valA, 0
71+
br i1 %canLoad, label %load, label %mask
72+
; Reached only when %valA != 0. The @B element is selected by the
; sign-extended %valA, not by %iv.
; NOTE(review): presumably this conditional, data-indexed load is what the
; vectorizer costs as a masked gather - confirm against the test's file name,
; which is not visible in this view.
73+
load:
74+
%valA.ext = sext i8 %valA to i64
75+
%inB = getelementptr inbounds [1024 x i64], [1024 x i64]* @B, i64 0, i64 %valA.ext
76+
%valB.loaded = load i64, i64* %inB
77+
br label %end
78+
; Reached when %valA == 0: nothing is loaded; the phi in %end supplies 0.
79+
mask:
80+
br label %end
81+
; Join point: pick the loaded value or 0 and store it to @C[%iv].
82+
end:
83+
%valB = phi i64 [ %valB.loaded, %load ], [ 0, %mask ]
84+
%out = getelementptr inbounds [1024 x i64], [1024 x i64]* @C, i64 0, i64 %iv
85+
store i64 %valB, i64* %out
86+
; Advance the induction variable; keep looping while %iv.next is below 1024
; (unsigned compare).
87+
%iv.next = add nuw nsw i64 %iv, 1
88+
%cmp = icmp ult i64 %iv.next, 1024
89+
br i1 %cmp, label %for.body, label %for.cond.cleanup
90+
91+
for.cond.cleanup:
92+
ret void
93+
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2
2+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse42 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE42
3+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1
4+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2-SLOWGATHER
5+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2-FASTGATHER
6+
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512
7+
8+
; REQUIRES: asserts
9+
10+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
11+
target triple = "x86_64-unknown-linux-gnu"
12+
13+
@A = global [1024 x i8] zeroinitializer, align 128
14+
@C = global [1024 x i16] zeroinitializer, align 128
15+
16+
; CHECK: LV: Checking a loop in "test"
17+
;
18+
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
19+
; SSE2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
20+
; SSE2: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
21+
; SSE2: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
22+
; SSE2: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
23+
;
24+
; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
25+
; SSE42: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
26+
; SSE42: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
27+
; SSE42: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
28+
; SSE42: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
29+
;
30+
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
31+
; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
32+
; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
33+
; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
34+
; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
35+
; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i16, i16* %inB, align 2
36+
;
37+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
38+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
39+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
40+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
41+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
42+
; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i16, i16* %inB, align 2
43+
;
44+
; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
45+
; AVX2-FASTGATHER: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
46+
; AVX2-FASTGATHER: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
47+
; AVX2-FASTGATHER: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
48+
; AVX2-FASTGATHER: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
49+
; AVX2-FASTGATHER: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i16, i16* %inB, align 2
50+
;
51+
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
52+
; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
53+
; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
54+
; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
55+
; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
56+
; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: %valB.loaded = load i16, i16* %inB, align 2
57+
; AVX512: LV: Found an estimated cost of 2 for VF 64 For instruction: %valB.loaded = load i16, i16* %inB, align 2
58+
;
59+
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %valB.loaded = load i16, i16* %inB, align 2
60+
; Test loop: for each %iv in [0, 1024), read the i8 at @A[%iv]. When it is
; non-zero, load the i16 at %B[%iv]; otherwise use 0. The selected i16 is
; stored to @C[%iv]. %B is a pointer argument, not a global.
; The conditional load %valB.loaded is the instruction whose per-VF cost the
; expectation lines earlier in this file match. No alignment is written on
; the loads/store below; those expectation lines show the i16 load as align 2.
define void @test([1024 x i16]* %B) {
61+
entry:
62+
br label %for.body
63+
; Loop header: %iv is 0 when coming from %entry and %iv.next when coming
; from %end.
64+
for.body:
65+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
66+
; Load this iteration's i8 from @A; it is used only as the branch condition.
67+
%inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
68+
%valA = load i8, i8* %inA
69+
%canLoad = icmp ne i8 %valA, 0
70+
br i1 %canLoad, label %load, label %mask
71+
; Reached only when %valA != 0. The element of %B is selected by %iv itself,
; so successive iterations address successive i16 elements.
; NOTE(review): presumably this is costed as a masked (consecutive) load rather
; than a gather - confirm against the test's file name, which is not visible
; in this view.
72+
load:
73+
%inB = getelementptr inbounds [1024 x i16], [1024 x i16]* %B, i64 0, i64 %iv
74+
%valB.loaded = load i16, i16* %inB
75+
br label %end
76+
; Reached when %valA == 0: nothing is loaded; the phi in %end supplies 0.
77+
mask:
78+
br label %end
79+
; Join point: pick the loaded value or 0 and store it to @C[%iv].
80+
end:
81+
%valB = phi i16 [ %valB.loaded, %load ], [ 0, %mask ]
82+
%out = getelementptr inbounds [1024 x i16], [1024 x i16]* @C, i64 0, i64 %iv
83+
store i16 %valB, i16* %out
84+
; Advance the induction variable; keep looping while %iv.next is below 1024
; (unsigned compare).
85+
%iv.next = add nuw nsw i64 %iv, 1
86+
%cmp = icmp ult i64 %iv.next, 1024
87+
br i1 %cmp, label %for.body, label %for.cond.cleanup
88+
89+
for.cond.cleanup:
90+
ret void
91+
}

0 commit comments

Comments
 (0)