18
18
define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
19
19
; SSE2-LABEL: @reduce_and4(
20
20
; SSE2-NEXT: entry:
21
- ; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
22
- ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
23
- ; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
24
- ; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
25
- ; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]]
21
+ ; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
22
+ ; SSE2-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
23
+ ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
24
+ ; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
25
+ ; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]]
26
26
; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
27
27
; SSE2-NEXT: ret i32 [[OP_RDX1]]
28
28
;
@@ -40,11 +40,11 @@ define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <
40
40
;
41
41
; AVX-LABEL: @reduce_and4(
42
42
; AVX-NEXT: entry:
43
- ; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
44
- ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
45
- ; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
46
- ; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
47
- ; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]]
43
+ ; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
44
+ ; AVX-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
45
+ ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
46
+ ; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
47
+ ; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]]
48
48
; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
49
49
; AVX-NEXT: ret i32 [[OP_RDX1]]
50
50
;
@@ -94,11 +94,11 @@ entry:
94
94
95
95
define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
96
96
; SSE2-LABEL: @reduce_and4_transpose(
97
- ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
98
- ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
99
- ; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
100
- ; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
101
- ; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]]
97
+ ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
98
+ ; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
99
+ ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
100
+ ; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
101
+ ; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]]
102
102
; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
103
103
; SSE2-NEXT: ret i32 [[OP_RDX1]]
104
104
;
@@ -114,11 +114,11 @@ define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i
114
114
; SSE42-NEXT: ret i32 [[OP_RDX3]]
115
115
;
116
116
; AVX-LABEL: @reduce_and4_transpose(
117
- ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
118
- ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
119
- ; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
120
- ; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
121
- ; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]]
117
+ ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
118
+ ; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
119
+ ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
120
+ ; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
121
+ ; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]]
122
122
; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
123
123
; AVX-NEXT: ret i32 [[OP_RDX1]]
124
124
;
0 commit comments