5
5
target triple = "x86_64--"
6
6
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
7
7
8
+ ; FIXME: This 4-element 'or' reduction should only need 2 'or' instructions; 3 are currently generated.
9
+
8
10
define i32 @ext_ext_or_reduction_v4i32 (<4 x i32 > %x , <4 x i32 > %y ) {
9
11
; CHECK-LABEL: @ext_ext_or_reduction_v4i32(
10
12
; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[Y:%.*]], [[X:%.*]]
11
13
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
12
14
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[Z]], [[TMP1]]
13
- ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2 ]], i32 0
14
- ; CHECK-NEXT: [[Z2 :%.*]] = extractelement <4 x i32> [[Z ]], i32 2
15
- ; CHECK-NEXT: [[Z012 :%.*]] = or i32 [[TMP3 ]], [[Z2]]
16
- ; CHECK-NEXT: [[Z3 :%.*]] = extractelement <4 x i32> [[Z ]], i32 3
17
- ; CHECK-NEXT: [[Z0123 :%.*]] = or i32 [[Z012 ]], [[Z3]]
18
- ; CHECK-NEXT: ret i32 [[Z0123 ]]
15
+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
16
+ ; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], [[TMP3]]
17
+ ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
18
+ ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP4]], [[TMP5]]
19
+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
20
+ ; CHECK-NEXT: ret i32 [[TMP7]]
19
21
;
20
22
%z = and <4 x i32 > %x , %y
21
23
%z0 = extractelement <4 x i32 > %z , i32 0
@@ -32,10 +34,10 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
32
34
; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32(
33
35
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
34
36
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[X]]
35
- ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2 ]], i64 0
36
- ; CHECK-NEXT: [[X2 :%.*]] = extractelement <4 x i32> [[X ]], i32 2
37
- ; CHECK-NEXT: [[X210 :%.*]] = add i32 [[TMP3 ]], [[X2]]
38
- ; CHECK-NEXT: ret i32 [[X210 ]]
37
+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
38
+ ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
39
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i64 0
40
+ ; CHECK-NEXT: ret i32 [[TMP5]]
39
41
;
40
42
%x0 = extractelement <4 x i32 > %x , i32 0
41
43
%x1 = extractelement <4 x i32 > %x , i32 1
@@ -47,14 +49,14 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
47
49
48
50
define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32 (<4 x i32 > %x , <4 x i32 > %y ) {
49
51
; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
50
- ; CHECK-NEXT: [[Y1 :%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 1
51
- ; CHECK-NEXT: [[Y2 :%.*]] = extractelement <4 x i32> [[Y]], i32 2
52
- ; CHECK-NEXT: [[TMP1 :%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
53
- ; CHECK-NEXT: [[TMP2 :%.*]] = add <4 x i32> [[TMP1 ]], [[Y]]
54
- ; CHECK-NEXT: [[TMP3 :%.*]] = extractelement <4 x i32> [[TMP2 ]], i32 0
55
- ; CHECK-NEXT: [[Y210 :%.*]] = add i32 [[TMP3 ]], [[Y1 ]]
56
- ; CHECK-NEXT: [[X2Y210 :%.*]] = add i32 [[Y210 ]], [[Y2]]
57
- ; CHECK-NEXT: ret i32 [[X2Y210 ]]
52
+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
53
+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
54
+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
55
+ ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[Y]]
56
+ ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP1]]
57
+ ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[TMP2]]
58
+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
59
+ ; CHECK-NEXT: ret i32 [[TMP7]]
58
60
;
59
61
%y0 = extractelement <4 x i32 > %y , i32 0
60
62
%y1 = extractelement <4 x i32 > %y , i32 1
0 commit comments