5
5
define amdgpu_ps i32 @s_andn2_i32 (i32 inreg %src0 , i32 inreg %src1 ) {
6
6
; GCN-LABEL: s_andn2_i32:
7
7
; GCN: ; %bb.0:
8
- ; GCN-NEXT: s_not_b32 s0, s3
9
- ; GCN-NEXT: s_and_b32 s0, s2, s0
8
+ ; GCN-NEXT: s_andn2_b32 s0, s2, s3
10
9
; GCN-NEXT: ; return to shader part epilog
11
10
%not.src1 = xor i32 %src1 , -1
12
11
%and = and i32 %src0 , %not.src1
@@ -16,8 +15,7 @@ define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
16
15
define amdgpu_ps i32 @s_andn2_i32_commute (i32 inreg %src0 , i32 inreg %src1 ) {
17
16
; GCN-LABEL: s_andn2_i32_commute:
18
17
; GCN: ; %bb.0:
19
- ; GCN-NEXT: s_not_b32 s0, s3
20
- ; GCN-NEXT: s_and_b32 s0, s0, s2
18
+ ; GCN-NEXT: s_andn2_b32 s0, s2, s3
21
19
; GCN-NEXT: ; return to shader part epilog
22
20
%not.src1 = xor i32 %src1 , -1
23
21
%and = and i32 %not.src1 , %src0
@@ -28,7 +26,7 @@ define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_use(i32 inreg %src0, i32 inreg
28
26
; GCN-LABEL: s_andn2_i32_multi_use:
29
27
; GCN: ; %bb.0:
30
28
; GCN-NEXT: s_not_b32 s1, s3
31
- ; GCN-NEXT: s_and_b32 s0, s2, s1
29
+ ; GCN-NEXT: s_andn2_b32 s0, s2, s3
32
30
; GCN-NEXT: ; return to shader part epilog
33
31
%not.src1 = xor i32 %src1 , -1
34
32
%and = and i32 %src0 , %not.src1
@@ -40,9 +38,8 @@ define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_use(i32 inreg %src0, i32 inreg
40
38
define amdgpu_ps { i32 , i32 } @s_andn2_i32_multi_foldable_use (i32 inreg %src0 , i32 inreg %src1 , i32 inreg %src2 ) {
41
39
; GCN-LABEL: s_andn2_i32_multi_foldable_use:
42
40
; GCN: ; %bb.0:
43
- ; GCN-NEXT: s_not_b32 s1, s4
44
- ; GCN-NEXT: s_and_b32 s0, s2, s1
45
- ; GCN-NEXT: s_and_b32 s1, s3, s1
41
+ ; GCN-NEXT: s_andn2_b32 s0, s2, s4
42
+ ; GCN-NEXT: s_andn2_b32 s1, s3, s4
46
43
; GCN-NEXT: ; return to shader part epilog
47
44
%not.src2 = xor i32 %src2 , -1
48
45
%and0 = and i32 %src0 , %not.src2
@@ -91,8 +88,7 @@ define amdgpu_ps float @v_andn2_i32_vs(i32 %src0, i32 inreg %src1) {
91
88
define amdgpu_ps i64 @s_andn2_i64 (i64 inreg %src0 , i64 inreg %src1 ) {
92
89
; GCN-LABEL: s_andn2_i64:
93
90
; GCN: ; %bb.0:
94
- ; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
95
- ; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
91
+ ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
96
92
; GCN-NEXT: ; return to shader part epilog
97
93
%not.src1 = xor i64 %src1 , -1
98
94
%and = and i64 %src0 , %not.src1
@@ -102,8 +98,7 @@ define amdgpu_ps i64 @s_andn2_i64(i64 inreg %src0, i64 inreg %src1) {
102
98
define amdgpu_ps i64 @s_andn2_i64_commute (i64 inreg %src0 , i64 inreg %src1 ) {
103
99
; GCN-LABEL: s_andn2_i64_commute:
104
100
; GCN: ; %bb.0:
105
- ; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
106
- ; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
101
+ ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
107
102
; GCN-NEXT: ; return to shader part epilog
108
103
%not.src1 = xor i64 %src1 , -1
109
104
%and = and i64 %not.src1 , %src0
@@ -113,9 +108,8 @@ define amdgpu_ps i64 @s_andn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
113
108
define amdgpu_ps { i64 , i64 } @s_andn2_i64_multi_foldable_use (i64 inreg %src0 , i64 inreg %src1 , i64 inreg %src2 ) {
114
109
; GCN-LABEL: s_andn2_i64_multi_foldable_use:
115
110
; GCN: ; %bb.0:
116
- ; GCN-NEXT: s_not_b64 s[6:7], s[6:7]
117
- ; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
118
- ; GCN-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
111
+ ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[6:7]
112
+ ; GCN-NEXT: s_andn2_b64 s[2:3], s[4:5], s[6:7]
119
113
; GCN-NEXT: ; return to shader part epilog
120
114
%not.src2 = xor i64 %src2 , -1
121
115
%and0 = and i64 %src0 , %not.src2
@@ -128,10 +122,10 @@ define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_foldable_use(i64 inreg %src0, i
128
122
define amdgpu_ps { i64 , i64 } @s_andn2_i64_multi_use (i64 inreg %src0 , i64 inreg %src1 ) {
129
123
; GCN-LABEL: s_andn2_i64_multi_use:
130
124
; GCN: ; %bb.0:
131
- ; GCN-NEXT: s_not_b64 s[4:5], s[4:5]
132
- ; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
133
- ; GCN-NEXT: s_mov_b32 s2, s4
134
- ; GCN-NEXT: s_mov_b32 s3, s5
125
+ ; GCN-NEXT: s_not_b64 s[6:7], s[4:5]
126
+ ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
127
+ ; GCN-NEXT: s_mov_b32 s2, s6
128
+ ; GCN-NEXT: s_mov_b32 s3, s7
135
129
; GCN-NEXT: ; return to shader part epilog
136
130
%not.src1 = xor i64 %src1 , -1
137
131
%and = and i64 %src0 , %not.src1
0 commit comments