Skip to content

Commit 87b2af8

Browse files
committed
AMDGPU/GlobalISel: Enable s_{and|or}n2_{b32|b64} patterns
1 parent aa57cab commit 87b2af8

File tree

3 files changed: 34 additions and 40 deletions.

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -408,8 +408,14 @@ class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
408408
class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
409409
(ops node:$src0),
410410
(Op $src0),
411-
[{ return !N->isDivergent(); }]
412-
>;
411+
[{ return !N->isDivergent(); }]> {
412+
// This check is unnecessary as it's captured by the result register
413+
// bank constraint.
414+
//
415+
// FIXME: Should add a way for the emitter to recognize this is a
416+
// trivially true predicate to eliminate the check.
417+
let GISelPredicateCode = [{return true;}];
418+
}
413419

414420
class UniformBinFrag<SDPatternOperator Op> : PatFrag <
415421
(ops node:$src0, node:$src1),

llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll

Lines changed: 13 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,7 @@
55
define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
66
; GCN-LABEL: s_andn2_i32:
77
; GCN: ; %bb.0:
8-
; GCN-NEXT: s_not_b32 s0, s3
9-
; GCN-NEXT: s_and_b32 s0, s2, s0
8+
; GCN-NEXT: s_andn2_b32 s0, s2, s3
109
; GCN-NEXT: ; return to shader part epilog
1110
%not.src1 = xor i32 %src1, -1
1211
%and = and i32 %src0, %not.src1
@@ -16,8 +15,7 @@ define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
1615
define amdgpu_ps i32 @s_andn2_i32_commute(i32 inreg %src0, i32 inreg %src1) {
1716
; GCN-LABEL: s_andn2_i32_commute:
1817
; GCN: ; %bb.0:
19-
; GCN-NEXT: s_not_b32 s0, s3
20-
; GCN-NEXT: s_and_b32 s0, s0, s2
18+
; GCN-NEXT: s_andn2_b32 s0, s2, s3
2119
; GCN-NEXT: ; return to shader part epilog
2220
%not.src1 = xor i32 %src1, -1
2321
%and = and i32 %not.src1, %src0
@@ -28,7 +26,7 @@ define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_use(i32 inreg %src0, i32 inreg
2826
; GCN-LABEL: s_andn2_i32_multi_use:
2927
; GCN: ; %bb.0:
3028
; GCN-NEXT: s_not_b32 s1, s3
31-
; GCN-NEXT: s_and_b32 s0, s2, s1
29+
; GCN-NEXT: s_andn2_b32 s0, s2, s3
3230
; GCN-NEXT: ; return to shader part epilog
3331
%not.src1 = xor i32 %src1, -1
3432
%and = and i32 %src0, %not.src1
@@ -40,9 +38,8 @@ define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_use(i32 inreg %src0, i32 inreg
4038
define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) {
4139
; GCN-LABEL: s_andn2_i32_multi_foldable_use:
4240
; GCN: ; %bb.0:
43-
; GCN-NEXT: s_not_b32 s1, s4
44-
; GCN-NEXT: s_and_b32 s0, s2, s1
45-
; GCN-NEXT: s_and_b32 s1, s3, s1
41+
; GCN-NEXT: s_andn2_b32 s0, s2, s4
42+
; GCN-NEXT: s_andn2_b32 s1, s3, s4
4643
; GCN-NEXT: ; return to shader part epilog
4744
%not.src2 = xor i32 %src2, -1
4845
%and0 = and i32 %src0, %not.src2
@@ -91,8 +88,7 @@ define amdgpu_ps float @v_andn2_i32_vs(i32 %src0, i32 inreg %src1) {
9188
define amdgpu_ps i64 @s_andn2_i64(i64 inreg %src0, i64 inreg %src1) {
9289
; GCN-LABEL: s_andn2_i64:
9390
; GCN: ; %bb.0:
94-
; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
95-
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
91+
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
9692
; GCN-NEXT: ; return to shader part epilog
9793
%not.src1 = xor i64 %src1, -1
9894
%and = and i64 %src0, %not.src1
@@ -102,8 +98,7 @@ define amdgpu_ps i64 @s_andn2_i64(i64 inreg %src0, i64 inreg %src1) {
10298
define amdgpu_ps i64 @s_andn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
10399
; GCN-LABEL: s_andn2_i64_commute:
104100
; GCN: ; %bb.0:
105-
; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
106-
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
101+
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
107102
; GCN-NEXT: ; return to shader part epilog
108103
%not.src1 = xor i64 %src1, -1
109104
%and = and i64 %not.src1, %src0
@@ -113,9 +108,8 @@ define amdgpu_ps i64 @s_andn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
113108
define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) {
114109
; GCN-LABEL: s_andn2_i64_multi_foldable_use:
115110
; GCN: ; %bb.0:
116-
; GCN-NEXT: s_not_b64 s[6:7], s[6:7]
117-
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
118-
; GCN-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
111+
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[6:7]
112+
; GCN-NEXT: s_andn2_b64 s[2:3], s[4:5], s[6:7]
119113
; GCN-NEXT: ; return to shader part epilog
120114
%not.src2 = xor i64 %src2, -1
121115
%and0 = and i64 %src0, %not.src2
@@ -128,10 +122,10 @@ define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_foldable_use(i64 inreg %src0, i
128122
define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) {
129123
; GCN-LABEL: s_andn2_i64_multi_use:
130124
; GCN: ; %bb.0:
131-
; GCN-NEXT: s_not_b64 s[4:5], s[4:5]
132-
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
133-
; GCN-NEXT: s_mov_b32 s2, s4
134-
; GCN-NEXT: s_mov_b32 s3, s5
125+
; GCN-NEXT: s_not_b64 s[6:7], s[4:5]
126+
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
127+
; GCN-NEXT: s_mov_b32 s2, s6
128+
; GCN-NEXT: s_mov_b32 s3, s7
135129
; GCN-NEXT: ; return to shader part epilog
136130
%not.src1 = xor i64 %src1, -1
137131
%and = and i64 %src0, %not.src1

llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
define amdgpu_ps i32 @s_orn2_i32(i32 inreg %src0, i32 inreg %src1) {
66
; GCN-LABEL: s_orn2_i32:
77
; GCN: ; %bb.0:
8-
; GCN-NEXT: s_not_b32 s0, s3
9-
; GCN-NEXT: s_or_b32 s0, s2, s0
8+
; GCN-NEXT: s_orn2_b32 s0, s2, s3
109
; GCN-NEXT: ; return to shader part epilog
1110
%not.src1 = xor i32 %src1, -1
1211
%or = or i32 %src0, %not.src1
@@ -16,8 +15,7 @@ define amdgpu_ps i32 @s_orn2_i32(i32 inreg %src0, i32 inreg %src1) {
1615
define amdgpu_ps i32 @s_orn2_i32_commute(i32 inreg %src0, i32 inreg %src1) {
1716
; GCN-LABEL: s_orn2_i32_commute:
1817
; GCN: ; %bb.0:
19-
; GCN-NEXT: s_not_b32 s0, s3
20-
; GCN-NEXT: s_or_b32 s0, s0, s2
18+
; GCN-NEXT: s_orn2_b32 s0, s2, s3
2119
; GCN-NEXT: ; return to shader part epilog
2220
%not.src1 = xor i32 %src1, -1
2321
%or = or i32 %not.src1, %src0
@@ -28,7 +26,7 @@ define amdgpu_ps { i32, i32 } @s_orn2_i32_multi_use(i32 inreg %src0, i32 inreg %
2826
; GCN-LABEL: s_orn2_i32_multi_use:
2927
; GCN: ; %bb.0:
3028
; GCN-NEXT: s_not_b32 s1, s3
31-
; GCN-NEXT: s_or_b32 s0, s2, s1
29+
; GCN-NEXT: s_orn2_b32 s0, s2, s3
3230
; GCN-NEXT: ; return to shader part epilog
3331
%not.src1 = xor i32 %src1, -1
3432
%or = or i32 %src0, %not.src1
@@ -40,9 +38,8 @@ define amdgpu_ps { i32, i32 } @s_orn2_i32_multi_use(i32 inreg %src0, i32 inreg %
4038
define amdgpu_ps { i32, i32 } @s_orn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) {
4139
; GCN-LABEL: s_orn2_i32_multi_foldable_use:
4240
; GCN: ; %bb.0:
43-
; GCN-NEXT: s_not_b32 s1, s4
44-
; GCN-NEXT: s_or_b32 s0, s2, s1
45-
; GCN-NEXT: s_or_b32 s1, s3, s1
41+
; GCN-NEXT: s_orn2_b32 s0, s2, s4
42+
; GCN-NEXT: s_orn2_b32 s1, s3, s4
4643
; GCN-NEXT: ; return to shader part epilog
4744
%not.src2 = xor i32 %src2, -1
4845
%or0 = or i32 %src0, %not.src2
@@ -91,8 +88,7 @@ define amdgpu_ps float @v_orn2_i32_vs(i32 %src0, i32 inreg %src1) {
9188
define amdgpu_ps i64 @s_orn2_i64(i64 inreg %src0, i64 inreg %src1) {
9289
; GCN-LABEL: s_orn2_i64:
9390
; GCN: ; %bb.0:
94-
; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
95-
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
91+
; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[4:5]
9692
; GCN-NEXT: ; return to shader part epilog
9793
%not.src1 = xor i64 %src1, -1
9894
%or = or i64 %src0, %not.src1
@@ -102,8 +98,7 @@ define amdgpu_ps i64 @s_orn2_i64(i64 inreg %src0, i64 inreg %src1) {
10298
define amdgpu_ps i64 @s_orn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
10399
; GCN-LABEL: s_orn2_i64_commute:
104100
; GCN: ; %bb.0:
105-
; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
106-
; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
101+
; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[4:5]
107102
; GCN-NEXT: ; return to shader part epilog
108103
%not.src1 = xor i64 %src1, -1
109104
%or = or i64 %not.src1, %src0
@@ -113,9 +108,8 @@ define amdgpu_ps i64 @s_orn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
113108
define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) {
114109
; GCN-LABEL: s_orn2_i64_multi_foldable_use:
115110
; GCN: ; %bb.0:
116-
; GCN-NEXT: s_not_b64 s[6:7], s[6:7]
117-
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7]
118-
; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[6:7]
111+
; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[6:7]
112+
; GCN-NEXT: s_orn2_b64 s[2:3], s[4:5], s[6:7]
119113
; GCN-NEXT: ; return to shader part epilog
120114
%not.src2 = xor i64 %src2, -1
121115
%or0 = or i64 %src0, %not.src2
@@ -128,10 +122,10 @@ define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_foldable_use(i64 inreg %src0, i6
128122
define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) {
129123
; GCN-LABEL: s_orn2_i64_multi_use:
130124
; GCN: ; %bb.0:
131-
; GCN-NEXT: s_not_b64 s[4:5], s[4:5]
132-
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
133-
; GCN-NEXT: s_mov_b32 s2, s4
134-
; GCN-NEXT: s_mov_b32 s3, s5
125+
; GCN-NEXT: s_not_b64 s[6:7], s[4:5]
126+
; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[4:5]
127+
; GCN-NEXT: s_mov_b32 s2, s6
128+
; GCN-NEXT: s_mov_b32 s3, s7
135129
; GCN-NEXT: ; return to shader part epilog
136130
%not.src1 = xor i64 %src1, -1
137131
%or = or i64 %src0, %not.src1

0 commit comments

Comments
 (0)