Skip to content

Commit 2048248

Browse files
committed
AMDGPU: Add overflow operations to isBoolSGPR
The particular use in the test doesn't seem to do anything for the expanded cases (i.e. the signed add/sub or multiplies).
1 parent c3604ba commit 2048248

File tree

2 files changed

+96
-0
lines changed

2 files changed

+96
-0
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11915,6 +11915,13 @@ bool llvm::isBoolSGPR(SDValue V) {
1191511915
case ISD::OR:
1191611916
case ISD::XOR:
1191711917
return isBoolSGPR(V.getOperand(0)) && isBoolSGPR(V.getOperand(1));
11918+
case ISD::SADDO:
11919+
case ISD::UADDO:
11920+
case ISD::SSUBO:
11921+
case ISD::USUBO:
11922+
case ISD::SMULO:
11923+
case ISD::UMULO:
11924+
return V.getResNo() == 1;
1191811925
}
1191911926
return false;
1192011927
}

llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,95 @@ define i32 @and_sext_bool_fpclass(float %x, i32 %y) {
4545
ret i32 %and
4646
}
4747

48+
; GCN-LABEL: {{^}}and_sext_bool_uadd_w_overflow:
49+
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50+
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
51+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
52+
; GCN-NEXT: s_setpc_b64
53+
; Sign-extends the overflow bit of llvm.uadd.with.overflow(%x, %y) to i32 and
; ANDs it with %y, so the result is %y when the unsigned add overflows and 0
; otherwise.
define i32 @and_sext_bool_uadd_w_overflow(i32 %x, i32 %y) {
54+
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
55+
%carry = extractvalue { i32, i1 } %uadd, 1
56+
%sext = sext i1 %carry to i32
57+
%and = and i32 %sext, %y
58+
ret i32 %and
59+
}
60+
61+
; GCN-LABEL: {{^}}and_sext_bool_usub_w_overflow:
62+
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63+
; GCN-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
64+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
65+
; GCN-NEXT: s_setpc_b64
66+
; Sign-extends the overflow bit of llvm.usub.with.overflow(%x, %y) to i32 and
; ANDs it with %y, so the result is %y when the unsigned subtract overflows and
; 0 otherwise.
; NOTE: the local is named %uadd although it holds the usub.with.overflow
; result.
define i32 @and_sext_bool_usub_w_overflow(i32 %x, i32 %y) {
67+
%uadd = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
68+
%carry = extractvalue { i32, i1 } %uadd, 1
69+
%sext = sext i1 %carry to i32
70+
%and = and i32 %sext, %y
71+
ret i32 %and
72+
}
73+
74+
; GCN-LABEL: {{^}}and_sext_bool_sadd_w_overflow:
75+
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76+
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
77+
; GCN-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1
78+
; GCN-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
79+
; GCN-NEXT: s_xor_b64 vcc, vcc, s[4:5]
80+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
81+
; GCN-NEXT: s_setpc_b64
82+
; Sign-extends the overflow bit of llvm.sadd.with.overflow(%x, %y) to i32 and
; ANDs it with %y, so the result is %y when the signed add overflows and 0
; otherwise.
; NOTE: the local is named %uadd although it holds the sadd.with.overflow
; result.
define i32 @and_sext_bool_sadd_w_overflow(i32 %x, i32 %y) {
83+
%uadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
84+
%carry = extractvalue { i32, i1 } %uadd, 1
85+
%sext = sext i1 %carry to i32
86+
%and = and i32 %sext, %y
87+
ret i32 %and
88+
}
89+
90+
; GCN-LABEL: {{^}}and_sext_bool_ssub_w_overflow:
91+
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92+
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
93+
; GCN-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1
94+
; GCN-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
95+
; GCN-NEXT: s_xor_b64 vcc, vcc, s[4:5]
96+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
97+
; GCN-NEXT: s_setpc_b64
98+
; Sign-extends the overflow bit of the with.overflow call below to i32 and ANDs
; it with %y, so the result is %y on overflow and 0 otherwise.
; NOTE(review): the function name says ssub, but the call below is
; @llvm.sadd.with.overflow.i32, so this body is identical to
; and_sext_bool_sadd_w_overflow and signed-subtract overflow is not actually
; exercised. This looks like a copy-paste slip; switching the call to
; @llvm.ssub.with.overflow.i32 would also require regenerating this function's
; GCN check lines (they currently expect v_add_i32).
define i32 @and_sext_bool_ssub_w_overflow(i32 %x, i32 %y) {
99+
%uadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
100+
%carry = extractvalue { i32, i1 } %uadd, 1
101+
%sext = sext i1 %carry to i32
102+
%and = and i32 %sext, %y
103+
ret i32 %and
104+
}
105+
106+
; GCN-LABEL: {{^}}and_sext_bool_smul_w_overflow:
107+
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108+
; GCN-NEXT: v_mul_hi_i32 v2, v0, v1
109+
; GCN-NEXT: v_mul_lo_u32 v0, v0, v1
110+
; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0
111+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, v2, v0
112+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
113+
; GCN-NEXT: s_setpc_b64
114+
; Sign-extends the overflow bit of llvm.smul.with.overflow(%x, %y) to i32 and
; ANDs it with %y, so the result is %y when the signed multiply overflows and 0
; otherwise.
; NOTE: the local is named %uadd although it holds the smul.with.overflow
; result.
define i32 @and_sext_bool_smul_w_overflow(i32 %x, i32 %y) {
115+
%uadd = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %x, i32 %y)
116+
%carry = extractvalue { i32, i1 } %uadd, 1
117+
%sext = sext i1 %carry to i32
118+
%and = and i32 %sext, %y
119+
ret i32 %and
120+
}
121+
122+
; GCN-LABEL: {{^}}and_sext_bool_umul_w_overflow:
123+
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124+
; GCN-NEXT: v_mul_hi_u32 v0, v0, v1
125+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
126+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
127+
; GCN-NEXT: s_setpc_b64
128+
; Sign-extends the overflow bit of llvm.umul.with.overflow(%x, %y) to i32 and
; ANDs it with %y, so the result is %y when the unsigned multiply overflows and
; 0 otherwise.
; NOTE: the local is named %uadd although it holds the umul.with.overflow
; result.
define i32 @and_sext_bool_umul_w_overflow(i32 %x, i32 %y) {
129+
%uadd = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
130+
%carry = extractvalue { i32, i1 } %uadd, 1
131+
%sext = sext i1 %carry to i32
132+
%and = and i32 %sext, %y
133+
ret i32 %and
134+
}
135+
136+
48137
declare i32 @llvm.amdgcn.workitem.id.x() #0
49138

50139
declare i32 @llvm.amdgcn.workitem.id.y() #0

0 commit comments

Comments
 (0)