Skip to content

Commit db6f476

Browse files
authored
AMDGPU: Use "countMaxActiveBits() <= 5" to define uint5Bits (#115543)
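countMaxTrailingOnes() is not the correct check: it only bounds the number of consecutive set bits at the low end of the value, not the value's magnitude, so a width that does not fit in 5 bits (for example, any value whose bit 0 is known to be zero) can still satisfy "<= 5". countMaxActiveBits() bounds the number of bits needed to represent the value, which is what uint5Bits is meant to test. This patch follows the suggestion from #115372.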
1 parent 144bdf3 commit db6f476

File tree

2 files changed

+26
-1
lines changed

2 files changed

+26
-1
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3554,7 +3554,7 @@ def : AMDGPUPat <
35543554
>;
35553555

35563556
// Pattern leaf for an i32 VGPR operand ($width) whose value, according to
// computeKnownBits, needs at most 5 bits to represent.
// countMaxActiveBits() bounds the number of significant bits of the value;
// the countMaxTrailingOnes() call it replaces only bounded the run of set
// bits at the low end, so it also accepted large values with a clear bit 0.
def uint5Bits : PatLeaf<(i32 VGPR_32:$width), [{
3557-
return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxTrailingOnes() <= 5;
3557+
return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxActiveBits() <= 5;
35583558
}]>;
35593559

35603560
// x << (bitwidth - y) >> (bitwidth - y)

llvm/test/CodeGen/AMDGPU/extract-lowbits.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,31 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
163163
ret i32 %masked
164164
}
165165

166+
; Clears the high (32 - 2 * %numlowbits) bits of %val with a shl/lshr pair.
; The bit count is %numlowbits shifted left by one, so its bit 0 is zero while
; nothing else bounds it to 5 bits. The SI and VI checks below expect the
; shl/lshr pair to stay as two variable shifts.
; NOTE(review): presumably this guards against uint5Bits matching a width that
; is merely even -- confirm against the pattern that uses uint5Bits.
define i32 @bzhi32_d0_even(i32 %val, i32 %numlowbits) nounwind {
167+
; SI-LABEL: bzhi32_d0_even:
168+
; SI: ; %bb.0:
169+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170+
; SI-NEXT: v_lshlrev_b32_e32 v1, 1, v1
171+
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
172+
; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
173+
; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
174+
; SI-NEXT: s_setpc_b64 s[30:31]
175+
;
176+
; VI-LABEL: bzhi32_d0_even:
177+
; VI: ; %bb.0:
178+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179+
; VI-NEXT: v_lshlrev_b32_e32 v1, 1, v1
180+
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
181+
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
182+
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
183+
; VI-NEXT: s_setpc_b64 s[30:31]
184+
%times2 = shl i32 %numlowbits, 1
185+
%numhighbits = sub i32 32, %times2
186+
%highbitscleared = shl i32 %val, %numhighbits
187+
%masked = lshr i32 %highbitscleared, %numhighbits
188+
ret i32 %masked
189+
}
190+
166191
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
167192
; SI-LABEL: bzhi32_d1_indexzext:
168193
; SI: ; %bb.0:

0 commit comments

Comments
 (0)