Skip to content

Commit 70fdd9f

Browse files
authored
[GlobalISel] Check whether G_CTLZ is legal in matchUMulHToLShr (#126457)
We need to check `G_CTLZ` because the combine uses `G_CTLZ` to get log base 2, and it is not always legal on a target. Fixes SWDEV-512440.
1 parent 5563240 commit 70fdd9f

File tree

3 files changed

+126
-1
lines changed

3 files changed

+126
-1
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5641,6 +5641,7 @@ bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) const {
56415641
Register RHS = MI.getOperand(2).getReg();
56425642
Register Dst = MI.getOperand(0).getReg();
56435643
LLT Ty = MRI.getType(Dst);
5644+
LLT RHSTy = MRI.getType(RHS);
56445645
LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
56455646
auto MatchPow2ExceptOne = [&](const Constant *C) {
56465647
if (auto *CI = dyn_cast<ConstantInt>(C))
@@ -5649,7 +5650,10 @@ bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) const {
56495650
};
56505651
if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
56515652
return false;
5652-
return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5653+
// We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
5654+
// get log base 2, and it is not always legal on a target.
5655+
return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
5656+
isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
56535657
}
56545658

56555659
void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -O0 -global-isel=true %s -o - | FileCheck %s
3+
4+
define void @test(ptr %p) {
5+
; CHECK-LABEL: test:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; CHECK-NEXT: v_mov_b32_e32 v2, v1
9+
; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
10+
; CHECK-NEXT: v_mov_b32_e32 v1, v2
11+
; CHECK-NEXT: s_mov_b32 s5, 16
12+
; CHECK-NEXT: s_mov_b32 s6, 0
13+
; CHECK-NEXT: v_mov_b32_e32 v2, s6
14+
; CHECK-NEXT: v_cvt_f32_ubyte0_e64 v2, v2
15+
; CHECK-NEXT: v_rcp_iflag_f32_e64 v2, v2
16+
; CHECK-NEXT: s_mov_b32 s4, 0x4f7ffffe
17+
; CHECK-NEXT: v_mov_b32_e32 v3, s4
18+
; CHECK-NEXT: v_mul_f32_e64 v2, v2, v3
19+
; CHECK-NEXT: v_cvt_u32_f32_e64 v2, v2
20+
; CHECK-NEXT: s_mov_b32 s7, 0
21+
; CHECK-NEXT: v_mov_b32_e32 v3, s7
22+
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
23+
; CHECK-NEXT: v_add_u32_e64 v2, v2, v3
24+
; CHECK-NEXT: v_mov_b32_e32 v3, s5
25+
; CHECK-NEXT: v_mul_hi_u32 v2, v2, v3
26+
; CHECK-NEXT: s_mov_b32 s7, 2
27+
; CHECK-NEXT: v_mov_b32_e32 v3, s7
28+
; CHECK-NEXT: v_add_u32_e64 v2, v2, v3
29+
; CHECK-NEXT: v_mov_b32_e32 v3, s6
30+
; CHECK-NEXT: v_cvt_f32_ubyte0_e64 v3, v3
31+
; CHECK-NEXT: v_rcp_iflag_f32_e64 v3, v3
32+
; CHECK-NEXT: v_mov_b32_e32 v4, s4
33+
; CHECK-NEXT: v_mul_f32_e64 v3, v3, v4
34+
; CHECK-NEXT: v_cvt_u32_f32_e64 v3, v3
35+
; CHECK-NEXT: s_mov_b32 s7, 0
36+
; CHECK-NEXT: v_mov_b32_e32 v4, s7
37+
; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4
38+
; CHECK-NEXT: v_add_u32_e64 v3, v3, v4
39+
; CHECK-NEXT: v_mov_b32_e32 v4, s5
40+
; CHECK-NEXT: v_mul_hi_u32 v3, v3, v4
41+
; CHECK-NEXT: s_mov_b32 s7, 2
42+
; CHECK-NEXT: v_mov_b32_e32 v4, s7
43+
; CHECK-NEXT: v_add_u32_e64 v6, v3, v4
44+
; CHECK-NEXT: v_mov_b32_e32 v3, s6
45+
; CHECK-NEXT: v_cvt_f32_ubyte0_e64 v3, v3
46+
; CHECK-NEXT: v_rcp_iflag_f32_e64 v3, v3
47+
; CHECK-NEXT: v_mov_b32_e32 v4, s4
48+
; CHECK-NEXT: v_mul_f32_e64 v3, v3, v4
49+
; CHECK-NEXT: v_cvt_u32_f32_e64 v3, v3
50+
; CHECK-NEXT: s_mov_b32 s7, 0
51+
; CHECK-NEXT: v_mov_b32_e32 v4, s7
52+
; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4
53+
; CHECK-NEXT: v_add_u32_e64 v3, v3, v4
54+
; CHECK-NEXT: v_mov_b32_e32 v4, s5
55+
; CHECK-NEXT: v_mul_hi_u32 v3, v3, v4
56+
; CHECK-NEXT: s_mov_b32 s7, 2
57+
; CHECK-NEXT: v_mov_b32_e32 v4, s7
58+
; CHECK-NEXT: v_add_u32_e64 v3, v3, v4
59+
; CHECK-NEXT: v_mov_b32_e32 v4, s6
60+
; CHECK-NEXT: v_cvt_f32_ubyte0_e64 v4, v4
61+
; CHECK-NEXT: v_rcp_iflag_f32_e64 v4, v4
62+
; CHECK-NEXT: v_mov_b32_e32 v5, s4
63+
; CHECK-NEXT: v_mul_f32_e64 v4, v4, v5
64+
; CHECK-NEXT: v_cvt_u32_f32_e64 v4, v4
65+
; CHECK-NEXT: s_mov_b32 s4, 0
66+
; CHECK-NEXT: v_mov_b32_e32 v5, s4
67+
; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
68+
; CHECK-NEXT: v_add_u32_e64 v4, v4, v5
69+
; CHECK-NEXT: v_mov_b32_e32 v5, s5
70+
; CHECK-NEXT: v_mul_hi_u32 v4, v4, v5
71+
; CHECK-NEXT: s_mov_b32 s4, 2
72+
; CHECK-NEXT: v_mov_b32_e32 v5, s4
73+
; CHECK-NEXT: v_add_u32_e64 v4, v4, v5
74+
; CHECK-NEXT: s_mov_b32 s4, 0xff
75+
; CHECK-NEXT: v_mov_b32_e32 v5, s4
76+
; CHECK-NEXT: v_mov_b32_e32 v7, s4
77+
; CHECK-NEXT: v_and_b32_e64 v7, v6, v7
78+
; CHECK-NEXT: s_mov_b32 s6, 8
79+
; CHECK-NEXT: v_mov_b32_e32 v6, s6
80+
; CHECK-NEXT: v_lshlrev_b32_e64 v6, v6, v7
81+
; CHECK-NEXT: v_and_or_b32 v2, v2, v5, v6
82+
; CHECK-NEXT: v_mov_b32_e32 v5, s4
83+
; CHECK-NEXT: v_and_b32_e64 v5, v3, v5
84+
; CHECK-NEXT: v_mov_b32_e32 v3, s5
85+
; CHECK-NEXT: v_lshlrev_b32_e64 v3, v3, v5
86+
; CHECK-NEXT: v_mov_b32_e32 v5, s4
87+
; CHECK-NEXT: v_and_b32_e64 v5, v4, v5
88+
; CHECK-NEXT: s_mov_b32 s4, 24
89+
; CHECK-NEXT: v_mov_b32_e32 v4, s4
90+
; CHECK-NEXT: v_lshlrev_b32_e64 v4, v4, v5
91+
; CHECK-NEXT: v_or3_b32 v2, v2, v3, v4
92+
; CHECK-NEXT: flat_store_dword v[0:1], v2
93+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
94+
; CHECK-NEXT: s_setpc_b64 s[30:31]
95+
%B = udiv <4 x i8> splat (i8 16), zeroinitializer
96+
store <4 x i8> %B, ptr %p, align 4
97+
ret void
98+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck %s
3+
4+
---
5+
name: test
6+
tracksRegLiveness: true
7+
legalized: true
8+
body: |
9+
bb.0:
10+
liveins: $vgpr0, $vgpr1
11+
; CHECK-LABEL: name: test
12+
; CHECK: liveins: $vgpr0, $vgpr1
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
15+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
16+
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[C]]
17+
; CHECK-NEXT: $vgpr0 = COPY [[UMULH]](s32)
18+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
19+
%0:_(s32) = COPY $vgpr0
20+
%1:_(s32) = G_CONSTANT i32 4
21+
%2:_(s32) = G_UMULH %0:_, %1:_
22+
$vgpr0 = COPY %2:_(s32)
23+
SI_RETURN implicit $vgpr0

0 commit comments

Comments
 (0)