Skip to content

Commit 0a43ca7

Browse files
authored
[AMDGPU] Fix missing IsExact flag when expanding vector binary operator (#86712)
1 parent d94dc5f commit 0a43ca7

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,6 +1594,9 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
15941594
}
15951595
}
15961596

1597+
if (auto *NewEltI = dyn_cast<Instruction>(NewElt))
1598+
NewEltI->copyIRFlags(&I);
1599+
15971600
NewDiv = Builder.CreateInsertElement(NewDiv, NewElt, N);
15981601
}
15991602
} else {

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10668,3 +10668,111 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
1066810668
store <2 x i64> %r, ptr addrspace(1) %out
1066910669
ret void
1067010670
}
10671+
10672+
; Exact signed division of a <2 x i32> vector by the constant divisors
; <4096, 1024> (2^12 and 2^10). The IR checks below expect the vector op to be
; expanded into one scalar `sdiv exact i32` per element, i.e. the `exact` flag
; is still present on each scalar division. The GFX6 and GFX9 checks expect a
; single `v_ashrrev_i32_e32` per element with shift amounts 12 and 10.
define <2 x i32> @v_sdiv_i32_exact(<2 x i32> %num) {
10673+
; CHECK-LABEL: @v_sdiv_i32_exact(
10674+
; CHECK: %1 = extractelement <2 x i32> %num, i64 0
10675+
; CHECK-NEXT: %2 = sdiv exact i32 %1, 4096
10676+
; CHECK-NEXT: %3 = insertelement <2 x i32> poison, i32 %2, i64 0
10677+
; CHECK-NEXT: %4 = extractelement <2 x i32> %num, i64 1
10678+
; CHECK-NEXT: %5 = sdiv exact i32 %4, 1024
10679+
; CHECK-NEXT: %6 = insertelement <2 x i32> %3, i32 %5, i64 1
10680+
; CHECK-NEXT: ret <2 x i32> %6
10681+
;
10682+
; GFX6-LABEL: v_sdiv_i32_exact:
10683+
; GFX6: ; %bb.0:
10684+
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10685+
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 12, v0
10686+
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 10, v1
10687+
; GFX6-NEXT: s_setpc_b64 s[30:31]
10688+
;
10689+
; GFX9-LABEL: v_sdiv_i32_exact:
10690+
; GFX9: ; %bb.0:
10691+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10692+
; GFX9-NEXT: v_ashrrev_i32_e32 v0, 12, v0
10693+
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 10, v1
10694+
; GFX9-NEXT: s_setpc_b64 s[30:31]
10695+
%result = sdiv exact <2 x i32> %num, <i32 4096, i32 1024>
10696+
ret <2 x i32> %result
10697+
}
10698+
10699+
; Exact signed division of a <2 x i64> vector by the constant divisors
; <4096, 1024> (2^12 and 2^10). The IR checks below expect one scalar
; `sdiv exact i64` per element, so the `exact` flag is kept on each scalar
; division. The GFX6 checks expect `v_ashr_i64` and the GFX9 checks expect
; `v_ashrrev_i64`, one per element, with shift amounts 12 and 10.
define <2 x i64> @v_sdiv_i64_exact(<2 x i64> %num) {
10700+
; CHECK-LABEL: @v_sdiv_i64_exact(
10701+
; CHECK: %1 = extractelement <2 x i64> %num, i64 0
10702+
; CHECK-NEXT: %2 = sdiv exact i64 %1, 4096
10703+
; CHECK-NEXT: %3 = insertelement <2 x i64> poison, i64 %2, i64 0
10704+
; CHECK-NEXT: %4 = extractelement <2 x i64> %num, i64 1
10705+
; CHECK-NEXT: %5 = sdiv exact i64 %4, 1024
10706+
; CHECK-NEXT: %6 = insertelement <2 x i64> %3, i64 %5, i64 1
10707+
; CHECK-NEXT: ret <2 x i64> %6
10708+
;
10709+
; GFX6-LABEL: v_sdiv_i64_exact:
10710+
; GFX6: ; %bb.0:
10711+
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10712+
; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
10713+
; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 10
10714+
; GFX6-NEXT: s_setpc_b64 s[30:31]
10715+
;
10716+
; GFX9-LABEL: v_sdiv_i64_exact:
10717+
; GFX9: ; %bb.0:
10718+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10719+
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 12, v[0:1]
10720+
; GFX9-NEXT: v_ashrrev_i64 v[2:3], 10, v[2:3]
10721+
; GFX9-NEXT: s_setpc_b64 s[30:31]
10722+
%result = sdiv exact <2 x i64> %num, <i64 4096, i64 1024>
10723+
ret <2 x i64> %result
10724+
}
10725+
10726+
; Exact unsigned division of a <2 x i32> vector by the constant divisors
; <4096, 1024> (2^12 and 2^10). The IR checks below expect one scalar
; `udiv exact i32` per element, so the `exact` flag is kept on each scalar
; division. The GFX6 and GFX9 checks expect a single `v_lshrrev_b32_e32` per
; element with shift amounts 12 and 10.
define <2 x i32> @v_udiv_i32_exact(<2 x i32> %num) {
10727+
; CHECK-LABEL: @v_udiv_i32_exact(
10728+
; CHECK: %1 = extractelement <2 x i32> %num, i64 0
10729+
; CHECK-NEXT: %2 = udiv exact i32 %1, 4096
10730+
; CHECK-NEXT: %3 = insertelement <2 x i32> poison, i32 %2, i64 0
10731+
; CHECK-NEXT: %4 = extractelement <2 x i32> %num, i64 1
10732+
; CHECK-NEXT: %5 = udiv exact i32 %4, 1024
10733+
; CHECK-NEXT: %6 = insertelement <2 x i32> %3, i32 %5, i64 1
10734+
; CHECK-NEXT: ret <2 x i32> %6
10735+
;
10736+
; GFX6-LABEL: v_udiv_i32_exact:
10737+
; GFX6: ; %bb.0:
10738+
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10739+
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 12, v0
10740+
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 10, v1
10741+
; GFX6-NEXT: s_setpc_b64 s[30:31]
10742+
;
10743+
; GFX9-LABEL: v_udiv_i32_exact:
10744+
; GFX9: ; %bb.0:
10745+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10746+
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 12, v0
10747+
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 10, v1
10748+
; GFX9-NEXT: s_setpc_b64 s[30:31]
10749+
%result = udiv exact <2 x i32> %num, <i32 4096, i32 1024>
10750+
ret <2 x i32> %result
10751+
}
10752+
10753+
; Exact unsigned division of a <2 x i64> vector by the constant divisors
; <4096, 1024> (2^12 and 2^10). The IR checks below expect one scalar
; `udiv exact i64` per element, so the `exact` flag is kept on each scalar
; division. The GFX6 checks expect `v_lshr_b64` and the GFX9 checks expect
; `v_lshrrev_b64`, one per element, with shift amounts 12 and 10.
define <2 x i64> @v_udiv_i64_exact(<2 x i64> %num) {
10754+
; CHECK-LABEL: @v_udiv_i64_exact(
10755+
; CHECK: %1 = extractelement <2 x i64> %num, i64 0
10756+
; CHECK-NEXT: %2 = udiv exact i64 %1, 4096
10757+
; CHECK-NEXT: %3 = insertelement <2 x i64> poison, i64 %2, i64 0
10758+
; CHECK-NEXT: %4 = extractelement <2 x i64> %num, i64 1
10759+
; CHECK-NEXT: %5 = udiv exact i64 %4, 1024
10760+
; CHECK-NEXT: %6 = insertelement <2 x i64> %3, i64 %5, i64 1
10761+
; CHECK-NEXT: ret <2 x i64> %6
10762+
;
10763+
; GFX6-LABEL: v_udiv_i64_exact:
10764+
; GFX6: ; %bb.0:
10765+
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10766+
; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 12
10767+
; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 10
10768+
; GFX6-NEXT: s_setpc_b64 s[30:31]
10769+
;
10770+
; GFX9-LABEL: v_udiv_i64_exact:
10771+
; GFX9: ; %bb.0:
10772+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10773+
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 12, v[0:1]
10774+
; GFX9-NEXT: v_lshrrev_b64 v[2:3], 10, v[2:3]
10775+
; GFX9-NEXT: s_setpc_b64 s[30:31]
10776+
%result = udiv exact <2 x i64> %num, <i64 4096, i64 1024>
10777+
ret <2 x i64> %result
10778+
}

0 commit comments

Comments
 (0)