@@ -10668,3 +10668,111 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
10668
10668
store <2 x i64 > %r , ptr addrspace (1 ) %out
10669
10669
ret void
10670
10670
}
10671
+
10672
; Test: `sdiv exact` of a <2 x i32> vector by the constant divisors
; <4096, 1024>, i.e. 2^12 and 2^10.
;  - CHECK lines expect the vector division to be split into one scalar
;    `sdiv exact` per element (extractelement / sdiv exact / insertelement).
;    NOTE(review): which tool invocation feeds the CHECK prefix is set by RUN
;    lines that are not visible in this chunk - confirm there.
;  - GFX6 and GFX9 lines expect each element to become a single arithmetic
;    shift right (v_ashrrev_i32) by 12 and by 10, with no further fix-up
;    instructions before the return.
+ define <2 x i32 > @v_sdiv_i32_exact (<2 x i32 > %num ) {
10673
+ ; CHECK-LABEL: @v_sdiv_i32_exact(
10674
+ ; CHECK: %1 = extractelement <2 x i32> %num, i64 0
10675
+ ; CHECK-NEXT: %2 = sdiv exact i32 %1, 4096
10676
+ ; CHECK-NEXT: %3 = insertelement <2 x i32> poison, i32 %2, i64 0
10677
+ ; CHECK-NEXT: %4 = extractelement <2 x i32> %num, i64 1
10678
+ ; CHECK-NEXT: %5 = sdiv exact i32 %4, 1024
10679
+ ; CHECK-NEXT: %6 = insertelement <2 x i32> %3, i32 %5, i64 1
10680
+ ; CHECK-NEXT: ret <2 x i32> %6
10681
+ ;
10682
+ ; GFX6-LABEL: v_sdiv_i32_exact:
10683
+ ; GFX6: ; %bb.0:
10684
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10685
+ ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 12, v0
10686
+ ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 10, v1
10687
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
10688
+ ;
10689
+ ; GFX9-LABEL: v_sdiv_i32_exact:
10690
+ ; GFX9: ; %bb.0:
10691
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10692
+ ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 12, v0
10693
+ ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 10, v1
10694
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
10695
+ %result = sdiv exact <2 x i32 > %num , <i32 4096 , i32 1024 >
10696
+ ret <2 x i32 > %result
10697
+ }
10698
+
10699
; Test: `sdiv exact` of a <2 x i64> vector by the constant divisors
; <4096, 1024>, i.e. 2^12 and 2^10.
;  - CHECK lines expect the vector division to be split into one scalar
;    `sdiv exact` per element (extractelement / sdiv exact / insertelement).
;    NOTE(review): which tool invocation feeds the CHECK prefix is set by RUN
;    lines that are not visible in this chunk - confirm there.
;  - GFX6 lines expect one 64-bit arithmetic shift right per element
;    (v_ashr_i64, shift amount as the last operand) on the register pairs
;    v[0:1] and v[2:3].
;  - GFX9 lines expect the same shifts spelled v_ashrrev_i64, where the shift
;    amount is the first source operand.
+ define <2 x i64 > @v_sdiv_i64_exact (<2 x i64 > %num ) {
10700
+ ; CHECK-LABEL: @v_sdiv_i64_exact(
10701
+ ; CHECK: %1 = extractelement <2 x i64> %num, i64 0
10702
+ ; CHECK-NEXT: %2 = sdiv exact i64 %1, 4096
10703
+ ; CHECK-NEXT: %3 = insertelement <2 x i64> poison, i64 %2, i64 0
10704
+ ; CHECK-NEXT: %4 = extractelement <2 x i64> %num, i64 1
10705
+ ; CHECK-NEXT: %5 = sdiv exact i64 %4, 1024
10706
+ ; CHECK-NEXT: %6 = insertelement <2 x i64> %3, i64 %5, i64 1
10707
+ ; CHECK-NEXT: ret <2 x i64> %6
10708
+ ;
10709
+ ; GFX6-LABEL: v_sdiv_i64_exact:
10710
+ ; GFX6: ; %bb.0:
10711
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10712
+ ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
10713
+ ; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 10
10714
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
10715
+ ;
10716
+ ; GFX9-LABEL: v_sdiv_i64_exact:
10717
+ ; GFX9: ; %bb.0:
10718
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10719
+ ; GFX9-NEXT: v_ashrrev_i64 v[0:1], 12, v[0:1]
10720
+ ; GFX9-NEXT: v_ashrrev_i64 v[2:3], 10, v[2:3]
10721
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
10722
+ %result = sdiv exact <2 x i64 > %num , <i64 4096 , i64 1024 >
10723
+ ret <2 x i64 > %result
10724
+ }
10725
+
10726
; Test: `udiv exact` of a <2 x i32> vector by the constant divisors
; <4096, 1024>, i.e. 2^12 and 2^10.
;  - CHECK lines expect the vector division to be split into one scalar
;    `udiv exact` per element (extractelement / udiv exact / insertelement).
;    NOTE(review): which tool invocation feeds the CHECK prefix is set by RUN
;    lines that are not visible in this chunk - confirm there.
;  - GFX6 and GFX9 lines expect each element to become a single logical
;    shift right (v_lshrrev_b32) by 12 and by 10.
+ define <2 x i32 > @v_udiv_i32_exact (<2 x i32 > %num ) {
10727
+ ; CHECK-LABEL: @v_udiv_i32_exact(
10728
+ ; CHECK: %1 = extractelement <2 x i32> %num, i64 0
10729
+ ; CHECK-NEXT: %2 = udiv exact i32 %1, 4096
10730
+ ; CHECK-NEXT: %3 = insertelement <2 x i32> poison, i32 %2, i64 0
10731
+ ; CHECK-NEXT: %4 = extractelement <2 x i32> %num, i64 1
10732
+ ; CHECK-NEXT: %5 = udiv exact i32 %4, 1024
10733
+ ; CHECK-NEXT: %6 = insertelement <2 x i32> %3, i32 %5, i64 1
10734
+ ; CHECK-NEXT: ret <2 x i32> %6
10735
+ ;
10736
+ ; GFX6-LABEL: v_udiv_i32_exact:
10737
+ ; GFX6: ; %bb.0:
10738
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10739
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 12, v0
10740
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 10, v1
10741
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
10742
+ ;
10743
+ ; GFX9-LABEL: v_udiv_i32_exact:
10744
+ ; GFX9: ; %bb.0:
10745
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10746
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 12, v0
10747
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 10, v1
10748
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
10749
+ %result = udiv exact <2 x i32 > %num , <i32 4096 , i32 1024 >
10750
+ ret <2 x i32 > %result
10751
+ }
10752
+
10753
; Test: `udiv exact` of a <2 x i64> vector by the constant divisors
; <4096, 1024>, i.e. 2^12 and 2^10.
;  - CHECK lines expect the vector division to be split into one scalar
;    `udiv exact` per element (extractelement / udiv exact / insertelement).
;    NOTE(review): which tool invocation feeds the CHECK prefix is set by RUN
;    lines that are not visible in this chunk - confirm there.
;  - GFX6 lines expect one 64-bit logical shift right per element
;    (v_lshr_b64, shift amount as the last operand) on the register pairs
;    v[0:1] and v[2:3].
;  - GFX9 lines expect the same shifts spelled v_lshrrev_b64, where the shift
;    amount is the first source operand.
+ define <2 x i64 > @v_udiv_i64_exact (<2 x i64 > %num ) {
10754
+ ; CHECK-LABEL: @v_udiv_i64_exact(
10755
+ ; CHECK: %1 = extractelement <2 x i64> %num, i64 0
10756
+ ; CHECK-NEXT: %2 = udiv exact i64 %1, 4096
10757
+ ; CHECK-NEXT: %3 = insertelement <2 x i64> poison, i64 %2, i64 0
10758
+ ; CHECK-NEXT: %4 = extractelement <2 x i64> %num, i64 1
10759
+ ; CHECK-NEXT: %5 = udiv exact i64 %4, 1024
10760
+ ; CHECK-NEXT: %6 = insertelement <2 x i64> %3, i64 %5, i64 1
10761
+ ; CHECK-NEXT: ret <2 x i64> %6
10762
+ ;
10763
+ ; GFX6-LABEL: v_udiv_i64_exact:
10764
+ ; GFX6: ; %bb.0:
10765
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10766
+ ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 12
10767
+ ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 10
10768
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
10769
+ ;
10770
+ ; GFX9-LABEL: v_udiv_i64_exact:
10771
+ ; GFX9: ; %bb.0:
10772
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10773
+ ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 12, v[0:1]
10774
+ ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 10, v[2:3]
10775
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
10776
+ %result = udiv exact <2 x i64 > %num , <i64 4096 , i64 1024 >
10777
+ ret <2 x i64 > %result
10778
+ }
0 commit comments