Skip to content

Commit 7188dd9

Browse files
committed
Test scalar arithmetic right shift (ashr) of i64 by 33 and 62
Signed-off-by: John Lu <[email protected]>
1 parent fdba3dc commit 7188dd9

File tree

1 file changed

+114
-0
lines changed
  • llvm/test/CodeGen/AMDGPU

1 file changed

+114
-0
lines changed

llvm/test/CodeGen/AMDGPU/sra.ll

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,63 @@ define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
788788
ret void
789789
}
790790

791+
; Scalar test of `ashr i64 %a, 33`: both i64 operands are kernel arguments, the
; shifted value is added to %b and the 64-bit sum is stored through %out.
; In the SI and VI checks below, the 64-bit shift shows up as two 32-bit
; s_ashr_i32 instructions on the high dword of %a (s7): a shift by 1 (33 - 32)
; feeding the low half of the add, and a shift by 31 feeding the high half.
; The add itself is the s_add_u32 / s_addc_u32 pair.
; NOTE(review): the [8 x i32] arguments look like padding that spaces %out, %a
; and %b apart in the kernel-argument loads (offsets 0x9, 0x13, 0x1d in the SI
; checks and 0x24, 0x4c, 0x74 in the VI checks) - confirm intent.
; NOTE(review): the check lines appear to be script-generated; presumably they
; should be regenerated rather than edited by hand - confirm.
define amdgpu_kernel void @s_ashr_33_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
792+
; SI-LABEL: s_ashr_33_i64:
793+
; SI: ; %bb.0:
794+
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
795+
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
796+
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
797+
; SI-NEXT: s_mov_b32 s3, 0xf000
798+
; SI-NEXT: s_mov_b32 s2, -1
799+
; SI-NEXT: s_waitcnt lgkmcnt(0)
800+
; SI-NEXT: s_ashr_i32 s6, s7, 31
801+
; SI-NEXT: s_ashr_i32 s7, s7, 1
802+
; SI-NEXT: s_add_u32 s4, s7, s4
803+
; SI-NEXT: s_addc_u32 s5, s6, s5
804+
; SI-NEXT: v_mov_b32_e32 v0, s4
805+
; SI-NEXT: v_mov_b32_e32 v1, s5
806+
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
807+
; SI-NEXT: s_endpgm
808+
;
809+
; VI-LABEL: s_ashr_33_i64:
810+
; VI: ; %bb.0:
811+
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
812+
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
813+
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
814+
; VI-NEXT: s_mov_b32 s3, 0xf000
815+
; VI-NEXT: s_mov_b32 s2, -1
816+
; VI-NEXT: s_waitcnt lgkmcnt(0)
817+
; VI-NEXT: s_ashr_i32 s6, s7, 31
818+
; VI-NEXT: s_ashr_i32 s7, s7, 1
819+
; VI-NEXT: s_add_u32 s4, s7, s4
820+
; VI-NEXT: s_addc_u32 s5, s6, s5
821+
; VI-NEXT: v_mov_b32_e32 v0, s4
822+
; VI-NEXT: v_mov_b32_e32 v1, s5
823+
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
824+
; VI-NEXT: s_endpgm
825+
;
826+
; EG-LABEL: s_ashr_33_i64:
827+
; EG: ; %bb.0:
828+
; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
829+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
830+
; EG-NEXT: CF_END
831+
; EG-NEXT: PAD
832+
; EG-NEXT: ALU clause starting at 4:
833+
; EG-NEXT: ASHR T0.W, KC0[5].X, 1,
834+
; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.x,
835+
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
836+
; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
837+
; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
838+
; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
839+
; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
840+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
841+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
842+
; Arithmetic shift right by 33; the amount exceeds 32, so only the high dword
; of %a contributes to the result.
%result = ashr i64 %a, 33
843+
; Combine the shifted value with the second kernel argument.
; NOTE(review): presumably the add gives the shift a scalar consumer so the
; scalar lowering is what gets checked - confirm.
%add = add i64 %result, %b
844+
; Write the 64-bit sum to the output pointer.
store i64 %add, ptr addrspace(1) %out
845+
ret void
846+
}
847+
791848
define amdgpu_kernel void @v_ashr_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
792849
; SI-LABEL: v_ashr_33_i64:
793850
; SI: ; %bb.0:
@@ -854,6 +911,63 @@ define amdgpu_kernel void @v_ashr_33_i64(ptr addrspace(1) %out, ptr addrspace(1)
854911
ret void
855912
}
856913

914+
; Scalar test of `ashr i64 %a, 62`: both i64 operands are kernel arguments, the
; shifted value is added to %b and the 64-bit sum is stored through %out.
; In the SI and VI checks below, the 64-bit shift shows up as two 32-bit
; s_ashr_i32 instructions on the high dword of %a (s7): a shift by 30 (62 - 32)
; feeding the low half of the add, and a shift by 31 feeding the high half.
; The add itself is the s_add_u32 / s_addc_u32 pair.
; NOTE(review): the [8 x i32] arguments look like padding that spaces %out, %a
; and %b apart in the kernel-argument loads (offsets 0x9, 0x13, 0x1d in the SI
; checks and 0x24, 0x4c, 0x74 in the VI checks) - confirm intent.
; NOTE(review): the check lines appear to be script-generated; presumably they
; should be regenerated rather than edited by hand - confirm.
define amdgpu_kernel void @s_ashr_62_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
915+
; SI-LABEL: s_ashr_62_i64:
916+
; SI: ; %bb.0:
917+
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
918+
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
919+
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
920+
; SI-NEXT: s_mov_b32 s3, 0xf000
921+
; SI-NEXT: s_mov_b32 s2, -1
922+
; SI-NEXT: s_waitcnt lgkmcnt(0)
923+
; SI-NEXT: s_ashr_i32 s6, s7, 31
924+
; SI-NEXT: s_ashr_i32 s7, s7, 30
925+
; SI-NEXT: s_add_u32 s4, s7, s4
926+
; SI-NEXT: s_addc_u32 s5, s6, s5
927+
; SI-NEXT: v_mov_b32_e32 v0, s4
928+
; SI-NEXT: v_mov_b32_e32 v1, s5
929+
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
930+
; SI-NEXT: s_endpgm
931+
;
932+
; VI-LABEL: s_ashr_62_i64:
933+
; VI: ; %bb.0:
934+
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
935+
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
936+
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
937+
; VI-NEXT: s_mov_b32 s3, 0xf000
938+
; VI-NEXT: s_mov_b32 s2, -1
939+
; VI-NEXT: s_waitcnt lgkmcnt(0)
940+
; VI-NEXT: s_ashr_i32 s6, s7, 31
941+
; VI-NEXT: s_ashr_i32 s7, s7, 30
942+
; VI-NEXT: s_add_u32 s4, s7, s4
943+
; VI-NEXT: s_addc_u32 s5, s6, s5
944+
; VI-NEXT: v_mov_b32_e32 v0, s4
945+
; VI-NEXT: v_mov_b32_e32 v1, s5
946+
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
947+
; VI-NEXT: s_endpgm
948+
;
949+
; EG-LABEL: s_ashr_62_i64:
950+
; EG: ; %bb.0:
951+
; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
952+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
953+
; EG-NEXT: CF_END
954+
; EG-NEXT: PAD
955+
; EG-NEXT: ALU clause starting at 4:
956+
; EG-NEXT: ASHR T0.W, KC0[5].X, literal.x,
957+
; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.y,
958+
; EG-NEXT: 30(4.203895e-44), 31(4.344025e-44)
959+
; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
960+
; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
961+
; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
962+
; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
963+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
964+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
965+
; Arithmetic shift right by 62; the amount exceeds 32, so only the high dword
; of %a contributes to the result.
%result = ashr i64 %a, 62
966+
; Combine the shifted value with the second kernel argument.
; NOTE(review): presumably the add gives the shift a scalar consumer so the
; scalar lowering is what gets checked - confirm.
%add = add i64 %result, %b
967+
; Write the 64-bit sum to the output pointer.
store i64 %add, ptr addrspace(1) %out
968+
ret void
969+
}
970+
857971
define amdgpu_kernel void @v_ashr_62_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
858972
; SI-LABEL: v_ashr_62_i64:
859973
; SI: ; %bb.0:

0 commit comments

Comments
 (0)