Skip to content

Commit 07ed920

Browse files
committed
Test scalar shift of 33 and 62
Signed-off-by: John Lu <[email protected]>
1 parent a7fed73 commit 07ed920

File tree

1 file changed

+114
-0
lines changed
  • llvm/test/CodeGen/AMDGPU

1 file changed

+114
-0
lines changed

llvm/test/CodeGen/AMDGPU/sra.ll

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,63 @@ define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
791791
ret void
792792
}
793793

794+
; Scalar i64 arithmetic shift right by the constant 33, applied to kernel
; argument %a; the shifted value is added to kernel argument %b and the sum is
; stored to %out.
; The SI and VI check lines expect two 32-bit shifts of s7, the upper register
; of the loaded s[6:7] pair: by 31 for the high half of the result and by 1
; (33 - 32) for the low half, followed by a 64-bit add done as
; s_add_u32 / s_addc_u32.
; The EG check lines expect two ASHR operations on the same source operand,
; by 1 and by 31.
define amdgpu_kernel void @s_ashr_33_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
795+
; SI-LABEL: s_ashr_33_i64:
796+
; SI: ; %bb.0:
797+
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
798+
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
799+
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
800+
; SI-NEXT: s_mov_b32 s3, 0xf000
801+
; SI-NEXT: s_mov_b32 s2, -1
802+
; SI-NEXT: s_waitcnt lgkmcnt(0)
803+
; SI-NEXT: s_ashr_i32 s6, s7, 31
804+
; SI-NEXT: s_ashr_i32 s7, s7, 1
805+
; SI-NEXT: s_add_u32 s4, s7, s4
806+
; SI-NEXT: s_addc_u32 s5, s6, s5
807+
; SI-NEXT: v_mov_b32_e32 v0, s4
808+
; SI-NEXT: v_mov_b32_e32 v1, s5
809+
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
810+
; SI-NEXT: s_endpgm
811+
;
812+
; VI-LABEL: s_ashr_33_i64:
813+
; VI: ; %bb.0:
814+
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
815+
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
816+
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
817+
; VI-NEXT: s_mov_b32 s3, 0xf000
818+
; VI-NEXT: s_mov_b32 s2, -1
819+
; VI-NEXT: s_waitcnt lgkmcnt(0)
820+
; VI-NEXT: s_ashr_i32 s6, s7, 31
821+
; VI-NEXT: s_ashr_i32 s7, s7, 1
822+
; VI-NEXT: s_add_u32 s4, s7, s4
823+
; VI-NEXT: s_addc_u32 s5, s6, s5
824+
; VI-NEXT: v_mov_b32_e32 v0, s4
825+
; VI-NEXT: v_mov_b32_e32 v1, s5
826+
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
827+
; VI-NEXT: s_endpgm
828+
;
829+
; EG-LABEL: s_ashr_33_i64:
830+
; EG: ; %bb.0:
831+
; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
832+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
833+
; EG-NEXT: CF_END
834+
; EG-NEXT: PAD
835+
; EG-NEXT: ALU clause starting at 4:
836+
; EG-NEXT: ASHR T0.W, KC0[5].X, 1,
837+
; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.x,
838+
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
839+
; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
840+
; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
841+
; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
842+
; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
843+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
844+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
845+
; The shift amount 33 exceeds 32, so only the upper 32 bits of %a contribute
; to the shifted value.
%result = ashr i64 %a, 33
846+
%add = add i64 %result, %b
847+
store i64 %add, ptr addrspace(1) %out
848+
ret void
849+
}
850+
794851
define amdgpu_kernel void @v_ashr_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
795852
; SI-LABEL: v_ashr_33_i64:
796853
; SI: ; %bb.0:
@@ -857,6 +914,63 @@ define amdgpu_kernel void @v_ashr_33_i64(ptr addrspace(1) %out, ptr addrspace(1)
857914
ret void
858915
}
859916

917+
; Scalar i64 arithmetic shift right by the constant 62, applied to kernel
; argument %a; the shifted value is added to kernel argument %b and the sum is
; stored to %out.
; The SI and VI check lines expect two 32-bit shifts of s7, the upper register
; of the loaded s[6:7] pair: by 31 for the high half of the result and by 30
; (62 - 32) for the low half, followed by a 64-bit add done as
; s_add_u32 / s_addc_u32.
; The EG check lines expect two ASHR operations on the same source operand,
; using the literals 30 and 31.
define amdgpu_kernel void @s_ashr_62_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
918+
; SI-LABEL: s_ashr_62_i64:
919+
; SI: ; %bb.0:
920+
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
921+
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
922+
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
923+
; SI-NEXT: s_mov_b32 s3, 0xf000
924+
; SI-NEXT: s_mov_b32 s2, -1
925+
; SI-NEXT: s_waitcnt lgkmcnt(0)
926+
; SI-NEXT: s_ashr_i32 s6, s7, 31
927+
; SI-NEXT: s_ashr_i32 s7, s7, 30
928+
; SI-NEXT: s_add_u32 s4, s7, s4
929+
; SI-NEXT: s_addc_u32 s5, s6, s5
930+
; SI-NEXT: v_mov_b32_e32 v0, s4
931+
; SI-NEXT: v_mov_b32_e32 v1, s5
932+
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
933+
; SI-NEXT: s_endpgm
934+
;
935+
; VI-LABEL: s_ashr_62_i64:
936+
; VI: ; %bb.0:
937+
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
938+
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
939+
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
940+
; VI-NEXT: s_mov_b32 s3, 0xf000
941+
; VI-NEXT: s_mov_b32 s2, -1
942+
; VI-NEXT: s_waitcnt lgkmcnt(0)
943+
; VI-NEXT: s_ashr_i32 s6, s7, 31
944+
; VI-NEXT: s_ashr_i32 s7, s7, 30
945+
; VI-NEXT: s_add_u32 s4, s7, s4
946+
; VI-NEXT: s_addc_u32 s5, s6, s5
947+
; VI-NEXT: v_mov_b32_e32 v0, s4
948+
; VI-NEXT: v_mov_b32_e32 v1, s5
949+
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
950+
; VI-NEXT: s_endpgm
951+
;
952+
; EG-LABEL: s_ashr_62_i64:
953+
; EG: ; %bb.0:
954+
; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
955+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
956+
; EG-NEXT: CF_END
957+
; EG-NEXT: PAD
958+
; EG-NEXT: ALU clause starting at 4:
959+
; EG-NEXT: ASHR T0.W, KC0[5].X, literal.x,
960+
; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.y,
961+
; EG-NEXT: 30(4.203895e-44), 31(4.344025e-44)
962+
; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
963+
; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
964+
; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
965+
; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
966+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
967+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
968+
; The shift amount 62 exceeds 32, so only the upper 32 bits of %a contribute
; to the shifted value.
%result = ashr i64 %a, 62
969+
%add = add i64 %result, %b
970+
store i64 %add, ptr addrspace(1) %out
971+
ret void
972+
}
973+
860974
define amdgpu_kernel void @v_ashr_62_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
861975
; SI-LABEL: v_ashr_62_i64:
862976
; SI: ; %bb.0:

0 commit comments

Comments
 (0)