@@ -791,6 +791,63 @@ define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
791
791
ret void
792
792
}
793
793
794
+ ; Kernel under test: arithmetic shift right of the i64 argument %a by the
+ ; constant 33, then add the i64 argument %b and store the sum through %out.
+ ; The SI and VI assertions below expect two 32-bit s_ashr_i32 ops on s7
+ ; (by 31 and by 1) feeding an s_add_u32 / s_addc_u32 pair; the EG assertions
+ ; expect two ASHR ops on KC0[5].X (by 1 and by 31).
+ define amdgpu_kernel void @s_ashr_33_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
795
+ ; SI-LABEL: s_ashr_33_i64:
796
+ ; SI: ; %bb.0:
797
+ ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
798
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
799
+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
800
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
801
+ ; SI-NEXT: s_mov_b32 s2, -1
802
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
803
+ ; SI-NEXT: s_ashr_i32 s6, s7, 31
804
+ ; SI-NEXT: s_ashr_i32 s7, s7, 1
805
+ ; SI-NEXT: s_add_u32 s4, s7, s4
806
+ ; SI-NEXT: s_addc_u32 s5, s6, s5
807
+ ; SI-NEXT: v_mov_b32_e32 v0, s4
808
+ ; SI-NEXT: v_mov_b32_e32 v1, s5
809
+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
810
+ ; SI-NEXT: s_endpgm
811
+ ;
812
+ ; VI-LABEL: s_ashr_33_i64:
813
+ ; VI: ; %bb.0:
814
+ ; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
815
+ ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
816
+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
817
+ ; VI-NEXT: s_mov_b32 s3, 0xf000
818
+ ; VI-NEXT: s_mov_b32 s2, -1
819
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
820
+ ; VI-NEXT: s_ashr_i32 s6, s7, 31
821
+ ; VI-NEXT: s_ashr_i32 s7, s7, 1
822
+ ; VI-NEXT: s_add_u32 s4, s7, s4
823
+ ; VI-NEXT: s_addc_u32 s5, s6, s5
824
+ ; VI-NEXT: v_mov_b32_e32 v0, s4
825
+ ; VI-NEXT: v_mov_b32_e32 v1, s5
826
+ ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
827
+ ; VI-NEXT: s_endpgm
828
+ ;
829
+ ; EG-LABEL: s_ashr_33_i64:
830
+ ; EG: ; %bb.0:
831
+ ; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
832
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
833
+ ; EG-NEXT: CF_END
834
+ ; EG-NEXT: PAD
835
+ ; EG-NEXT: ALU clause starting at 4:
836
+ ; EG-NEXT: ASHR T0.W, KC0[5].X, 1,
837
+ ; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.x,
838
+ ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
839
+ ; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
840
+ ; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
841
+ ; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
842
+ ; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
843
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
844
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
845
+   %shifted = ashr i64 %a, 33
846
+   %sum = add i64 %shifted, %b
847
+   store i64 %sum, ptr addrspace(1) %out
848
+   ret void
849
+ }
850
+
794
851
define amdgpu_kernel void @v_ashr_33_i64 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
795
852
; SI-LABEL: v_ashr_33_i64:
796
853
; SI: ; %bb.0:
@@ -857,6 +914,63 @@ define amdgpu_kernel void @v_ashr_33_i64(ptr addrspace(1) %out, ptr addrspace(1)
857
914
ret void
858
915
}
859
916
917
+ ; Kernel under test: arithmetic shift right of the i64 argument %a by the
+ ; constant 62, then add the i64 argument %b and store the sum through %out.
+ ; The SI and VI assertions below expect two 32-bit s_ashr_i32 ops on s7
+ ; (by 31 and by 30) feeding an s_add_u32 / s_addc_u32 pair; the EG assertions
+ ; expect two ASHR ops on KC0[5].X (by 30 and by 31).
+ define amdgpu_kernel void @s_ashr_62_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
918
+ ; SI-LABEL: s_ashr_62_i64:
919
+ ; SI: ; %bb.0:
920
+ ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
921
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
922
+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
923
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
924
+ ; SI-NEXT: s_mov_b32 s2, -1
925
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
926
+ ; SI-NEXT: s_ashr_i32 s6, s7, 31
927
+ ; SI-NEXT: s_ashr_i32 s7, s7, 30
928
+ ; SI-NEXT: s_add_u32 s4, s7, s4
929
+ ; SI-NEXT: s_addc_u32 s5, s6, s5
930
+ ; SI-NEXT: v_mov_b32_e32 v0, s4
931
+ ; SI-NEXT: v_mov_b32_e32 v1, s5
932
+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
933
+ ; SI-NEXT: s_endpgm
934
+ ;
935
+ ; VI-LABEL: s_ashr_62_i64:
936
+ ; VI: ; %bb.0:
937
+ ; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
938
+ ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
939
+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
940
+ ; VI-NEXT: s_mov_b32 s3, 0xf000
941
+ ; VI-NEXT: s_mov_b32 s2, -1
942
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
943
+ ; VI-NEXT: s_ashr_i32 s6, s7, 31
944
+ ; VI-NEXT: s_ashr_i32 s7, s7, 30
945
+ ; VI-NEXT: s_add_u32 s4, s7, s4
946
+ ; VI-NEXT: s_addc_u32 s5, s6, s5
947
+ ; VI-NEXT: v_mov_b32_e32 v0, s4
948
+ ; VI-NEXT: v_mov_b32_e32 v1, s5
949
+ ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
950
+ ; VI-NEXT: s_endpgm
951
+ ;
952
+ ; EG-LABEL: s_ashr_62_i64:
953
+ ; EG: ; %bb.0:
954
+ ; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
955
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
956
+ ; EG-NEXT: CF_END
957
+ ; EG-NEXT: PAD
958
+ ; EG-NEXT: ALU clause starting at 4:
959
+ ; EG-NEXT: ASHR T0.W, KC0[5].X, literal.x,
960
+ ; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.y,
961
+ ; EG-NEXT: 30(4.203895e-44), 31(4.344025e-44)
962
+ ; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
963
+ ; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
964
+ ; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
965
+ ; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
966
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
967
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
968
+   %shifted = ashr i64 %a, 62
969
+   %sum = add i64 %shifted, %b
970
+   store i64 %sum, ptr addrspace(1) %out
971
+   ret void
972
+ }
973
+
860
974
define amdgpu_kernel void @v_ashr_62_i64 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
861
975
; SI-LABEL: v_ashr_62_i64:
862
976
; SI: ; %bb.0:
0 commit comments