@@ -788,6 +788,63 @@ define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
788
788
ret void
789
789
}
790
790
791
; Kernel test: arithmetic shift right of the i64 kernel argument %a by the
; constant 33, then add %b and store the i64 sum through %out.
; The SI and VI checks below expect the shift as two s_ashr_i32 instructions
; reading the same source register (amounts 31 and 1), followed by an
; s_add_u32/s_addc_u32 pair for the 64-bit add.
; NOTE(review): the check lines look machine-generated; presumably they should
; be regenerated with the test-update script rather than edited by hand - confirm.
define amdgpu_kernel void @s_ashr_33_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
; SI-LABEL: s_ashr_33_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_ashr_i32 s6, s7, 31
; SI-NEXT: s_ashr_i32 s7, s7, 1
; SI-NEXT: s_add_u32 s4, s7, s4
; SI-NEXT: s_addc_u32 s5, s6, s5
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_ashr_33_i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_ashr_i32 s6, s7, 31
; VI-NEXT: s_ashr_i32 s7, s7, 1
; VI-NEXT: s_add_u32 s4, s7, s4
; VI-NEXT: s_addc_u32 s5, s6, s5
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: s_ashr_33_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: ASHR T0.W, KC0[5].X, 1,
; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %shr = ashr i64 %a, 33
  %sum = add i64 %shr, %b
  store i64 %sum, ptr addrspace(1) %out
  ret void
}
847
+
791
848
define amdgpu_kernel void @v_ashr_33_i64 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
792
849
; SI-LABEL: v_ashr_33_i64:
793
850
; SI: ; %bb.0:
@@ -854,6 +911,63 @@ define amdgpu_kernel void @v_ashr_33_i64(ptr addrspace(1) %out, ptr addrspace(1)
854
911
ret void
855
912
}
856
913
914
; Kernel test: arithmetic shift right of the i64 kernel argument %a by the
; constant 62, then add %b and store the i64 sum through %out.
; The SI and VI checks below expect the shift as two s_ashr_i32 instructions
; reading the same source register (amounts 31 and 30), followed by an
; s_add_u32/s_addc_u32 pair for the 64-bit add.
; NOTE(review): the check lines look machine-generated; presumably they should
; be regenerated with the test-update script rather than edited by hand - confirm.
define amdgpu_kernel void @s_ashr_62_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
; SI-LABEL: s_ashr_62_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_ashr_i32 s6, s7, 31
; SI-NEXT: s_ashr_i32 s7, s7, 30
; SI-NEXT: s_add_u32 s4, s7, s4
; SI-NEXT: s_addc_u32 s5, s6, s5
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_ashr_62_i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_ashr_i32 s6, s7, 31
; VI-NEXT: s_ashr_i32 s7, s7, 30
; VI-NEXT: s_add_u32 s4, s7, s4
; VI-NEXT: s_addc_u32 s5, s6, s5
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: s_ashr_62_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: ASHR T0.W, KC0[5].X, literal.x,
; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.y,
; EG-NEXT: 30(4.203895e-44), 31(4.344025e-44)
; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %shr = ashr i64 %a, 62
  %sum = add i64 %shr, %b
  store i64 %sum, ptr addrspace(1) %out
  ret void
}
970
+
857
971
define amdgpu_kernel void @v_ashr_62_i64 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
858
972
; SI-LABEL: v_ashr_62_i64:
859
973
; SI: ; %bb.0:
0 commit comments