|
2 | 2 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
|
3 | 3 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
|
4 | 4 |
|
5 |
| -; This testcase would fail due to not having a free VGPR available to |
| 5 | +; This testcase would fail on GFX908 due to not having a free VGPR available to |
6 | 6 | ; copy between AGPRs.
|
7 |
| -define void @no_free_vgprs_at_agpr_copy(float %v0, float %v1) #0 { |
8 |
| -; GFX908-LABEL: no_free_vgprs_at_agpr_copy: |
| 7 | +define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 { |
| 8 | +; GFX908-LABEL: no_free_vgprs_at_agpr_to_agpr_copy: |
9 | 9 | ; GFX908: ; %bb.0:
|
10 | 10 | ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
11 | 11 | ; GFX908-NEXT: v_mov_b32_e32 v33, v1
|
@@ -156,7 +156,7 @@ define void @no_free_vgprs_at_agpr_copy(float %v0, float %v1) #0 {
|
156 | 156 | ; GFX908-NEXT: ;;#ASMEND
|
157 | 157 | ; GFX908-NEXT: s_setpc_b64 s[30:31]
|
158 | 158 | ;
|
159 |
| -; GFX90A-LABEL: no_free_vgprs_at_agpr_copy: |
| 159 | +; GFX90A-LABEL: no_free_vgprs_at_agpr_to_agpr_copy: |
160 | 160 | ; GFX90A: ; %bb.0:
|
161 | 161 | ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
162 | 162 | ; GFX90A-NEXT: v_mov_b32_e32 v33, v0
|
@@ -864,6 +864,256 @@ bb58: ; preds = %bb51, %bb16
|
864 | 864 | br i1 %i66, label %bb16, label %bb12
|
865 | 865 | }
|
866 | 866 |
|
| 867 | +; This testcase would fail on GFX908 due to not having a free VGPR available to |
| 868 | +; copy from an SGPR to an AGPR. |
| 869 | +define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 { |
| 870 | +; GFX908-LABEL: no_free_vgprs_at_sgpr_to_agpr_copy: |
| 871 | +; GFX908: ; %bb.0: |
| 872 | +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 873 | +; GFX908-NEXT: v_mov_b32_e32 v33, v1 |
| 874 | +; GFX908-NEXT: v_mov_b32_e32 v34, v0 |
| 875 | +; GFX908-NEXT: ;;#ASMSTART |
| 876 | +; GFX908-NEXT: ; def v[0:31] s[0:15] |
| 877 | +; GFX908-NEXT: ;;#ASMEND |
| 878 | +; GFX908-NEXT: v_mov_b32_e32 v32, s15 |
| 879 | +; GFX908-NEXT: s_nop 1 |
| 880 | +; GFX908-NEXT: v_accvgpr_write_b32 a31, v32 |
| 881 | +; GFX908-NEXT: v_mov_b32_e32 v32, s14 |
| 882 | +; GFX908-NEXT: s_nop 1 |
| 883 | +; GFX908-NEXT: v_accvgpr_write_b32 a30, v32 |
| 884 | +; GFX908-NEXT: v_mov_b32_e32 v32, s13 |
| 885 | +; GFX908-NEXT: s_nop 1 |
| 886 | +; GFX908-NEXT: v_accvgpr_write_b32 a29, v32 |
| 887 | +; GFX908-NEXT: v_mov_b32_e32 v32, s12 |
| 888 | +; GFX908-NEXT: s_nop 1 |
| 889 | +; GFX908-NEXT: v_accvgpr_write_b32 a28, v32 |
| 890 | +; GFX908-NEXT: v_mov_b32_e32 v32, s11 |
| 891 | +; GFX908-NEXT: s_nop 1 |
| 892 | +; GFX908-NEXT: v_accvgpr_write_b32 a27, v32 |
| 893 | +; GFX908-NEXT: v_mov_b32_e32 v32, s10 |
| 894 | +; GFX908-NEXT: s_nop 1 |
| 895 | +; GFX908-NEXT: v_accvgpr_write_b32 a26, v32 |
| 896 | +; GFX908-NEXT: v_mov_b32_e32 v32, s9 |
| 897 | +; GFX908-NEXT: s_nop 1 |
| 898 | +; GFX908-NEXT: v_accvgpr_write_b32 a25, v32 |
| 899 | +; GFX908-NEXT: v_mov_b32_e32 v32, s8 |
| 900 | +; GFX908-NEXT: s_nop 1 |
| 901 | +; GFX908-NEXT: v_accvgpr_write_b32 a24, v32 |
| 902 | +; GFX908-NEXT: v_mov_b32_e32 v32, s7 |
| 903 | +; GFX908-NEXT: s_nop 1 |
| 904 | +; GFX908-NEXT: v_accvgpr_write_b32 a23, v32 |
| 905 | +; GFX908-NEXT: v_mov_b32_e32 v32, s6 |
| 906 | +; GFX908-NEXT: s_nop 1 |
| 907 | +; GFX908-NEXT: v_accvgpr_write_b32 a22, v32 |
| 908 | +; GFX908-NEXT: v_mov_b32_e32 v32, s5 |
| 909 | +; GFX908-NEXT: s_nop 1 |
| 910 | +; GFX908-NEXT: v_accvgpr_write_b32 a21, v32 |
| 911 | +; GFX908-NEXT: v_mov_b32_e32 v32, s4 |
| 912 | +; GFX908-NEXT: s_nop 1 |
| 913 | +; GFX908-NEXT: v_accvgpr_write_b32 a20, v32 |
| 914 | +; GFX908-NEXT: v_mov_b32_e32 v32, s3 |
| 915 | +; GFX908-NEXT: s_nop 1 |
| 916 | +; GFX908-NEXT: v_accvgpr_write_b32 a19, v32 |
| 917 | +; GFX908-NEXT: v_mov_b32_e32 v32, s2 |
| 918 | +; GFX908-NEXT: s_nop 1 |
| 919 | +; GFX908-NEXT: v_accvgpr_write_b32 a18, v32 |
| 920 | +; GFX908-NEXT: v_mov_b32_e32 v32, s1 |
| 921 | +; GFX908-NEXT: s_nop 1 |
| 922 | +; GFX908-NEXT: v_accvgpr_write_b32 a17, v32 |
| 923 | +; GFX908-NEXT: v_mov_b32_e32 v32, s0 |
| 924 | +; GFX908-NEXT: s_nop 1 |
| 925 | +; GFX908-NEXT: v_accvgpr_write_b32 a16, v32 |
| 926 | +; GFX908-NEXT: s_nop 0 |
| 927 | +; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v34, v33, a[16:31] |
| 928 | +; GFX908-NEXT: s_nop 7 |
| 929 | +; GFX908-NEXT: s_nop 1 |
| 930 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a0 ; Reload Reuse |
| 931 | +; GFX908-NEXT: v_accvgpr_read_b32 v39, a11 ; Reload Reuse |
| 932 | +; GFX908-NEXT: v_accvgpr_read_b32 v38, a12 ; Reload Reuse |
| 933 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill |
| 934 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a1 ; Reload Reuse |
| 935 | +; GFX908-NEXT: v_accvgpr_read_b32 v37, a13 ; Reload Reuse |
| 936 | +; GFX908-NEXT: v_accvgpr_read_b32 v36, a14 ; Reload Reuse |
| 937 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill |
| 938 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 ; Reload Reuse |
| 939 | +; GFX908-NEXT: v_accvgpr_read_b32 v35, a15 ; Reload Reuse |
| 940 | +; GFX908-NEXT: s_nop 0 |
| 941 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill |
| 942 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a3 ; Reload Reuse |
| 943 | +; GFX908-NEXT: s_nop 1 |
| 944 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| 945 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a4 ; Reload Reuse |
| 946 | +; GFX908-NEXT: s_nop 1 |
| 947 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill |
| 948 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a5 ; Reload Reuse |
| 949 | +; GFX908-NEXT: s_nop 1 |
| 950 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill |
| 951 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a6 ; Reload Reuse |
| 952 | +; GFX908-NEXT: s_nop 1 |
| 953 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill |
| 954 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a7 ; Reload Reuse |
| 955 | +; GFX908-NEXT: s_nop 1 |
| 956 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill |
| 957 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a8 ; Reload Reuse |
| 958 | +; GFX908-NEXT: s_nop 1 |
| 959 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill |
| 960 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a9 ; Reload Reuse |
| 961 | +; GFX908-NEXT: s_nop 1 |
| 962 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill |
| 963 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a10 ; Reload Reuse |
| 964 | +; GFX908-NEXT: s_nop 1 |
| 965 | +; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill |
| 966 | +; GFX908-NEXT: ;;#ASMSTART |
| 967 | +; GFX908-NEXT: ; copy |
| 968 | +; GFX908-NEXT: ;;#ASMEND |
| 969 | +; GFX908-NEXT: v_accvgpr_read_b32 v32, a1 |
| 970 | +; GFX908-NEXT: s_nop 1 |
| 971 | +; GFX908-NEXT: v_accvgpr_write_b32 a32, v32 |
| 972 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload |
| 973 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 974 | +; GFX908-NEXT: v_accvgpr_write_b32 a0, v32 ; Reload Reuse |
| 975 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload |
| 976 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 977 | +; GFX908-NEXT: v_accvgpr_write_b32 a1, v32 ; Reload Reuse |
| 978 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload |
| 979 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 980 | +; GFX908-NEXT: v_accvgpr_write_b32 a2, v32 ; Reload Reuse |
| 981 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| 982 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 983 | +; GFX908-NEXT: v_accvgpr_write_b32 a3, v32 ; Reload Reuse |
| 984 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload |
| 985 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 986 | +; GFX908-NEXT: v_accvgpr_write_b32 a4, v32 ; Reload Reuse |
| 987 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload |
| 988 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 989 | +; GFX908-NEXT: v_accvgpr_write_b32 a5, v32 ; Reload Reuse |
| 990 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload |
| 991 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 992 | +; GFX908-NEXT: v_accvgpr_write_b32 a6, v32 ; Reload Reuse |
| 993 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload |
| 994 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 995 | +; GFX908-NEXT: v_accvgpr_write_b32 a7, v32 ; Reload Reuse |
| 996 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload |
| 997 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 998 | +; GFX908-NEXT: v_accvgpr_write_b32 a8, v32 ; Reload Reuse |
| 999 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload |
| 1000 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 1001 | +; GFX908-NEXT: v_accvgpr_write_b32 a9, v32 ; Reload Reuse |
| 1002 | +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload |
| 1003 | +; GFX908-NEXT: s_waitcnt vmcnt(0) |
| 1004 | +; GFX908-NEXT: v_accvgpr_write_b32 a10, v32 ; Reload Reuse |
| 1005 | +; GFX908-NEXT: v_accvgpr_write_b32 a11, v39 ; Reload Reuse |
| 1006 | +; GFX908-NEXT: v_accvgpr_write_b32 a12, v38 ; Reload Reuse |
| 1007 | +; GFX908-NEXT: v_accvgpr_write_b32 a13, v37 ; Reload Reuse |
| 1008 | +; GFX908-NEXT: v_accvgpr_write_b32 a14, v36 ; Reload Reuse |
| 1009 | +; GFX908-NEXT: v_accvgpr_write_b32 a15, v35 ; Reload Reuse |
| 1010 | +; GFX908-NEXT: ;;#ASMSTART |
| 1011 | +; GFX908-NEXT: ; copy |
| 1012 | +; GFX908-NEXT: ;;#ASMEND |
| 1013 | +; GFX908-NEXT: v_accvgpr_read_b32 v33, a2 |
| 1014 | +; GFX908-NEXT: s_nop 1 |
| 1015 | +; GFX908-NEXT: v_accvgpr_write_b32 a3, v33 |
| 1016 | +; GFX908-NEXT: ;;#ASMSTART |
| 1017 | +; GFX908-NEXT: ; use a3 v[0:31] |
| 1018 | +; GFX908-NEXT: ;;#ASMEND |
| 1019 | +; GFX908-NEXT: s_setpc_b64 s[30:31] |
| 1020 | +; |
| 1021 | +; GFX90A-LABEL: no_free_vgprs_at_sgpr_to_agpr_copy: |
| 1022 | +; GFX90A: ; %bb.0: |
| 1023 | +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 1024 | +; GFX90A-NEXT: v_mov_b32_e32 v33, v0 |
| 1025 | +; GFX90A-NEXT: v_mov_b32_e32 v32, v1 |
| 1026 | +; GFX90A-NEXT: ;;#ASMSTART |
| 1027 | +; GFX90A-NEXT: ; def v[0:31] s[0:15] |
| 1028 | +; GFX90A-NEXT: ;;#ASMEND |
| 1029 | +; GFX90A-NEXT: v_accvgpr_write_b32 a31, s15 |
| 1030 | +; GFX90A-NEXT: v_accvgpr_write_b32 a30, s14 |
| 1031 | +; GFX90A-NEXT: v_accvgpr_write_b32 a29, s13 |
| 1032 | +; GFX90A-NEXT: v_accvgpr_write_b32 a28, s12 |
| 1033 | +; GFX90A-NEXT: v_accvgpr_write_b32 a27, s11 |
| 1034 | +; GFX90A-NEXT: v_accvgpr_write_b32 a26, s10 |
| 1035 | +; GFX90A-NEXT: v_accvgpr_write_b32 a25, s9 |
| 1036 | +; GFX90A-NEXT: v_accvgpr_write_b32 a24, s8 |
| 1037 | +; GFX90A-NEXT: v_accvgpr_write_b32 a23, s7 |
| 1038 | +; GFX90A-NEXT: v_accvgpr_write_b32 a22, s6 |
| 1039 | +; GFX90A-NEXT: v_accvgpr_write_b32 a21, s5 |
| 1040 | +; GFX90A-NEXT: v_accvgpr_write_b32 a20, s4 |
| 1041 | +; GFX90A-NEXT: v_accvgpr_write_b32 a19, s3 |
| 1042 | +; GFX90A-NEXT: v_accvgpr_write_b32 a18, s2 |
| 1043 | +; GFX90A-NEXT: v_accvgpr_write_b32 a17, s1 |
| 1044 | +; GFX90A-NEXT: v_accvgpr_write_b32 a16, s0 |
| 1045 | +; GFX90A-NEXT: v_accvgpr_read_b32 v34, a32 ; Reload Reuse |
| 1046 | +; GFX90A-NEXT: s_nop 0 |
| 1047 | +; GFX90A-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31] |
| 1048 | +; GFX90A-NEXT: s_nop 7 |
| 1049 | +; GFX90A-NEXT: s_nop 2 |
| 1050 | +; GFX90A-NEXT: buffer_store_dword a0, off, s[0:3], s32 ; 4-byte Folded Spill |
| 1051 | +; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| 1052 | +; GFX90A-NEXT: buffer_store_dword a1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill |
| 1053 | +; GFX90A-NEXT: buffer_store_dword a2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill |
| 1054 | +; GFX90A-NEXT: buffer_store_dword a3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| 1055 | +; GFX90A-NEXT: buffer_store_dword a4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill |
| 1056 | +; GFX90A-NEXT: buffer_store_dword a5, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill |
| 1057 | +; GFX90A-NEXT: buffer_store_dword a6, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill |
| 1058 | +; GFX90A-NEXT: buffer_store_dword a7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill |
| 1059 | +; GFX90A-NEXT: buffer_store_dword a8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill |
| 1060 | +; GFX90A-NEXT: buffer_store_dword a9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill |
| 1061 | +; GFX90A-NEXT: buffer_store_dword a10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill |
| 1062 | +; GFX90A-NEXT: v_accvgpr_read_b32 v39, a11 ; Reload Reuse |
| 1063 | +; GFX90A-NEXT: v_accvgpr_read_b32 v38, a12 ; Reload Reuse |
| 1064 | +; GFX90A-NEXT: v_accvgpr_read_b32 v37, a13 ; Reload Reuse |
| 1065 | +; GFX90A-NEXT: v_accvgpr_read_b32 v36, a14 ; Reload Reuse |
| 1066 | +; GFX90A-NEXT: v_accvgpr_read_b32 v35, a15 ; Reload Reuse |
| 1067 | +; GFX90A-NEXT: ;;#ASMSTART |
| 1068 | +; GFX90A-NEXT: ; copy |
| 1069 | +; GFX90A-NEXT: ;;#ASMEND |
| 1070 | +; GFX90A-NEXT: v_accvgpr_mov_b32 a32, a1 |
| 1071 | +; GFX90A-NEXT: buffer_load_dword a0, off, s[0:3], s32 ; 4-byte Folded Reload |
| 1072 | +; GFX90A-NEXT: s_nop 0 |
| 1073 | +; GFX90A-NEXT: buffer_load_dword a1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload |
| 1074 | +; GFX90A-NEXT: s_nop 0 |
| 1075 | +; GFX90A-NEXT: buffer_load_dword a2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload |
| 1076 | +; GFX90A-NEXT: s_nop 0 |
| 1077 | +; GFX90A-NEXT: buffer_load_dword a3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| 1078 | +; GFX90A-NEXT: s_nop 0 |
| 1079 | +; GFX90A-NEXT: buffer_load_dword a4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload |
| 1080 | +; GFX90A-NEXT: s_nop 0 |
| 1081 | +; GFX90A-NEXT: buffer_load_dword a5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload |
| 1082 | +; GFX90A-NEXT: s_nop 0 |
| 1083 | +; GFX90A-NEXT: buffer_load_dword a6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload |
| 1084 | +; GFX90A-NEXT: s_nop 0 |
| 1085 | +; GFX90A-NEXT: buffer_load_dword a7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload |
| 1086 | +; GFX90A-NEXT: s_nop 0 |
| 1087 | +; GFX90A-NEXT: buffer_load_dword a8, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload |
| 1088 | +; GFX90A-NEXT: s_nop 0 |
| 1089 | +; GFX90A-NEXT: buffer_load_dword a9, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload |
| 1090 | +; GFX90A-NEXT: s_nop 0 |
| 1091 | +; GFX90A-NEXT: buffer_load_dword a10, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload |
| 1092 | +; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| 1093 | +; GFX90A-NEXT: v_accvgpr_write_b32 a11, v39 ; Reload Reuse |
| 1094 | +; GFX90A-NEXT: v_accvgpr_write_b32 a12, v38 ; Reload Reuse |
| 1095 | +; GFX90A-NEXT: v_accvgpr_write_b32 a13, v37 ; Reload Reuse |
| 1096 | +; GFX90A-NEXT: v_accvgpr_write_b32 a14, v36 ; Reload Reuse |
| 1097 | +; GFX90A-NEXT: v_accvgpr_write_b32 a15, v35 ; Reload Reuse |
| 1098 | +; GFX90A-NEXT: ;;#ASMSTART |
| 1099 | +; GFX90A-NEXT: ; copy |
| 1100 | +; GFX90A-NEXT: ;;#ASMEND |
| 1101 | +; GFX90A-NEXT: v_accvgpr_mov_b32 a3, a2 |
| 1102 | +; GFX90A-NEXT: ;;#ASMSTART |
| 1103 | +; GFX90A-NEXT: ; use a3 v[0:31] |
| 1104 | +; GFX90A-NEXT: ;;#ASMEND |
| 1105 | +; GFX90A-NEXT: v_accvgpr_write_b32 a32, v34 ; Reload Reuse |
| 1106 | +; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| 1107 | + %asm = call { <32 x i32>, <16 x float> } asm sideeffect "; def $0 $1","=${v[0:31]},=${s[0:15]}"() |
| 1108 | + %vgpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 0 |
| 1109 | + %agpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 1 |
| 1110 | + %mfma = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr0, i32 0, i32 0, i32 0) |
| 1111 | + %agpr1 = call i32 asm sideeffect "; copy ", "={a1},a,~{a[0:15]}"(<16 x float> %agpr0) |
| 1112 | + %agpr2 = call i32 asm sideeffect "; copy ", "={a2},a,{a[0:15]}"(i32 %agpr1, <16 x float> %mfma) |
| 1113 | + call void asm sideeffect "; use $0 $1","{a3},{v[0:31]}"(i32 %agpr2, <32 x i32> %vgpr0) |
| 1114 | + ret void |
| 1115 | +} |
| 1116 | + |
867 | 1117 | declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
|
868 | 1118 | declare i32 @llvm.amdgcn.workitem.id.x() #2
|
869 | 1119 |
|
|
0 commit comments