|
3 | 3 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,VI-FLUSH %s
|
4 | 4 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-DENORM %s
|
5 | 5 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-FLUSH %s
|
6 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-DENORM,GFX11-DENORM-TRUE16 %s |
7 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-DENORM,GFX11-DENORM-FAKE16 %s |
8 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FLUSH,GFX11-FLUSH-TRUE16 %s |
9 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FLUSH,GFX11-FLUSH-FAKE16 %s |
| 6 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-DENORM,GFX11-DENORM-TRUE16 %s |
| 7 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-DENORM,GFX11-DENORM-FAKE16 %s |
| 8 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-FLUSH,GFX11-FLUSH-TRUE16 %s |
| 9 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-FLUSH,GFX11-FLUSH-FAKE16 %s |
10 | 10 |
|
11 | 11 | ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
|
12 | 12 | ; add an instruction if the fadd has more than one use.
|
@@ -1061,61 +1061,33 @@ define amdgpu_kernel void @fmul_x2_xn2_f16(ptr addrspace(1) %out, i16 zeroext %x
|
1061 | 1061 | ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
1062 | 1062 | ; GFX10-NEXT: s_endpgm
|
1063 | 1063 | ;
|
1064 |
| -; GFX11-DENORM-TRUE16-LABEL: fmul_x2_xn2_f16: |
1065 |
| -; GFX11-DENORM-TRUE16: ; %bb.0: |
1066 |
| -; GFX11-DENORM-TRUE16-NEXT: s_clause 0x1 |
1067 |
| -; GFX11-DENORM-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
1068 |
| -; GFX11-DENORM-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
1069 |
| -; GFX11-DENORM-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
1070 |
| -; GFX11-DENORM-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
1071 |
| -; GFX11-DENORM-TRUE16-NEXT: v_mul_f16_e64 v0.l, s2, -4.0 |
1072 |
| -; GFX11-DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1073 |
| -; GFX11-DENORM-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l |
1074 |
| -; GFX11-DENORM-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
1075 |
| -; GFX11-DENORM-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
1076 |
| -; GFX11-DENORM-TRUE16-NEXT: s_endpgm |
| 1064 | +; GFX11-TRUE16-LABEL: fmul_x2_xn2_f16: |
| 1065 | +; GFX11-TRUE16: ; %bb.0: |
| 1066 | +; GFX11-TRUE16-NEXT: s_clause 0x1 |
| 1067 | +; GFX11-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
| 1068 | +; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| 1069 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
| 1070 | +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1071 | +; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, s2, -4.0 |
| 1072 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1073 | +; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l |
| 1074 | +; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
| 1075 | +; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1076 | +; GFX11-TRUE16-NEXT: s_endpgm |
1077 | 1077 | ;
|
1078 |
| -; GFX11-DENORM-FAKE16-LABEL: fmul_x2_xn2_f16: |
1079 |
| -; GFX11-DENORM-FAKE16: ; %bb.0: |
1080 |
| -; GFX11-DENORM-FAKE16-NEXT: s_clause 0x1 |
1081 |
| -; GFX11-DENORM-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
1082 |
| -; GFX11-DENORM-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
1083 |
| -; GFX11-DENORM-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
1084 |
| -; GFX11-DENORM-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
1085 |
| -; GFX11-DENORM-FAKE16-NEXT: v_mul_f16_e64 v0, s2, -4.0 |
1086 |
| -; GFX11-DENORM-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1087 |
| -; GFX11-DENORM-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0 |
1088 |
| -; GFX11-DENORM-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
1089 |
| -; GFX11-DENORM-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
1090 |
| -; GFX11-DENORM-FAKE16-NEXT: s_endpgm |
1091 |
| -; |
1092 |
| -; GFX11-FLUSH-TRUE16-LABEL: fmul_x2_xn2_f16: |
1093 |
| -; GFX11-FLUSH-TRUE16: ; %bb.0: |
1094 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_clause 0x1 |
1095 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
1096 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
1097 |
| -; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
1098 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
1099 |
| -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e64 v0.l, s2, -4.0 |
1100 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1101 |
| -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l |
1102 |
| -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
1103 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
1104 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_endpgm |
1105 |
| -; |
1106 |
| -; GFX11-FLUSH-FAKE16-LABEL: fmul_x2_xn2_f16: |
1107 |
| -; GFX11-FLUSH-FAKE16: ; %bb.0: |
1108 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_clause 0x1 |
1109 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
1110 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
1111 |
| -; GFX11-FLUSH-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
1112 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
1113 |
| -; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e64 v0, s2, -4.0 |
1114 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1115 |
| -; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0 |
1116 |
| -; GFX11-FLUSH-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
1117 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
1118 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_endpgm |
| 1078 | +; GFX11-FAKE16-LABEL: fmul_x2_xn2_f16: |
| 1079 | +; GFX11-FAKE16: ; %bb.0: |
| 1080 | +; GFX11-FAKE16-NEXT: s_clause 0x1 |
| 1081 | +; GFX11-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
| 1082 | +; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| 1083 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
| 1084 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1085 | +; GFX11-FAKE16-NEXT: v_mul_f16_e64 v0, s2, -4.0 |
| 1086 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1087 | +; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0 |
| 1088 | +; GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
| 1089 | +; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1090 | +; GFX11-FAKE16-NEXT: s_endpgm |
1119 | 1091 | %x = bitcast i16 %x.arg to half
|
1120 | 1092 | %y = bitcast i16 %y.arg to half
|
1121 | 1093 | %out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
|
@@ -1157,61 +1129,33 @@ define amdgpu_kernel void @fmul_x2_xn3_f16(ptr addrspace(1) %out, i16 zeroext %x
|
1157 | 1129 | ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
1158 | 1130 | ; GFX10-NEXT: s_endpgm
|
1159 | 1131 | ;
|
1160 |
| -; GFX11-DENORM-TRUE16-LABEL: fmul_x2_xn3_f16: |
1161 |
| -; GFX11-DENORM-TRUE16: ; %bb.0: |
1162 |
| -; GFX11-DENORM-TRUE16-NEXT: s_clause 0x1 |
1163 |
| -; GFX11-DENORM-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
1164 |
| -; GFX11-DENORM-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
1165 |
| -; GFX11-DENORM-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
1166 |
| -; GFX11-DENORM-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
1167 |
| -; GFX11-DENORM-TRUE16-NEXT: v_mul_f16_e64 v0.l, 0xc600, s2 |
1168 |
| -; GFX11-DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1169 |
| -; GFX11-DENORM-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l |
1170 |
| -; GFX11-DENORM-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
1171 |
| -; GFX11-DENORM-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
1172 |
| -; GFX11-DENORM-TRUE16-NEXT: s_endpgm |
| 1132 | +; GFX11-TRUE16-LABEL: fmul_x2_xn3_f16: |
| 1133 | +; GFX11-TRUE16: ; %bb.0: |
| 1134 | +; GFX11-TRUE16-NEXT: s_clause 0x1 |
| 1135 | +; GFX11-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
| 1136 | +; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| 1137 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
| 1138 | +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1139 | +; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, 0xc600, s2 |
| 1140 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1141 | +; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l |
| 1142 | +; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
| 1143 | +; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1144 | +; GFX11-TRUE16-NEXT: s_endpgm |
1173 | 1145 | ;
|
1174 |
| -; GFX11-DENORM-FAKE16-LABEL: fmul_x2_xn3_f16: |
1175 |
| -; GFX11-DENORM-FAKE16: ; %bb.0: |
1176 |
| -; GFX11-DENORM-FAKE16-NEXT: s_clause 0x1 |
1177 |
| -; GFX11-DENORM-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
1178 |
| -; GFX11-DENORM-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
1179 |
| -; GFX11-DENORM-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
1180 |
| -; GFX11-DENORM-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
1181 |
| -; GFX11-DENORM-FAKE16-NEXT: v_mul_f16_e64 v0, 0xc600, s2 |
1182 |
| -; GFX11-DENORM-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1183 |
| -; GFX11-DENORM-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0 |
1184 |
| -; GFX11-DENORM-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
1185 |
| -; GFX11-DENORM-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
1186 |
| -; GFX11-DENORM-FAKE16-NEXT: s_endpgm |
1187 |
| -; |
1188 |
| -; GFX11-FLUSH-TRUE16-LABEL: fmul_x2_xn3_f16: |
1189 |
| -; GFX11-FLUSH-TRUE16: ; %bb.0: |
1190 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_clause 0x1 |
1191 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
1192 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
1193 |
| -; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
1194 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
1195 |
| -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e64 v0.l, 0xc600, s2 |
1196 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1197 |
| -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l |
1198 |
| -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
1199 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
1200 |
| -; GFX11-FLUSH-TRUE16-NEXT: s_endpgm |
1201 |
| -; |
1202 |
| -; GFX11-FLUSH-FAKE16-LABEL: fmul_x2_xn3_f16: |
1203 |
| -; GFX11-FLUSH-FAKE16: ; %bb.0: |
1204 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_clause 0x1 |
1205 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
1206 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
1207 |
| -; GFX11-FLUSH-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
1208 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
1209 |
| -; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e64 v0, 0xc600, s2 |
1210 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1211 |
| -; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0 |
1212 |
| -; GFX11-FLUSH-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
1213 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
1214 |
| -; GFX11-FLUSH-FAKE16-NEXT: s_endpgm |
| 1146 | +; GFX11-FAKE16-LABEL: fmul_x2_xn3_f16: |
| 1147 | +; GFX11-FAKE16: ; %bb.0: |
| 1148 | +; GFX11-FAKE16-NEXT: s_clause 0x1 |
| 1149 | +; GFX11-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8 |
| 1150 | +; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| 1151 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
| 1152 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1153 | +; GFX11-FAKE16-NEXT: v_mul_f16_e64 v0, 0xc600, s2 |
| 1154 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1155 | +; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0 |
| 1156 | +; GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
| 1157 | +; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1158 | +; GFX11-FAKE16-NEXT: s_endpgm |
1215 | 1159 | %x = bitcast i16 %x.arg to half
|
1216 | 1160 | %y = bitcast i16 %y.arg to half
|
1217 | 1161 | %out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
|
|
0 commit comments