Skip to content

Commit f21a9b7

Browse files
committed
add new flag
1 parent f5e5399 commit f21a9b7

File tree

7 files changed

+5114
-8418
lines changed

7 files changed

+5114
-8418
lines changed

llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Lines changed: 2984 additions & 5925 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll

Lines changed: 870 additions & 494 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll

Lines changed: 56 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,VI-FLUSH %s
44
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-DENORM %s
55
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-FLUSH %s
6-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-DENORM,GFX11-DENORM-TRUE16 %s
7-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-DENORM,GFX11-DENORM-FAKE16 %s
8-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FLUSH,GFX11-FLUSH-TRUE16 %s
9-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FLUSH,GFX11-FLUSH-FAKE16 %s
6+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-DENORM,GFX11-DENORM-TRUE16 %s
7+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-DENORM,GFX11-DENORM-FAKE16 %s
8+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-FLUSH,GFX11-FLUSH-TRUE16 %s
9+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-FLUSH,GFX11-FLUSH-FAKE16 %s
1010

1111
; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
1212
; add an instruction if the fadd has more than one use.
@@ -1061,61 +1061,33 @@ define amdgpu_kernel void @fmul_x2_xn2_f16(ptr addrspace(1) %out, i16 zeroext %x
10611061
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
10621062
; GFX10-NEXT: s_endpgm
10631063
;
1064-
; GFX11-DENORM-TRUE16-LABEL: fmul_x2_xn2_f16:
1065-
; GFX11-DENORM-TRUE16: ; %bb.0:
1066-
; GFX11-DENORM-TRUE16-NEXT: s_clause 0x1
1067-
; GFX11-DENORM-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1068-
; GFX11-DENORM-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1069-
; GFX11-DENORM-TRUE16-NEXT: v_mov_b32_e32 v1, 0
1070-
; GFX11-DENORM-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1071-
; GFX11-DENORM-TRUE16-NEXT: v_mul_f16_e64 v0.l, s2, -4.0
1072-
; GFX11-DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1073-
; GFX11-DENORM-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l
1074-
; GFX11-DENORM-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1075-
; GFX11-DENORM-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1076-
; GFX11-DENORM-TRUE16-NEXT: s_endpgm
1064+
; GFX11-TRUE16-LABEL: fmul_x2_xn2_f16:
1065+
; GFX11-TRUE16: ; %bb.0:
1066+
; GFX11-TRUE16-NEXT: s_clause 0x1
1067+
; GFX11-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1068+
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1069+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
1070+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1071+
; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, s2, -4.0
1072+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1073+
; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l
1074+
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1075+
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1076+
; GFX11-TRUE16-NEXT: s_endpgm
10771077
;
1078-
; GFX11-DENORM-FAKE16-LABEL: fmul_x2_xn2_f16:
1079-
; GFX11-DENORM-FAKE16: ; %bb.0:
1080-
; GFX11-DENORM-FAKE16-NEXT: s_clause 0x1
1081-
; GFX11-DENORM-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1082-
; GFX11-DENORM-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1083-
; GFX11-DENORM-FAKE16-NEXT: v_mov_b32_e32 v1, 0
1084-
; GFX11-DENORM-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1085-
; GFX11-DENORM-FAKE16-NEXT: v_mul_f16_e64 v0, s2, -4.0
1086-
; GFX11-DENORM-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1087-
; GFX11-DENORM-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0
1088-
; GFX11-DENORM-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1089-
; GFX11-DENORM-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1090-
; GFX11-DENORM-FAKE16-NEXT: s_endpgm
1091-
;
1092-
; GFX11-FLUSH-TRUE16-LABEL: fmul_x2_xn2_f16:
1093-
; GFX11-FLUSH-TRUE16: ; %bb.0:
1094-
; GFX11-FLUSH-TRUE16-NEXT: s_clause 0x1
1095-
; GFX11-FLUSH-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1096-
; GFX11-FLUSH-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1097-
; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v1, 0
1098-
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1099-
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e64 v0.l, s2, -4.0
1100-
; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1101-
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l
1102-
; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1103-
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1104-
; GFX11-FLUSH-TRUE16-NEXT: s_endpgm
1105-
;
1106-
; GFX11-FLUSH-FAKE16-LABEL: fmul_x2_xn2_f16:
1107-
; GFX11-FLUSH-FAKE16: ; %bb.0:
1108-
; GFX11-FLUSH-FAKE16-NEXT: s_clause 0x1
1109-
; GFX11-FLUSH-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1110-
; GFX11-FLUSH-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1111-
; GFX11-FLUSH-FAKE16-NEXT: v_mov_b32_e32 v1, 0
1112-
; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1113-
; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e64 v0, s2, -4.0
1114-
; GFX11-FLUSH-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1115-
; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0
1116-
; GFX11-FLUSH-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1117-
; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1118-
; GFX11-FLUSH-FAKE16-NEXT: s_endpgm
1078+
; GFX11-FAKE16-LABEL: fmul_x2_xn2_f16:
1079+
; GFX11-FAKE16: ; %bb.0:
1080+
; GFX11-FAKE16-NEXT: s_clause 0x1
1081+
; GFX11-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1082+
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1083+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0
1084+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1085+
; GFX11-FAKE16-NEXT: v_mul_f16_e64 v0, s2, -4.0
1086+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1087+
; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0
1088+
; GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1089+
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1090+
; GFX11-FAKE16-NEXT: s_endpgm
11191091
%x = bitcast i16 %x.arg to half
11201092
%y = bitcast i16 %y.arg to half
11211093
%out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
@@ -1157,61 +1129,33 @@ define amdgpu_kernel void @fmul_x2_xn3_f16(ptr addrspace(1) %out, i16 zeroext %x
11571129
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
11581130
; GFX10-NEXT: s_endpgm
11591131
;
1160-
; GFX11-DENORM-TRUE16-LABEL: fmul_x2_xn3_f16:
1161-
; GFX11-DENORM-TRUE16: ; %bb.0:
1162-
; GFX11-DENORM-TRUE16-NEXT: s_clause 0x1
1163-
; GFX11-DENORM-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1164-
; GFX11-DENORM-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1165-
; GFX11-DENORM-TRUE16-NEXT: v_mov_b32_e32 v1, 0
1166-
; GFX11-DENORM-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1167-
; GFX11-DENORM-TRUE16-NEXT: v_mul_f16_e64 v0.l, 0xc600, s2
1168-
; GFX11-DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1169-
; GFX11-DENORM-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l
1170-
; GFX11-DENORM-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1171-
; GFX11-DENORM-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1172-
; GFX11-DENORM-TRUE16-NEXT: s_endpgm
1132+
; GFX11-TRUE16-LABEL: fmul_x2_xn3_f16:
1133+
; GFX11-TRUE16: ; %bb.0:
1134+
; GFX11-TRUE16-NEXT: s_clause 0x1
1135+
; GFX11-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1136+
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1137+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
1138+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1139+
; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, 0xc600, s2
1140+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1141+
; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l
1142+
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1143+
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1144+
; GFX11-TRUE16-NEXT: s_endpgm
11731145
;
1174-
; GFX11-DENORM-FAKE16-LABEL: fmul_x2_xn3_f16:
1175-
; GFX11-DENORM-FAKE16: ; %bb.0:
1176-
; GFX11-DENORM-FAKE16-NEXT: s_clause 0x1
1177-
; GFX11-DENORM-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1178-
; GFX11-DENORM-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1179-
; GFX11-DENORM-FAKE16-NEXT: v_mov_b32_e32 v1, 0
1180-
; GFX11-DENORM-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1181-
; GFX11-DENORM-FAKE16-NEXT: v_mul_f16_e64 v0, 0xc600, s2
1182-
; GFX11-DENORM-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1183-
; GFX11-DENORM-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0
1184-
; GFX11-DENORM-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1185-
; GFX11-DENORM-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1186-
; GFX11-DENORM-FAKE16-NEXT: s_endpgm
1187-
;
1188-
; GFX11-FLUSH-TRUE16-LABEL: fmul_x2_xn3_f16:
1189-
; GFX11-FLUSH-TRUE16: ; %bb.0:
1190-
; GFX11-FLUSH-TRUE16-NEXT: s_clause 0x1
1191-
; GFX11-FLUSH-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1192-
; GFX11-FLUSH-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1193-
; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v1, 0
1194-
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1195-
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e64 v0.l, 0xc600, s2
1196-
; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1197-
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, s2, v0.l
1198-
; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1199-
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1200-
; GFX11-FLUSH-TRUE16-NEXT: s_endpgm
1201-
;
1202-
; GFX11-FLUSH-FAKE16-LABEL: fmul_x2_xn3_f16:
1203-
; GFX11-FLUSH-FAKE16: ; %bb.0:
1204-
; GFX11-FLUSH-FAKE16-NEXT: s_clause 0x1
1205-
; GFX11-FLUSH-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1206-
; GFX11-FLUSH-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1207-
; GFX11-FLUSH-FAKE16-NEXT: v_mov_b32_e32 v1, 0
1208-
; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1209-
; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e64 v0, 0xc600, s2
1210-
; GFX11-FLUSH-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1211-
; GFX11-FLUSH-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0
1212-
; GFX11-FLUSH-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1213-
; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1214-
; GFX11-FLUSH-FAKE16-NEXT: s_endpgm
1146+
; GFX11-FAKE16-LABEL: fmul_x2_xn3_f16:
1147+
; GFX11-FAKE16: ; %bb.0:
1148+
; GFX11-FAKE16-NEXT: s_clause 0x1
1149+
; GFX11-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x8
1150+
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1151+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0
1152+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1153+
; GFX11-FAKE16-NEXT: v_mul_f16_e64 v0, 0xc600, s2
1154+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1155+
; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, s2, v0
1156+
; GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1157+
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1158+
; GFX11-FAKE16-NEXT: s_endpgm
12151159
%x = bitcast i16 %x.arg to half
12161160
%y = bitcast i16 %y.arg to half
12171161
%out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1

0 commit comments

Comments
 (0)