Skip to content

Commit 9b4fa85

Browse files
GlobalISel/IRTranslator resetTargetOptions based on function attributes
Update TargetMachine.Options with function attributes before we start to generate MIR instructions. This allows access to the correct function attributes via TargetMachine.Options (previously, the options reflected the attributes of whichever function was translated first). This affects some existing tests with the "no-nans-fp-math" attribute. Follow-up on D87456. Differential Revision: https://reviews.llvm.org/D87511
1 parent 4874129 commit 9b4fa85

File tree

3 files changed

+132
-119
lines changed

3 files changed

+132
-119
lines changed

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2917,6 +2917,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
29172917
DL = &F.getParent()->getDataLayout();
29182918
ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
29192919
const TargetMachine &TM = MF->getTarget();
2920+
TM.resetTargetOptions(F);
29202921
EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
29212922
FuncInfo.MF = MF;
29222923
if (EnableOpts)

llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll

Lines changed: 96 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -105,41 +105,61 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad
105105
; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64
106106
; SI-NEXT: s_waitcnt vmcnt(2)
107107
; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2
108+
; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2
109+
; SI-NEXT: s_waitcnt vmcnt(1)
110+
; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3
111+
; SI-NEXT: v_min_f32_e32 v5, v2, v3
112+
; SI-NEXT: v_max_f32_e32 v2, v2, v3
108113
; SI-NEXT: s_waitcnt vmcnt(0)
109-
; SI-NEXT: v_med3_f32 v2, v2, v3, v4
114+
; SI-NEXT: v_mul_f32_e32 v3, 1.0, v4
115+
; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2
116+
; SI-NEXT: v_min_f32_e32 v2, v2, v3
117+
; SI-NEXT: v_mul_f32_e32 v3, 1.0, v5
118+
; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2
119+
; SI-NEXT: v_max_f32_e32 v2, v3, v2
110120
; SI-NEXT: s_mov_b64 s[2:3], s[10:11]
111121
; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
112122
; SI-NEXT: s_endpgm
113123
;
114124
; VI-LABEL: v_test_no_global_nnans_med3_f32_pat0_srcmod0:
115125
; VI: ; %bb.0:
116126
; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
117-
; VI-NEXT: v_lshlrev_b32_e32 v8, 2, v0
127+
; VI-NEXT: v_lshlrev_b32_e32 v6, 2, v0
118128
; VI-NEXT: s_waitcnt lgkmcnt(0)
119129
; VI-NEXT: v_mov_b32_e32 v0, s2
120130
; VI-NEXT: v_mov_b32_e32 v1, s3
121-
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v8
131+
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6
122132
; VI-NEXT: v_mov_b32_e32 v2, s4
123133
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
124134
; VI-NEXT: v_mov_b32_e32 v3, s5
125-
; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v8
135+
; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v6
126136
; VI-NEXT: v_mov_b32_e32 v4, s6
127137
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
128138
; VI-NEXT: v_mov_b32_e32 v5, s7
129-
; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v8
139+
; VI-NEXT: v_add_u32_e32 v4, vcc, v4, v6
130140
; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
131-
; VI-NEXT: flat_load_dword v0, v[0:1]
132-
; VI-NEXT: flat_load_dword v1, v[2:3]
133-
; VI-NEXT: flat_load_dword v2, v[4:5]
134-
; VI-NEXT: v_mov_b32_e32 v7, s1
135-
; VI-NEXT: v_mov_b32_e32 v6, s0
136-
; VI-NEXT: v_add_u32_e32 v6, vcc, v6, v8
137-
; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
141+
; VI-NEXT: flat_load_dword v7, v[0:1]
142+
; VI-NEXT: flat_load_dword v2, v[2:3]
143+
; VI-NEXT: flat_load_dword v3, v[4:5]
144+
; VI-NEXT: v_mov_b32_e32 v0, s0
145+
; VI-NEXT: v_mov_b32_e32 v1, s1
146+
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6
147+
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
138148
; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
139-
; VI-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
149+
; VI-NEXT: v_sub_f32_e32 v4, 0x80000000, v7
150+
; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
151+
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2
152+
; VI-NEXT: v_mul_f32_e32 v4, 1.0, v4
153+
; VI-NEXT: v_min_f32_e32 v5, v4, v2
154+
; VI-NEXT: v_max_f32_e32 v2, v4, v2
140155
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
141-
; VI-NEXT: v_med3_f32 v0, v0, v1, v2
142-
; VI-NEXT: flat_store_dword v[6:7], v0
156+
; VI-NEXT: v_mul_f32_e32 v3, 1.0, v3
157+
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2
158+
; VI-NEXT: v_min_f32_e32 v2, v2, v3
159+
; VI-NEXT: v_mul_f32_e32 v3, 1.0, v5
160+
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2
161+
; VI-NEXT: v_max_f32_e32 v2, v3, v2
162+
; VI-NEXT: flat_store_dword v[0:1], v2
143163
; VI-NEXT: s_endpgm
144164
;
145165
; GFX9-LABEL: v_test_no_global_nnans_med3_f32_pat0_srcmod0:
@@ -152,8 +172,18 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad
152172
; GFX9-NEXT: global_load_dword v3, v0, s[6:7]
153173
; GFX9-NEXT: s_waitcnt vmcnt(2)
154174
; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1
175+
; GFX9-NEXT: s_waitcnt vmcnt(1)
176+
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
177+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
178+
; GFX9-NEXT: v_min_f32_e32 v4, v1, v2
179+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
155180
; GFX9-NEXT: s_waitcnt vmcnt(0)
156-
; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
181+
; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
182+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
183+
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
184+
; GFX9-NEXT: v_max_f32_e32 v2, v4, v4
185+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
186+
; GFX9-NEXT: v_max_f32_e32 v1, v2, v1
157187
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
158188
; GFX9-NEXT: s_endpgm
159189
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -396,7 +426,13 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out,
396426
; SI-NEXT: v_add_f32_e32 v3, 2.0, v3
397427
; SI-NEXT: s_waitcnt vmcnt(0)
398428
; SI-NEXT: v_add_f32_e32 v4, 4.0, v4
399-
; SI-NEXT: v_med3_f32 v2, v2, v3, v4
429+
; SI-NEXT: v_min_f32_e32 v5, v2, v3
430+
; SI-NEXT: v_max_f32_e32 v2, v2, v3
431+
; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2
432+
; SI-NEXT: v_min_f32_e32 v2, v2, v4
433+
; SI-NEXT: v_mul_f32_e32 v3, 1.0, v5
434+
; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2
435+
; SI-NEXT: v_max_f32_e32 v2, v3, v2
400436
; SI-NEXT: s_mov_b64 s[2:3], s[10:11]
401437
; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
402438
; SI-NEXT: s_endpgm
@@ -429,9 +465,15 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out,
429465
; VI-NEXT: v_add_f32_e32 v4, 1.0, v7
430466
; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
431467
; VI-NEXT: v_add_f32_e32 v2, 2.0, v2
468+
; VI-NEXT: v_min_f32_e32 v5, v4, v2
469+
; VI-NEXT: v_max_f32_e32 v2, v4, v2
432470
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
433471
; VI-NEXT: v_add_f32_e32 v3, 4.0, v3
434-
; VI-NEXT: v_med3_f32 v2, v4, v2, v3
472+
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2
473+
; VI-NEXT: v_min_f32_e32 v2, v2, v3
474+
; VI-NEXT: v_mul_f32_e32 v3, 1.0, v5
475+
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2
476+
; VI-NEXT: v_max_f32_e32 v2, v3, v2
435477
; VI-NEXT: flat_store_dword v[0:1], v2
436478
; VI-NEXT: s_endpgm
437479
;
@@ -447,9 +489,15 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out,
447489
; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
448490
; GFX9-NEXT: s_waitcnt vmcnt(1)
449491
; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2
492+
; GFX9-NEXT: v_min_f32_e32 v4, v1, v2
493+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
450494
; GFX9-NEXT: s_waitcnt vmcnt(0)
451495
; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3
452-
; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3
496+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
497+
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
498+
; GFX9-NEXT: v_max_f32_e32 v2, v4, v4
499+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
500+
; GFX9-NEXT: v_max_f32_e32 v1, v2, v1
453501
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
454502
; GFX9-NEXT: s_endpgm
455503
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -495,13 +543,20 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(
495543
; SI-NEXT: s_mov_b32 s3, s11
496544
; SI-NEXT: s_mov_b64 s[8:9], s[6:7]
497545
; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64
546+
; SI-NEXT: s_waitcnt vmcnt(2)
547+
; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2
498548
; SI-NEXT: s_waitcnt vmcnt(1)
549+
; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3
499550
; SI-NEXT: v_min_f32_e32 v5, v2, v3
500551
; SI-NEXT: v_max_f32_e32 v2, v2, v3
552+
; SI-NEXT: s_waitcnt vmcnt(0)
553+
; SI-NEXT: v_mul_f32_e32 v3, 1.0, v4
501554
; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0
502-
; SI-NEXT: s_waitcnt vmcnt(1)
503-
; SI-NEXT: v_min_f32_e32 v2, v2, v4
504-
; SI-NEXT: v_max_f32_e32 v2, v5, v2
555+
; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2
556+
; SI-NEXT: v_min_f32_e32 v2, v2, v3
557+
; SI-NEXT: v_mul_f32_e32 v3, 1.0, v5
558+
; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2
559+
; SI-NEXT: v_max_f32_e32 v2, v3, v2
505560
; SI-NEXT: s_mov_b64 s[2:3], s[10:11]
506561
; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
507562
; SI-NEXT: s_endpgm
@@ -530,13 +585,20 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(
530585
; VI-NEXT: v_mov_b32_e32 v1, s1
531586
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6
532587
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
588+
; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
589+
; VI-NEXT: v_mul_f32_e32 v4, 1.0, v7
533590
; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
534-
; VI-NEXT: v_min_f32_e32 v4, v7, v2
535-
; VI-NEXT: v_max_f32_e32 v2, v7, v2
591+
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2
592+
; VI-NEXT: v_min_f32_e32 v5, v4, v2
593+
; VI-NEXT: v_max_f32_e32 v2, v4, v2
536594
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
595+
; VI-NEXT: v_mul_f32_e32 v3, 1.0, v3
596+
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2
537597
; VI-NEXT: v_min_f32_e32 v2, v2, v3
538-
; VI-NEXT: v_max_f32_e32 v2, v4, v2
539-
; VI-NEXT: flat_store_dword v[0:1], v4
598+
; VI-NEXT: v_mul_f32_e32 v3, 1.0, v5
599+
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2
600+
; VI-NEXT: v_max_f32_e32 v2, v3, v2
601+
; VI-NEXT: flat_store_dword v[0:1], v5
540602
; VI-NEXT: flat_store_dword v[0:1], v2
541603
; VI-NEXT: s_endpgm
542604
;
@@ -548,13 +610,20 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(
548610
; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
549611
; GFX9-NEXT: global_load_dword v2, v0, s[4:5]
550612
; GFX9-NEXT: global_load_dword v3, v0, s[6:7]
613+
; GFX9-NEXT: s_waitcnt vmcnt(2)
614+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
551615
; GFX9-NEXT: s_waitcnt vmcnt(1)
616+
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
552617
; GFX9-NEXT: v_min_f32_e32 v4, v1, v2
553618
; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
554619
; GFX9-NEXT: global_store_dword v[0:1], v4, off
555620
; GFX9-NEXT: s_waitcnt vmcnt(1)
621+
; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
622+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
556623
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
557-
; GFX9-NEXT: v_max_f32_e32 v1, v4, v1
624+
; GFX9-NEXT: v_max_f32_e32 v2, v4, v4
625+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
626+
; GFX9-NEXT: v_max_f32_e32 v1, v2, v1
558627
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
559628
; GFX9-NEXT: s_endpgm
560629
%tid = call i32 @llvm.amdgcn.workitem.id.x()

0 commit comments

Comments
 (0)