@@ -146,13 +146,13 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
 ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT: v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT: v_mov_b32_e32 v41, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT: s_mov_b32 s42, s15
 ; CHECK-NEXT: s_mov_b32 s43, s14
 ; CHECK-NEXT: s_mov_b32 s44, s13
 ; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT: v_mov_b32_e32 v42, v2
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -163,9 +163,9 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT: s_mov_b32 s12, s45
 ; CHECK-NEXT: s_mov_b32 s13, s44
 ; CHECK-NEXT: s_mov_b32 s14, s43
@@ -285,13 +285,13 @@ define double @test_powr_fast_f64(double %x, double %y) {
 ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
 ; CHECK-NEXT: v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT: v_mov_b32_e32 v43, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT: s_mov_b32 s42, s15
 ; CHECK-NEXT: s_mov_b32 s43, s14
 ; CHECK-NEXT: s_mov_b32 s44, s13
 ; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT: v_mov_b32_e32 v42, v3
 ; CHECK-NEXT: v_mov_b32_e32 v41, v2
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
@@ -302,9 +302,9 @@ define double @test_powr_fast_f64(double %x, double %y) {
 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT: s_mov_b32 s12, s45
 ; CHECK-NEXT: s_mov_b32 s13, s44
 ; CHECK-NEXT: s_mov_b32 s14, s43
@@ -430,13 +430,13 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
 ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT: v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT: v_mov_b32_e32 v41, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT: s_mov_b32 s42, s15
 ; CHECK-NEXT: s_mov_b32 s43, s14
 ; CHECK-NEXT: s_mov_b32 s44, s13
 ; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT: v_mov_b32_e32 v42, v2
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -447,9 +447,9 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT: s_mov_b32 s12, s45
 ; CHECK-NEXT: s_mov_b32 s13, s44
 ; CHECK-NEXT: s_mov_b32 s14, s43
@@ -571,13 +571,13 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
 ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
 ; CHECK-NEXT: v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT: v_mov_b32_e32 v41, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT: s_mov_b32 s42, s15
 ; CHECK-NEXT: s_mov_b32 s43, s14
 ; CHECK-NEXT: s_mov_b32 s44, s13
 ; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT: v_lshlrev_b32_e32 v42, 1, v2
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -588,9 +588,9 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT: s_mov_b32 s12, s45
 ; CHECK-NEXT: s_mov_b32 s13, s44
 ; CHECK-NEXT: s_mov_b32 s14, s43
@@ -715,13 +715,13 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
 ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT: v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT: v_mov_b32_e32 v41, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT: s_mov_b32 s42, s15
 ; CHECK-NEXT: s_mov_b32 s43, s14
 ; CHECK-NEXT: s_mov_b32 s44, s13
 ; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT: v_or_b32_e32 v43, 1, v2
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -732,9 +732,9 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT: s_mov_b32 s12, s45
 ; CHECK-NEXT: s_mov_b32 s13, s44
 ; CHECK-NEXT: s_mov_b32 s14, s43
@@ -773,3 +773,6 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
   %call = tail call fast double @_Z4powndi(double %x, i32 %y)
   ret double %call
 }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}