@@ -240,15 +240,15 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
240
240
; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
241
241
; GCN-IR-NEXT: v_or_b32_e32 v0, v2, v0
242
242
; GCN-IR-NEXT: v_or_b32_e32 v1, v3, v1
243
- ; GCN-IR-NEXT: BB0_7: ; %Flow7
243
+ ; GCN-IR-NEXT: BB0_7: ; %udiv-end
244
244
; GCN-IR-NEXT: s_xor_b64 s[0:1], s[8:9], s[2:3]
245
245
; GCN-IR-NEXT: v_xor_b32_e32 v0, s0, v0
246
246
; GCN-IR-NEXT: v_xor_b32_e32 v1, s1, v1
247
247
; GCN-IR-NEXT: v_mov_b32_e32 v2, s1
248
248
; GCN-IR-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
249
- ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
250
249
; GCN-IR-NEXT: s_mov_b32 s7, 0xf000
251
250
; GCN-IR-NEXT: s_mov_b32 s6, -1
251
+ ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
252
252
; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
253
253
; GCN-IR-NEXT: s_endpgm
254
254
%result = sdiv i64 %x , %y
@@ -411,26 +411,26 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
411
411
; GCN-IR-NEXT: v_ffbh_u32_e32 v7, v10
412
412
; GCN-IR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10
413
413
; GCN-IR-NEXT: v_cndmask_b32_e32 v14, v7, v0, vcc
414
- ; GCN-IR-NEXT: v_sub_i32_e32 v11 , vcc, v13, v14
415
- ; GCN-IR-NEXT: v_subb_u32_e64 v12 , s[4:5], 0, 0, vcc
416
- ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[11:12 ]
417
- ; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[4:5], 63, v[11:12 ]
414
+ ; GCN-IR-NEXT: v_sub_i32_e32 v7 , vcc, v13, v14
415
+ ; GCN-IR-NEXT: v_subb_u32_e64 v8 , s[4:5], 0, 0, vcc
416
+ ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[7:8 ]
417
+ ; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[4:5], 63, v[7:8 ]
418
418
; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], vcc
419
419
; GCN-IR-NEXT: s_xor_b64 s[8:9], s[6:7], -1
420
420
; GCN-IR-NEXT: v_mov_b32_e32 v18, 0
421
421
; GCN-IR-NEXT: v_mov_b32_e32 v6, v4
422
422
; GCN-IR-NEXT: v_mov_b32_e32 v1, v5
423
- ; GCN-IR-NEXT: v_cndmask_b32_e64 v7 , v10, 0, s[6:7]
423
+ ; GCN-IR-NEXT: v_cndmask_b32_e64 v12 , v10, 0, s[6:7]
424
424
; GCN-IR-NEXT: s_and_b64 s[4:5], s[8:9], s[4:5]
425
425
; GCN-IR-NEXT: v_mov_b32_e32 v15, v18
426
426
; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v9, 0, s[6:7]
427
427
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
428
428
; GCN-IR-NEXT: s_cbranch_execz BB1_6
429
429
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
430
- ; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, 1, v11
431
- ; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, 0, v12 , vcc
432
- ; GCN-IR-NEXT: v_sub_i32_e64 v0, s[4:5], 63, v11
433
- ; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[16:17], v[11:12 ]
430
+ ; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, 1, v7
431
+ ; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, 0, v8 , vcc
432
+ ; GCN-IR-NEXT: v_sub_i32_e64 v0, s[4:5], 63, v7
433
+ ; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[16:17], v[7:8 ]
434
434
; GCN-IR-NEXT: v_mov_b32_e32 v11, 0
435
435
; GCN-IR-NEXT: v_lshl_b64 v[7:8], v[9:10], v0
436
436
; GCN-IR-NEXT: s_mov_b64 s[8:9], 0
@@ -480,14 +480,14 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
480
480
; GCN-IR-NEXT: BB1_5: ; %Flow3
481
481
; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11]
482
482
; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[7:8], 1
483
- ; GCN-IR-NEXT: v_or_b32_e32 v7 , v12, v3
483
+ ; GCN-IR-NEXT: v_or_b32_e32 v12 , v12, v3
484
484
; GCN-IR-NEXT: v_or_b32_e32 v0, v11, v2
485
485
; GCN-IR-NEXT: BB1_6: ; %Flow4
486
486
; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7]
487
487
; GCN-IR-NEXT: v_xor_b32_e32 v2, v5, v4
488
488
; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v2
489
489
; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v6
490
- ; GCN-IR-NEXT: v_xor_b32_e32 v3, v7 , v1
490
+ ; GCN-IR-NEXT: v_xor_b32_e32 v3, v12 , v1
491
491
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
492
492
; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc
493
493
; GCN-IR-NEXT: s_setpc_b64 s[30:31]
@@ -1111,7 +1111,7 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
1111
1111
; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
1112
1112
; GCN-IR-NEXT: v_or_b32_e32 v0, v2, v0
1113
1113
; GCN-IR-NEXT: v_or_b32_e32 v1, v3, v1
1114
- ; GCN-IR-NEXT: BB9_7: ; %Flow4
1114
+ ; GCN-IR-NEXT: BB9_7: ; %udiv-end
1115
1115
; GCN-IR-NEXT: s_xor_b64 s[0:1], s[6:7], s[2:3]
1116
1116
; GCN-IR-NEXT: v_xor_b32_e32 v0, s0, v0
1117
1117
; GCN-IR-NEXT: v_xor_b32_e32 v1, s1, v1
@@ -1341,9 +1341,9 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
1341
1341
; GCN-IR-NEXT: v_xor_b32_e32 v1, s3, v1
1342
1342
; GCN-IR-NEXT: v_mov_b32_e32 v2, s3
1343
1343
; GCN-IR-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
1344
- ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
1345
1344
; GCN-IR-NEXT: s_mov_b32 s7, 0xf000
1346
1345
; GCN-IR-NEXT: s_mov_b32 s6, -1
1346
+ ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
1347
1347
; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1348
1348
; GCN-IR-NEXT: s_endpgm
1349
1349
%result = sdiv i64 24 , %x
0 commit comments