@@ -279,125 +279,27 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
279
279
; CHECK: ; %bb.0:
280
280
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281
281
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
282
- ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
283
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0xfffff000
284
- ; CHECK-NEXT: v_mov_b32_e32 v4, 0x1000
282
+ ; CHECK-NEXT: v_lshrrev_b32_e32 v1, 20, v1
285
283
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
286
- ; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
287
- ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
288
- ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
289
- ; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
290
- ; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
291
- ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
292
- ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
293
- ; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2
294
- ; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v2
295
- ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
296
- ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
297
- ; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
298
- ; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 0x1000, v0
299
- ; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
300
- ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2
301
- ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
302
- ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
303
- ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
304
- ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
284
+ ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
305
285
; CHECK-NEXT: s_setpc_b64 s[30:31]
306
286
%result = sdiv i32 %num , 4096
307
287
ret i32 %result
308
288
}
309
289
310
290
define <2 x i32 > @v_sdiv_v2i32_pow2k_denom (<2 x i32 > %num ) {
311
- ; GISEL-LABEL: v_sdiv_v2i32_pow2k_denom:
312
- ; GISEL: ; %bb.0:
313
- ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314
- ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
315
- ; GISEL-NEXT: v_mov_b32_e32 v3, 0x1000
316
- ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
317
- ; GISEL-NEXT: v_mov_b32_e32 v5, 0xfffff000
318
- ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
319
- ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
320
- ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
321
- ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
322
- ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
323
- ; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
324
- ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
325
- ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
326
- ; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
327
- ; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
328
- ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
329
- ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
330
- ; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
331
- ; GISEL-NEXT: v_lshlrev_b32_e32 v7, 12, v5
332
- ; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5
333
- ; GISEL-NEXT: v_lshlrev_b32_e32 v9, 12, v4
334
- ; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v4
335
- ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
336
- ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
337
- ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
338
- ; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5]
339
- ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v0, v3
340
- ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v3
341
- ; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7]
342
- ; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 0x1000, v1
343
- ; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
344
- ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
345
- ; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
346
- ; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v4
347
- ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
348
- ; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v7, vcc
349
- ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
350
- ; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
351
- ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
352
- ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
353
- ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
354
- ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
355
- ; GISEL-NEXT: s_setpc_b64 s[30:31]
356
- ;
357
- ; CGP-LABEL: v_sdiv_v2i32_pow2k_denom:
358
- ; CGP: ; %bb.0:
359
- ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
360
- ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
361
- ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x45800000
362
- ; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
363
- ; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
364
- ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
365
- ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
366
- ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
367
- ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
368
- ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
369
- ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
370
- ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
371
- ; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
372
- ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
373
- ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
374
- ; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
375
- ; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
376
- ; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v4
377
- ; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
378
- ; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v3
379
- ; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v3
380
- ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
381
- ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
382
- ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
383
- ; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
384
- ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v0, v5
385
- ; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v5
386
- ; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[6:7]
387
- ; CGP-NEXT: v_subrev_i32_e32 v8, vcc, 0x1000, v1
388
- ; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
389
- ; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4
390
- ; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
391
- ; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
392
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
393
- ; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v7, vcc
394
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
395
- ; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
396
- ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
397
- ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
398
- ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
399
- ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
400
- ; CGP-NEXT: s_setpc_b64 s[30:31]
291
+ ; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom:
292
+ ; CHECK: ; %bb.0:
293
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294
+ ; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0
295
+ ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
296
+ ; CHECK-NEXT: v_lshrrev_b32_e32 v2, 20, v2
297
+ ; CHECK-NEXT: v_lshrrev_b32_e32 v3, 20, v3
298
+ ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
299
+ ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
300
+ ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
301
+ ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 12, v1
302
+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
401
303
%result = sdiv <2 x i32 > %num , <i32 4096 , i32 4096 >
402
304
ret <2 x i32 > %result
403
305
}
@@ -884,3 +786,13 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
884
786
%result = sdiv <2 x i32 > %num.mask , %den.mask
885
787
ret <2 x i32 > %result
886
788
}
789
+
790
+ define i32 @v_sdiv_i32_exact (i32 %num ) {
791
+ ; CHECK-LABEL: v_sdiv_i32_exact:
792
+ ; CHECK: ; %bb.0:
793
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794
+ ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
795
+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
796
+ %result = sdiv exact i32 %num , 4096
797
+ ret i32 %result
798
+ }
0 commit comments