@@ -338,25 +338,29 @@ define i24 @v_mul_add_1_i24_zext(i24 zeroext %x, i24 zeroext %y) {
338
338
; GFX67-LABEL: v_mul_add_1_i24_zext:
339
339
; GFX67: ; %bb.0:
340
340
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341
- ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v0
341
+ ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
342
+ ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
342
343
; GFX67-NEXT: s_setpc_b64 s[30:31]
343
344
;
344
345
; GFX8-LABEL: v_mul_add_1_i24_zext:
345
346
; GFX8: ; %bb.0:
346
347
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347
- ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v1, v0
348
+ ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v1
349
+ ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
348
350
; GFX8-NEXT: s_setpc_b64 s[30:31]
349
351
;
350
352
; GFX9-LABEL: v_mul_add_1_i24_zext:
351
353
; GFX9: ; %bb.0:
352
354
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353
- ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v1, v0
355
+ ; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
356
+ ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
354
357
; GFX9-NEXT: s_setpc_b64 s[30:31]
355
358
;
356
359
; GFX10-LABEL: v_mul_add_1_i24_zext:
357
360
; GFX10: ; %bb.0:
358
361
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359
- ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
362
+ ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
363
+ ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
360
364
; GFX10-NEXT: s_setpc_b64 s[30:31]
361
365
%add = add i24 %y , 1
362
366
%mul = mul i24 %x , %add
@@ -429,25 +433,29 @@ define i24 @v_mul_add_1_i24_sext(i24 signext %x, i24 signext %y) {
429
433
; GFX67-LABEL: v_mul_add_1_i24_sext:
430
434
; GFX67: ; %bb.0:
431
435
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432
- ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v0
436
+ ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
437
+ ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
433
438
; GFX67-NEXT: s_setpc_b64 s[30:31]
434
439
;
435
440
; GFX8-LABEL: v_mul_add_1_i24_sext:
436
441
; GFX8: ; %bb.0:
437
442
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438
- ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v1, v0
443
+ ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v1
444
+ ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
439
445
; GFX8-NEXT: s_setpc_b64 s[30:31]
440
446
;
441
447
; GFX9-LABEL: v_mul_add_1_i24_sext:
442
448
; GFX9: ; %bb.0:
443
449
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444
- ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v1, v0
450
+ ; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
451
+ ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
445
452
; GFX9-NEXT: s_setpc_b64 s[30:31]
446
453
;
447
454
; GFX10-LABEL: v_mul_add_1_i24_sext:
448
455
; GFX10: ; %bb.0:
449
456
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450
- ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
457
+ ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
458
+ ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
451
459
; GFX10-NEXT: s_setpc_b64 s[30:31]
452
460
%add = add i24 %y , 1
453
461
%mul = mul i24 %x , %add
@@ -2306,29 +2314,37 @@ define <2 x i24> @v_mul_add_1_v2i24(<2 x i24> %x, <2 x i24> %y) {
2306
2314
; GFX67-LABEL: v_mul_add_1_v2i24:
2307
2315
; GFX67: ; %bb.0:
2308
2316
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2309
- ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2310
- ; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2317
+ ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v3
2318
+ ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
2319
+ ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2320
+ ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2311
2321
; GFX67-NEXT: s_setpc_b64 s[30:31]
2312
2322
;
2313
2323
; GFX8-LABEL: v_mul_add_1_v2i24:
2314
2324
; GFX8: ; %bb.0:
2315
2325
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2316
- ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2317
- ; GFX8-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2326
+ ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v3
2327
+ ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v2
2328
+ ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2329
+ ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2318
2330
; GFX8-NEXT: s_setpc_b64 s[30:31]
2319
2331
;
2320
2332
; GFX9-LABEL: v_mul_add_1_v2i24:
2321
2333
; GFX9: ; %bb.0:
2322
2334
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2323
- ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2324
- ; GFX9-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2335
+ ; GFX9-NEXT: v_add_u32_e32 v3, 1, v3
2336
+ ; GFX9-NEXT: v_add_u32_e32 v2, 1, v2
2337
+ ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2338
+ ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2325
2339
; GFX9-NEXT: s_setpc_b64 s[30:31]
2326
2340
;
2327
2341
; GFX10-LABEL: v_mul_add_1_v2i24:
2328
2342
; GFX10: ; %bb.0:
2329
2343
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2330
- ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2331
- ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2344
+ ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v2
2345
+ ; GFX10-NEXT: v_add_nc_u32_e32 v3, 1, v3
2346
+ ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2347
+ ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2332
2348
; GFX10-NEXT: s_setpc_b64 s[30:31]
2333
2349
%add = add <2 x i24 > %y , <i24 1 , i24 1 >
2334
2350
%mul = mul <2 x i24 > %x , %add
@@ -2339,29 +2355,37 @@ define <2 x i24> @v_mul_add_1_v2i24_commute(<2 x i24> %x, <2 x i24> %y) {
2339
2355
; GFX67-LABEL: v_mul_add_1_v2i24_commute:
2340
2356
; GFX67: ; %bb.0:
2341
2357
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2342
- ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2343
- ; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2358
+ ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v3
2359
+ ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
2360
+ ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2361
+ ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2344
2362
; GFX67-NEXT: s_setpc_b64 s[30:31]
2345
2363
;
2346
2364
; GFX8-LABEL: v_mul_add_1_v2i24_commute:
2347
2365
; GFX8: ; %bb.0:
2348
2366
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2349
- ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2350
- ; GFX8-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2367
+ ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v3
2368
+ ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v2
2369
+ ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2370
+ ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2351
2371
; GFX8-NEXT: s_setpc_b64 s[30:31]
2352
2372
;
2353
2373
; GFX9-LABEL: v_mul_add_1_v2i24_commute:
2354
2374
; GFX9: ; %bb.0:
2355
2375
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2356
- ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2357
- ; GFX9-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2376
+ ; GFX9-NEXT: v_add_u32_e32 v3, 1, v3
2377
+ ; GFX9-NEXT: v_add_u32_e32 v2, 1, v2
2378
+ ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2379
+ ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2358
2380
; GFX9-NEXT: s_setpc_b64 s[30:31]
2359
2381
;
2360
2382
; GFX10-LABEL: v_mul_add_1_v2i24_commute:
2361
2383
; GFX10: ; %bb.0:
2362
2384
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2363
- ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2364
- ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2385
+ ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v2
2386
+ ; GFX10-NEXT: v_add_nc_u32_e32 v3, 1, v3
2387
+ ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2388
+ ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2365
2389
; GFX10-NEXT: s_setpc_b64 s[30:31]
2366
2390
%add = add <2 x i24 > %y , <i24 1 , i24 1 >
2367
2391
%mul = mul <2 x i24 > %add , %x
@@ -3692,10 +3716,186 @@ define <2 x i8> @v_mul_add_1_v2i8_commute(<2 x i8> %x, <2 x i8> %y) {
3692
3716
ret <2 x i8 > %mul
3693
3717
}
3694
3718
3719
+ ; Test the llvm.amdgcn.mul.u24 intrinsic in its (i32, i32) -> i64 form, with operands of different known ranges, in both operand orders.
3720
+ define i64 @mul_u24_with_uneven_operands (i32 %z ) {
3721
+ ; GFX67-LABEL: mul_u24_with_uneven_operands:
3722
+ ; GFX67: ; %bb.0: ; %entry
3723
+ ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3724
+ ; GFX67-NEXT: v_and_b32_e32 v0, 1, v0
3725
+ ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v0
3726
+ ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3727
+ ; GFX67-NEXT: v_mov_b32_e32 v1, 0
3728
+ ; GFX67-NEXT: s_setpc_b64 s[30:31]
3729
+ ;
3730
+ ; GFX8-LABEL: mul_u24_with_uneven_operands:
3731
+ ; GFX8: ; %bb.0: ; %entry
3732
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3733
+ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
3734
+ ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v0
3735
+ ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3736
+ ; GFX8-NEXT: v_mov_b32_e32 v1, 0
3737
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
3738
+ ;
3739
+ ; GFX9-LABEL: mul_u24_with_uneven_operands:
3740
+ ; GFX9: ; %bb.0: ; %entry
3741
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3742
+ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
3743
+ ; GFX9-NEXT: v_add_u32_e32 v1, 1, v0
3744
+ ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3745
+ ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3746
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
3747
+ ;
3748
+ ; GFX10-LABEL: mul_u24_with_uneven_operands:
3749
+ ; GFX10: ; %bb.0: ; %entry
3750
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3751
+ ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
3752
+ ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v0
3753
+ ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3754
+ ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3755
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
3756
+ entry:
3757
+ %c = and i32 %z , 1 ; keep only the low bit of %z, so %c is 0 or 1
3758
+ %d = add nuw nsw i32 %c , 1 ; %d = %c + 1, i.e. 1 or 2; flagged as non-wrapping
3759
+ %f = call i64 @llvm.amdgcn.mul.u24 (i32 %d , i32 %c ) ; larger-range operand (%d) first; the assertions above expect a single v_mul_u32_u24 into v0 plus v1 set to 0 (presumably the high half of the i64 result - confirm)
3760
+ ret i64 %f
3761
+ }
3762
+
3763
+ define i64 @mul_u24_with_uneven_operands_swapped (i32 %z ) {
3764
+ ; GFX67-LABEL: mul_u24_with_uneven_operands_swapped:
3765
+ ; GFX67: ; %bb.0: ; %entry
3766
+ ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3767
+ ; GFX67-NEXT: v_and_b32_e32 v0, 1, v0
3768
+ ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v0
3769
+ ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3770
+ ; GFX67-NEXT: v_mov_b32_e32 v1, 0
3771
+ ; GFX67-NEXT: s_setpc_b64 s[30:31]
3772
+ ;
3773
+ ; GFX8-LABEL: mul_u24_with_uneven_operands_swapped:
3774
+ ; GFX8: ; %bb.0: ; %entry
3775
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3776
+ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
3777
+ ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v0
3778
+ ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3779
+ ; GFX8-NEXT: v_mov_b32_e32 v1, 0
3780
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
3781
+ ;
3782
+ ; GFX9-LABEL: mul_u24_with_uneven_operands_swapped:
3783
+ ; GFX9: ; %bb.0: ; %entry
3784
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3785
+ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
3786
+ ; GFX9-NEXT: v_add_u32_e32 v1, 1, v0
3787
+ ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3788
+ ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3789
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
3790
+ ;
3791
+ ; GFX10-LABEL: mul_u24_with_uneven_operands_swapped:
3792
+ ; GFX10: ; %bb.0: ; %entry
3793
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3794
+ ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
3795
+ ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v0
3796
+ ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3797
+ ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3798
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
3799
+ entry:
3800
+ %c = and i32 %z , 1 ; keep only the low bit of %z, so %c is 0 or 1
3801
+ %d = add nuw nsw i32 %c , 1 ; %d = %c + 1, i.e. 1 or 2; flagged as non-wrapping
3802
+ %f = call i64 @llvm.amdgcn.mul.u24 (i32 %c , i32 %d ) ; smaller-range operand (%c) first; the assertions above expect a single v_mul_u32_u24 into v0 plus v1 set to 0 (presumably the high half of the i64 result - confirm)
3803
+ ret i64 %f
3804
+ }
3805
+
3806
+ ; Test the llvm.amdgcn.mul.i24 intrinsic in its (i32, i32) -> i64 form, with operands of different known ranges, in both operand orders.
3807
+ define i64 @mul_i24_with_uneven_operands (i32 %z ) {
3808
+ ; GFX67-LABEL: mul_i24_with_uneven_operands:
3809
+ ; GFX67: ; %bb.0: ; %entry
3810
+ ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3811
+ ; GFX67-NEXT: v_and_b32_e32 v0, 1, v0
3812
+ ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v0
3813
+ ; GFX67-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v0
3814
+ ; GFX67-NEXT: v_mul_i32_i24_e32 v0, v2, v0
3815
+ ; GFX67-NEXT: s_setpc_b64 s[30:31]
3816
+ ;
3817
+ ; GFX8-LABEL: mul_i24_with_uneven_operands:
3818
+ ; GFX8: ; %bb.0: ; %entry
3819
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3820
+ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
3821
+ ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0
3822
+ ; GFX8-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v0
3823
+ ; GFX8-NEXT: v_mul_i32_i24_e32 v0, v2, v0
3824
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
3825
+ ;
3826
+ ; GFX9-LABEL: mul_i24_with_uneven_operands:
3827
+ ; GFX9: ; %bb.0: ; %entry
3828
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3829
+ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
3830
+ ; GFX9-NEXT: v_add_u32_e32 v2, 1, v0
3831
+ ; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v0
3832
+ ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v2, v0
3833
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
3834
+ ;
3835
+ ; GFX10-LABEL: mul_i24_with_uneven_operands:
3836
+ ; GFX10: ; %bb.0: ; %entry
3837
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3838
+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v0
3839
+ ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v1
3840
+ ; GFX10-NEXT: v_mul_i32_i24_e32 v0, v2, v1
3841
+ ; GFX10-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v1
3842
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
3843
+ entry:
3844
+ %c = and i32 %z , 1 ; keep only the low bit of %z, so %c is 0 or 1
3845
+ %d = add nuw nsw i32 %c , 1 ; %d = %c + 1, i.e. 1 or 2; flagged as non-wrapping
3846
+ %f = call i64 @llvm.amdgcn.mul.i24 (i32 %d , i32 %c ) ; larger-range operand (%d) first; the assertions above expect both v_mul_i32_i24 (into v0) and v_mul_hi_i32_i24 (into v1), presumably the low and high halves of the i64 result - confirm
3847
+ ret i64 %f
3848
+ }
3849
+
3850
+ define i64 @mul_i24_with_uneven_operands_swapped (i32 %z ) {
3851
+ ; GFX67-LABEL: mul_i24_with_uneven_operands_swapped:
3852
+ ; GFX67: ; %bb.0: ; %entry
3853
+ ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3854
+ ; GFX67-NEXT: v_and_b32_e32 v0, 1, v0
3855
+ ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v0
3856
+ ; GFX67-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
3857
+ ; GFX67-NEXT: v_mul_i32_i24_e32 v0, v0, v2
3858
+ ; GFX67-NEXT: s_setpc_b64 s[30:31]
3859
+ ;
3860
+ ; GFX8-LABEL: mul_i24_with_uneven_operands_swapped:
3861
+ ; GFX8: ; %bb.0: ; %entry
3862
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3863
+ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
3864
+ ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0
3865
+ ; GFX8-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
3866
+ ; GFX8-NEXT: v_mul_i32_i24_e32 v0, v0, v2
3867
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
3868
+ ;
3869
+ ; GFX9-LABEL: mul_i24_with_uneven_operands_swapped:
3870
+ ; GFX9: ; %bb.0: ; %entry
3871
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3872
+ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
3873
+ ; GFX9-NEXT: v_add_u32_e32 v2, 1, v0
3874
+ ; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
3875
+ ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v2
3876
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
3877
+ ;
3878
+ ; GFX10-LABEL: mul_i24_with_uneven_operands_swapped:
3879
+ ; GFX10: ; %bb.0: ; %entry
3880
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3881
+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v0
3882
+ ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v1
3883
+ ; GFX10-NEXT: v_mul_i32_i24_e32 v0, v1, v2
3884
+ ; GFX10-NEXT: v_mul_hi_i32_i24_e32 v1, v1, v2
3885
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
3886
+ entry:
3887
+ %c = and i32 %z , 1 ; keep only the low bit of %z, so %c is 0 or 1
3888
+ %d = add nuw nsw i32 %c , 1 ; %d = %c + 1, i.e. 1 or 2; flagged as non-wrapping
3889
+ %f = call i64 @llvm.amdgcn.mul.i24 (i32 %c , i32 %d ) ; smaller-range operand (%c) first; the assertions above expect both v_mul_i32_i24 (into v0) and v_mul_hi_i32_i24 (into v1), presumably the low and high halves of the i64 result - confirm
3890
+ ret i64 %f
3891
+ }
3892
+
3695
3893
declare align 4 ptr addrspace (4 ) @llvm.amdgcn.implicitarg.ptr () #2
3696
3894
declare i32 @llvm.amdgcn.workitem.id.x () #2
3697
3895
declare align 4 ptr addrspace (4 ) @llvm.amdgcn.dispatch.ptr () #2
3698
3896
declare i32 @llvm.amdgcn.workgroup.id.x () #2
3897
+ declare i64 @llvm.amdgcn.mul.u24 (i32 , i32 ) ; i64-returning form called by the mul_u24_with_uneven_operands* tests
3898
+ declare i64 @llvm.amdgcn.mul.i24 (i32 , i32 ) ; i64-returning form called by the mul_i24_with_uneven_operands* tests
3699
3899
3700
3900
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
3701
3901
attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(read, argmem: readwrite, inaccessiblemem: none) }
0 commit comments