@@ -370,27 +370,6 @@ multiclass FLAT_Global_Atomic_Pseudo<
370
370
FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
371
371
FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;
372
372
373
- class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
374
- (ops node:$ptr, node:$value),
375
- (atomic_op node:$ptr, node:$value),
376
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
377
- >;
378
-
379
- def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
380
- def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
381
- def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
382
- def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
383
- def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
384
- def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
385
- def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
386
- def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
387
- def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
388
- def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
389
- def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
390
- def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
391
- def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
392
-
393
-
394
373
395
374
//===----------------------------------------------------------------------===//
396
375
// Flat Instructions
@@ -425,84 +404,84 @@ def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR
425
404
}
426
405
427
406
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
428
- VGPR_32, i32, atomic_cmp_swap_flat ,
407
+ VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32 ,
429
408
v2i32, VReg_64>;
430
409
431
410
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
432
- VReg_64, i64, atomic_cmp_swap_flat ,
411
+ VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64 ,
433
412
v2i64, VReg_128>;
434
413
435
414
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
436
- VGPR_32, i32, atomic_swap_flat >;
415
+ VGPR_32, i32, atomic_swap_flat_32 >;
437
416
438
417
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
439
- VReg_64, i64, atomic_swap_flat >;
418
+ VReg_64, i64, atomic_swap_flat_64 >;
440
419
441
420
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
442
- VGPR_32, i32, atomic_add_flat >;
421
+ VGPR_32, i32, atomic_load_add_flat_32 >;
443
422
444
423
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
445
- VGPR_32, i32, atomic_sub_flat >;
424
+ VGPR_32, i32, atomic_load_sub_flat_32 >;
446
425
447
426
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
448
- VGPR_32, i32, atomic_min_flat >;
427
+ VGPR_32, i32, atomic_load_min_flat_32 >;
449
428
450
429
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
451
- VGPR_32, i32, atomic_umin_flat >;
430
+ VGPR_32, i32, atomic_load_umin_flat_32 >;
452
431
453
432
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
454
- VGPR_32, i32, atomic_max_flat >;
433
+ VGPR_32, i32, atomic_load_max_flat_32 >;
455
434
456
435
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
457
- VGPR_32, i32, atomic_umax_flat >;
436
+ VGPR_32, i32, atomic_load_umax_flat_32 >;
458
437
459
438
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
460
- VGPR_32, i32, atomic_and_flat >;
439
+ VGPR_32, i32, atomic_load_and_flat_32 >;
461
440
462
441
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
463
- VGPR_32, i32, atomic_or_flat >;
442
+ VGPR_32, i32, atomic_load_or_flat_32 >;
464
443
465
444
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
466
- VGPR_32, i32, atomic_xor_flat >;
445
+ VGPR_32, i32, atomic_load_xor_flat_32 >;
467
446
468
447
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
469
- VGPR_32, i32, atomic_inc_flat >;
448
+ VGPR_32, i32, atomic_inc_flat_32 >;
470
449
471
450
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
472
- VGPR_32, i32, atomic_dec_flat >;
451
+ VGPR_32, i32, atomic_dec_flat_32 >;
473
452
474
453
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
475
- VReg_64, i64, atomic_add_flat >;
454
+ VReg_64, i64, atomic_load_add_flat_64 >;
476
455
477
456
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
478
- VReg_64, i64, atomic_sub_flat >;
457
+ VReg_64, i64, atomic_load_sub_flat_64 >;
479
458
480
459
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
481
- VReg_64, i64, atomic_min_flat >;
460
+ VReg_64, i64, atomic_load_min_flat_64 >;
482
461
483
462
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
484
- VReg_64, i64, atomic_umin_flat >;
463
+ VReg_64, i64, atomic_load_umin_flat_64 >;
485
464
486
465
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
487
- VReg_64, i64, atomic_max_flat >;
466
+ VReg_64, i64, atomic_load_max_flat_64 >;
488
467
489
468
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
490
- VReg_64, i64, atomic_umax_flat >;
469
+ VReg_64, i64, atomic_load_umax_flat_64 >;
491
470
492
471
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
493
- VReg_64, i64, atomic_and_flat >;
472
+ VReg_64, i64, atomic_load_and_flat_64 >;
494
473
495
474
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
496
- VReg_64, i64, atomic_or_flat >;
475
+ VReg_64, i64, atomic_load_or_flat_64 >;
497
476
498
477
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
499
- VReg_64, i64, atomic_xor_flat >;
478
+ VReg_64, i64, atomic_load_xor_flat_64 >;
500
479
501
480
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
502
- VReg_64, i64, atomic_inc_flat >;
481
+ VReg_64, i64, atomic_inc_flat_64 >;
503
482
504
483
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
505
- VReg_64, i64, atomic_dec_flat >;
484
+ VReg_64, i64, atomic_dec_flat_64 >;
506
485
507
486
// GFX7-, GFX10-only flat instructions.
508
487
let SubtargetPredicate = isGFX7GFX10 in {
@@ -556,11 +535,11 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
556
535
557
536
let is_flat_global = 1 in {
558
537
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
559
- VGPR_32, i32, AMDGPUatomic_cmp_swap_global ,
538
+ VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32 ,
560
539
v2i32, VReg_64>;
561
540
562
541
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
563
- VReg_64, i64, AMDGPUatomic_cmp_swap_global ,
542
+ VReg_64, i64, AMDGPUatomic_cmp_swap_global_64 ,
564
543
v2i64, VReg_128>;
565
544
566
545
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
@@ -813,7 +792,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
813
792
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
814
793
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
815
794
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
816
- def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global , i32, v2i32>;
795
+ def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32 , i32, v2i32>;
817
796
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
818
797
819
798
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
@@ -827,7 +806,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
827
806
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
828
807
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
829
808
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
830
- def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global , i64, v2i64>;
809
+ def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64 , i64, v2i64>;
831
810
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
832
811
833
812
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
@@ -923,7 +902,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i3
923
902
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
924
903
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
925
904
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
926
- def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global , i32, v2i32>;
905
+ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32 , i32, v2i32>;
927
906
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
928
907
929
908
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
@@ -937,7 +916,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64,
937
916
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
938
917
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
939
918
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
940
- def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global , i64, v2i64>;
919
+ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64 , i64, v2i64>;
941
920
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
942
921
943
922
def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
0 commit comments