@@ -408,4 +408,280 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
408
408
ret <2 x i16 > %ret
409
409
}
410
410
411
; Returning f32 flat atomic fadd intrinsic whose address is %ptr advanced by
; 1023 floats. Both sets of checks expect the address add to be folded into the
; atomic as the immediate offset:4092, and expect the instruction form that
; writes a result register (v0) and carries sc0 (GFX940 checks) or
; th:TH_ATOMIC_RETURN (GFX12 checks).
+ define float @flat_atomic_fadd_f32_intrinsic_ret__posoffset (ptr %ptr , float %data ) {
412
+ ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_ret__posoffset:
413
+ ; GFX940: ; %bb.0:
414
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415
+ ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:4092 sc0
416
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
417
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
418
+ ;
419
+ ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_ret__posoffset:
420
+ ; GFX12: ; %bb.0:
421
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
422
+ ; GFX12-NEXT: s_wait_expcnt 0x0
423
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
424
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
425
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
426
+ ; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
427
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
428
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
429
+ %gep = getelementptr float , ptr %ptr , i64 1023 ; 1023 floats * 4 bytes = +4092 bytes
430
+ %result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
431
+ ret float %result
432
+ }
433
+
434
; Returning f32 flat atomic fadd intrinsic whose address is %ptr moved back by
; 256 floats (-1024 bytes). The GFX12 checks expect the folded immediate
; offset:-1024; the GFX940 checks record offset:64512.
; NOTE(review): 64512 == 65536 - 1024, so the GFX940 expectation looks like the
; negative byte offset wrapped to an unsigned value rather than a deliberately
; chosen immediate -- confirm this is legal/intended codegen for that target.
+ define float @flat_atomic_fadd_f32_intrinsic_ret__negoffset (ptr %ptr , float %data ) {
435
+ ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_ret__negoffset:
436
+ ; GFX940: ; %bb.0:
437
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438
+ ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:64512 sc0
439
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
440
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
441
+ ;
442
+ ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_ret__negoffset:
443
+ ; GFX12: ; %bb.0:
444
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
445
+ ; GFX12-NEXT: s_wait_expcnt 0x0
446
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
447
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
448
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
449
+ ; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
450
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
451
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
452
+ %gep = getelementptr float , ptr %ptr , i64 -256 ; -256 floats * 4 bytes = -1024 bytes
453
+ %result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
454
+ ret float %result
455
+ }
456
+
457
; f32 flat atomic fadd intrinsic whose result is never used (the function
; returns void), with the address being %ptr advanced by 1023 floats. Both sets
; of checks expect the immediate offset:4092 on an atomic that has no result
; register and no sc0 / th:TH_ATOMIC_RETURN modifier.
+ define void @flat_atomic_fadd_f32_intrinsic_noret__posoffset (ptr %ptr , float %data ) {
458
+ ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_noret__posoffset:
459
+ ; GFX940: ; %bb.0:
460
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
461
+ ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:4092
462
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
463
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
464
+ ;
465
+ ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_noret__posoffset:
466
+ ; GFX12: ; %bb.0:
467
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
468
+ ; GFX12-NEXT: s_wait_expcnt 0x0
469
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
470
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
471
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
472
+ ; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:4092
473
+ ; GFX12-NEXT: s_wait_dscnt 0x0
474
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
475
+ %gep = getelementptr float , ptr %ptr , i64 1023 ; 1023 floats * 4 bytes = +4092 bytes
476
+ %unused = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data ) ; result intentionally dropped
477
+ ret void
478
+ }
479
+
480
; f32 flat atomic fadd intrinsic whose result is never used (the function
; returns void), with the address being %ptr moved back by 256 floats
; (-1024 bytes). The GFX12 checks expect offset:-1024 on an atomic without a
; result register; the GFX940 checks record offset:64512.
; NOTE(review): 64512 == 65536 - 1024, so the GFX940 expectation looks like the
; negative byte offset wrapped to an unsigned value rather than a deliberately
; chosen immediate -- confirm this is legal/intended codegen for that target.
+ define void @flat_atomic_fadd_f32_intrinsic_noret__negoffset (ptr %ptr , float %data ) {
481
+ ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_noret__negoffset:
482
+ ; GFX940: ; %bb.0:
483
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484
+ ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:64512
485
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
486
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
487
+ ;
488
+ ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_noret__negoffset:
489
+ ; GFX12: ; %bb.0:
490
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
491
+ ; GFX12-NEXT: s_wait_expcnt 0x0
492
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
493
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
494
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
495
+ ; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:-1024
496
+ ; GFX12-NEXT: s_wait_dscnt 0x0
497
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
498
+ %gep = getelementptr float , ptr %ptr , i64 -256 ; -256 floats * 4 bytes = -1024 bytes
499
+ %unused = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data ) ; result intentionally dropped
500
+ ret void
501
+ }
502
+
503
; Returning <2 x half> flat atomic fadd intrinsic whose address is %ptr
; advanced by 1023 <2 x half> elements. Both sets of checks expect
; flat_atomic_pk_add_f16 with the immediate offset:4092, in the form that
; writes a result register (v0) and carries sc0 (GFX940 checks) or
; th:TH_ATOMIC_RETURN (GFX12 checks).
+ define <2 x half > @flat_atomic_fadd_v2f16_intrinsic_ret__posoffset (ptr %ptr , <2 x half > %data ) {
504
+ ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__posoffset:
505
+ ; GFX940: ; %bb.0:
506
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507
+ ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:4092 sc0
508
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
509
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
510
+ ;
511
+ ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__posoffset:
512
+ ; GFX12: ; %bb.0:
513
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
514
+ ; GFX12-NEXT: s_wait_expcnt 0x0
515
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
516
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
517
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
518
+ ; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
519
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
520
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
521
+ %gep = getelementptr <2 x half >, ptr %ptr , i64 1023 ; 1023 elements * 4 bytes = +4092 bytes
522
+ %result = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
523
+ ret <2 x half > %result
524
+ }
525
+
526
; Returning <2 x half> flat atomic fadd intrinsic whose address is %ptr moved
; back by 256 <2 x half> elements (-1024 bytes). The GFX12 checks expect
; flat_atomic_pk_add_f16 with offset:-1024; the GFX940 checks record
; offset:64512.
; NOTE(review): 64512 == 65536 - 1024, so the GFX940 expectation looks like the
; negative byte offset wrapped to an unsigned value rather than a deliberately
; chosen immediate -- confirm this is legal/intended codegen for that target.
+ define <2 x half > @flat_atomic_fadd_v2f16_intrinsic_ret__negoffset (ptr %ptr , <2 x half > %data ) {
527
+ ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__negoffset:
528
+ ; GFX940: ; %bb.0:
529
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530
+ ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:64512 sc0
531
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
532
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
533
+ ;
534
+ ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__negoffset:
535
+ ; GFX12: ; %bb.0:
536
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
537
+ ; GFX12-NEXT: s_wait_expcnt 0x0
538
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
539
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
540
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
541
+ ; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
542
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
543
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
544
+ %gep = getelementptr <2 x half >, ptr %ptr , i64 -256 ; -256 elements * 4 bytes = -1024 bytes
545
+ %result = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
546
+ ret <2 x half > %result
547
+ }
548
+
549
; <2 x half> flat atomic fadd intrinsic whose result is never used (the
; function returns void), with the address being %ptr advanced by 1023
; <2 x half> elements. Both sets of checks expect flat_atomic_pk_add_f16 with
; the immediate offset:4092, without a result register and without the
; sc0 / th:TH_ATOMIC_RETURN modifier.
+ define void @flat_atomic_fadd_v2f16_intrinsic_noret__posoffset (ptr %ptr , <2 x half > %data ) {
550
+ ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__posoffset:
551
+ ; GFX940: ; %bb.0:
552
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553
+ ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:4092
554
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
555
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
556
+ ;
557
+ ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__posoffset:
558
+ ; GFX12: ; %bb.0:
559
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
560
+ ; GFX12-NEXT: s_wait_expcnt 0x0
561
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
562
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
563
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
564
+ ; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:4092
565
+ ; GFX12-NEXT: s_wait_dscnt 0x0
566
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
567
+ %gep = getelementptr <2 x half >, ptr %ptr , i64 1023 ; 1023 elements * 4 bytes = +4092 bytes
568
+ %unused = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data ) ; result intentionally dropped
569
+ ret void
570
+ }
571
+
572
; <2 x half> flat atomic fadd intrinsic whose result is never used (the
; function returns void), with the address being %ptr moved back by 256
; <2 x half> elements (-1024 bytes). The GFX12 checks expect
; flat_atomic_pk_add_f16 with offset:-1024 and no result register; the GFX940
; checks record offset:64512.
; NOTE(review): 64512 == 65536 - 1024, so the GFX940 expectation looks like the
; negative byte offset wrapped to an unsigned value rather than a deliberately
; chosen immediate -- confirm this is legal/intended codegen for that target.
+ define void @flat_atomic_fadd_v2f16_intrinsic_noret__negoffset (ptr %ptr , <2 x half > %data ) {
573
+ ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__negoffset:
574
+ ; GFX940: ; %bb.0:
575
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576
+ ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:64512
577
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
578
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
579
+ ;
580
+ ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__negoffset:
581
+ ; GFX12: ; %bb.0:
582
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
583
+ ; GFX12-NEXT: s_wait_expcnt 0x0
584
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
585
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
586
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
587
+ ; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:-1024
588
+ ; GFX12-NEXT: s_wait_dscnt 0x0
589
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
590
+ %gep = getelementptr <2 x half >, ptr %ptr , i64 -256 ; -256 elements * 4 bytes = -1024 bytes
591
+ %unused = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data ) ; result intentionally dropped
592
+ ret void
593
+ }
594
+
595
; Returning v2bf16 flat atomic fadd intrinsic (operands typed as <2 x i16> in
; this IR) whose address is %ptr advanced by 1023 <2 x i16> elements. Both sets
; of checks expect flat_atomic_pk_add_bf16 with the immediate offset:4092, in
; the form that writes a result register (v0) and carries sc0 (GFX940 checks)
; or th:TH_ATOMIC_RETURN (GFX12 checks).
+ define <2 x i16 > @flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset (ptr %ptr , <2 x i16 > %data ) {
596
+ ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset:
597
+ ; GFX940: ; %bb.0:
598
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599
+ ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:4092 sc0
600
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
601
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
602
+ ;
603
+ ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset:
604
+ ; GFX12: ; %bb.0:
605
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
606
+ ; GFX12-NEXT: s_wait_expcnt 0x0
607
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
608
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
609
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
610
+ ; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
611
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
612
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
613
+ %gep = getelementptr <2 x i16 >, ptr %ptr , i64 1023 ; 1023 elements * 4 bytes = +4092 bytes
614
+ %result = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
615
+ ret <2 x i16 > %result
616
+ }
617
+
618
; Returning v2bf16 flat atomic fadd intrinsic (operands typed as <2 x i16> in
; this IR) whose address is %ptr moved back by 256 <2 x i16> elements
; (-1024 bytes). The GFX12 checks expect flat_atomic_pk_add_bf16 with
; offset:-1024; the GFX940 checks record offset:64512.
; NOTE(review): 64512 == 65536 - 1024, so the GFX940 expectation looks like the
; negative byte offset wrapped to an unsigned value rather than a deliberately
; chosen immediate -- confirm this is legal/intended codegen for that target.
+ define <2 x i16 > @flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset (ptr %ptr , <2 x i16 > %data ) {
619
+ ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset:
620
+ ; GFX940: ; %bb.0:
621
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
622
+ ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:64512 sc0
623
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
624
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
625
+ ;
626
+ ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset:
627
+ ; GFX12: ; %bb.0:
628
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
629
+ ; GFX12-NEXT: s_wait_expcnt 0x0
630
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
631
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
632
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
633
+ ; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
634
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
635
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
636
+ %gep = getelementptr <2 x i16 >, ptr %ptr , i64 -256 ; -256 elements * 4 bytes = -1024 bytes
637
+ %result = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
638
+ ret <2 x i16 > %result
639
+ }
640
+
641
; v2bf16 flat atomic fadd intrinsic (operands typed as <2 x i16> in this IR)
; whose result is never used (the function returns void), with the address
; being %ptr advanced by 1023 <2 x i16> elements. Both sets of checks expect
; flat_atomic_pk_add_bf16 with the immediate offset:4092, without a result
; register and without the sc0 / th:TH_ATOMIC_RETURN modifier.
+ define void @flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset (ptr %ptr , <2 x i16 > %data ) {
642
+ ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset:
643
+ ; GFX940: ; %bb.0:
644
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645
+ ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:4092
646
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
647
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
648
+ ;
649
+ ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset:
650
+ ; GFX12: ; %bb.0:
651
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
652
+ ; GFX12-NEXT: s_wait_expcnt 0x0
653
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
654
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
655
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
656
+ ; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:4092
657
+ ; GFX12-NEXT: s_wait_dscnt 0x0
658
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
659
+ %gep = getelementptr <2 x i16 >, ptr %ptr , i64 1023 ; 1023 elements * 4 bytes = +4092 bytes
660
+ %unused = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data ) ; result intentionally dropped
661
+ ret void
662
+ }
663
+
664
; v2bf16 flat atomic fadd intrinsic (operands typed as <2 x i16> in this IR)
; whose result is never used (the function returns void), with the address
; being %ptr moved back by 256 <2 x i16> elements (-1024 bytes). The GFX12
; checks expect flat_atomic_pk_add_bf16 with offset:-1024 and no result
; register; the GFX940 checks record offset:64512.
; NOTE(review): 64512 == 65536 - 1024, so the GFX940 expectation looks like the
; negative byte offset wrapped to an unsigned value rather than a deliberately
; chosen immediate -- confirm this is legal/intended codegen for that target.
+ define void @flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset (ptr %ptr , <2 x i16 > %data ) {
665
+ ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset:
666
+ ; GFX940: ; %bb.0:
667
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668
+ ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:64512
669
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
670
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
671
+ ;
672
+ ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset:
673
+ ; GFX12: ; %bb.0:
674
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
675
+ ; GFX12-NEXT: s_wait_expcnt 0x0
676
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
677
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
678
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
679
+ ; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:-1024
680
+ ; GFX12-NEXT: s_wait_dscnt 0x0
681
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
682
+ %gep = getelementptr <2 x i16 >, ptr %ptr , i64 -256 ; -256 elements * 4 bytes = -1024 bytes
683
+ %unused = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data ) ; result intentionally dropped
684
+ ret void
685
+ }
686
+
411
687
attributes #0 = { "denormal-fp-math-f32" ="ieee,ieee" }
0 commit comments