Skip to content

Commit 1d97f8f

Browse files
committed
AMDGPU: Add some addressing mode tests for flat atomic intrinsics
These cases are currently matched incorrectly with signed offsets.
1 parent ca7dc89 commit 1d97f8f

File tree

2 files changed

+504
-0
lines changed

2 files changed

+504
-0
lines changed

llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,4 +408,280 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
408408
ret <2 x i16> %ret
409409
}
410410

411+
; Returning flat f32 atomic fadd intrinsic at a positive constant offset.
; The address is %ptr advanced by 1023 float elements; the checks below expect
; that displacement to be folded into the instruction as offset:4092 for both
; check prefixes, using the returning form (sc0 on GFX940, th:TH_ATOMIC_RETURN
; on GFX12) with v0 as the destination register.
define float @flat_atomic_fadd_f32_intrinsic_ret__posoffset(ptr %ptr, float %data) {
412+
; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_ret__posoffset:
413+
; GFX940: ; %bb.0:
414+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415+
; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:4092 sc0
416+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
417+
; GFX940-NEXT: s_setpc_b64 s[30:31]
418+
;
419+
; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_ret__posoffset:
420+
; GFX12: ; %bb.0:
421+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
422+
; GFX12-NEXT: s_wait_expcnt 0x0
423+
; GFX12-NEXT: s_wait_samplecnt 0x0
424+
; GFX12-NEXT: s_wait_bvhcnt 0x0
425+
; GFX12-NEXT: s_wait_kmcnt 0x0
426+
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
427+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
428+
; GFX12-NEXT: s_setpc_b64 s[30:31]
429+
%gep = getelementptr float, ptr %ptr, i64 1023
430+
%result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %gep, float %data)
431+
ret float %result
432+
}
433+
434+
; Returning flat f32 atomic fadd intrinsic at a negative constant offset.
; The address is %ptr moved back by 256 float elements. The GFX12 checks
; expect the immediate offset:-1024, while the GFX940 checks expect
; offset:64512, which equals -1024 wrapped into 16 bits (65536 - 1024).
; NOTE(review): the commit that added this test says these cases are matched
; incorrectly with signed offsets; presumably the GFX940 output is the wrong
; one and these checks only pin the current behavior until selection is
; fixed - confirm against the GFX940 flat offset encoding.
define float @flat_atomic_fadd_f32_intrinsic_ret__negoffset(ptr %ptr, float %data) {
435+
; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_ret__negoffset:
436+
; GFX940: ; %bb.0:
437+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438+
; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:64512 sc0
439+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
440+
; GFX940-NEXT: s_setpc_b64 s[30:31]
441+
;
442+
; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_ret__negoffset:
443+
; GFX12: ; %bb.0:
444+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
445+
; GFX12-NEXT: s_wait_expcnt 0x0
446+
; GFX12-NEXT: s_wait_samplecnt 0x0
447+
; GFX12-NEXT: s_wait_bvhcnt 0x0
448+
; GFX12-NEXT: s_wait_kmcnt 0x0
449+
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
450+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
451+
; GFX12-NEXT: s_setpc_b64 s[30:31]
452+
%gep = getelementptr float, ptr %ptr, i64 -256
453+
%result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %gep, float %data)
454+
ret float %result
455+
}
456+
457+
; Flat f32 atomic fadd intrinsic at a positive constant offset whose result is
; never used. The address is %ptr advanced by 1023 float elements; the checks
; expect offset:4092 on the instruction for both check prefixes, and the
; non-returning form: no destination register and no sc0 or
; th:TH_ATOMIC_RETURN modifier.
define void @flat_atomic_fadd_f32_intrinsic_noret__posoffset(ptr %ptr, float %data) {
458+
; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_noret__posoffset:
459+
; GFX940: ; %bb.0:
460+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
461+
; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:4092
462+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
463+
; GFX940-NEXT: s_setpc_b64 s[30:31]
464+
;
465+
; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_noret__posoffset:
466+
; GFX12: ; %bb.0:
467+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
468+
; GFX12-NEXT: s_wait_expcnt 0x0
469+
; GFX12-NEXT: s_wait_samplecnt 0x0
470+
; GFX12-NEXT: s_wait_bvhcnt 0x0
471+
; GFX12-NEXT: s_wait_kmcnt 0x0
472+
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:4092
473+
; GFX12-NEXT: s_wait_dscnt 0x0
474+
; GFX12-NEXT: s_setpc_b64 s[30:31]
475+
%gep = getelementptr float, ptr %ptr, i64 1023
476+
%unused = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %gep, float %data)
477+
ret void
478+
}
479+
480+
; Flat f32 atomic fadd intrinsic at a negative constant offset whose result is
; never used, so the checks expect the non-returning instruction form.
; The address is %ptr moved back by 256 float elements. The GFX12 checks
; expect the immediate offset:-1024, while the GFX940 checks expect
; offset:64512, which equals -1024 wrapped into 16 bits (65536 - 1024).
; NOTE(review): the commit that added this test says these cases are matched
; incorrectly with signed offsets; presumably the GFX940 output is the wrong
; one and these checks only pin the current behavior until selection is
; fixed - confirm against the GFX940 flat offset encoding.
define void @flat_atomic_fadd_f32_intrinsic_noret__negoffset(ptr %ptr, float %data) {
481+
; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_noret__negoffset:
482+
; GFX940: ; %bb.0:
483+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484+
; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:64512
485+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
486+
; GFX940-NEXT: s_setpc_b64 s[30:31]
487+
;
488+
; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_noret__negoffset:
489+
; GFX12: ; %bb.0:
490+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
491+
; GFX12-NEXT: s_wait_expcnt 0x0
492+
; GFX12-NEXT: s_wait_samplecnt 0x0
493+
; GFX12-NEXT: s_wait_bvhcnt 0x0
494+
; GFX12-NEXT: s_wait_kmcnt 0x0
495+
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:-1024
496+
; GFX12-NEXT: s_wait_dscnt 0x0
497+
; GFX12-NEXT: s_setpc_b64 s[30:31]
498+
%gep = getelementptr float, ptr %ptr, i64 -256
499+
%unused = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %gep, float %data)
500+
ret void
501+
}
502+
503+
; Returning flat <2 x half> atomic fadd intrinsic at a positive constant
; offset. The address is %ptr advanced by 1023 <2 x half> elements; the checks
; expect flat_atomic_pk_add_f16 with that displacement folded in as
; offset:4092 for both check prefixes, using the returning form (sc0 on
; GFX940, th:TH_ATOMIC_RETURN on GFX12) with v0 as the destination register.
define <2 x half> @flat_atomic_fadd_v2f16_intrinsic_ret__posoffset(ptr %ptr, <2 x half> %data) {
504+
; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__posoffset:
505+
; GFX940: ; %bb.0:
506+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507+
; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:4092 sc0
508+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
509+
; GFX940-NEXT: s_setpc_b64 s[30:31]
510+
;
511+
; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__posoffset:
512+
; GFX12: ; %bb.0:
513+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
514+
; GFX12-NEXT: s_wait_expcnt 0x0
515+
; GFX12-NEXT: s_wait_samplecnt 0x0
516+
; GFX12-NEXT: s_wait_bvhcnt 0x0
517+
; GFX12-NEXT: s_wait_kmcnt 0x0
518+
; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
519+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
520+
; GFX12-NEXT: s_setpc_b64 s[30:31]
521+
%gep = getelementptr <2 x half>, ptr %ptr, i64 1023
522+
%result = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %gep, <2 x half> %data)
523+
ret <2 x half> %result
524+
}
525+
526+
; Returning flat <2 x half> atomic fadd intrinsic at a negative constant
; offset. The address is %ptr moved back by 256 <2 x half> elements. The GFX12
; checks expect the immediate offset:-1024, while the GFX940 checks expect
; offset:64512, which equals -1024 wrapped into 16 bits (65536 - 1024).
; NOTE(review): the commit that added this test says these cases are matched
; incorrectly with signed offsets; presumably the GFX940 output is the wrong
; one and these checks only pin the current behavior until selection is
; fixed - confirm against the GFX940 flat offset encoding.
define <2 x half> @flat_atomic_fadd_v2f16_intrinsic_ret__negoffset(ptr %ptr, <2 x half> %data) {
527+
; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__negoffset:
528+
; GFX940: ; %bb.0:
529+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530+
; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:64512 sc0
531+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
532+
; GFX940-NEXT: s_setpc_b64 s[30:31]
533+
;
534+
; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__negoffset:
535+
; GFX12: ; %bb.0:
536+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
537+
; GFX12-NEXT: s_wait_expcnt 0x0
538+
; GFX12-NEXT: s_wait_samplecnt 0x0
539+
; GFX12-NEXT: s_wait_bvhcnt 0x0
540+
; GFX12-NEXT: s_wait_kmcnt 0x0
541+
; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
542+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
543+
; GFX12-NEXT: s_setpc_b64 s[30:31]
544+
%gep = getelementptr <2 x half>, ptr %ptr, i64 -256
545+
%result = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %gep, <2 x half> %data)
546+
ret <2 x half> %result
547+
}
548+
549+
; Flat <2 x half> atomic fadd intrinsic at a positive constant offset whose
; result is never used. The address is %ptr advanced by 1023 <2 x half>
; elements; the checks expect flat_atomic_pk_add_f16 with offset:4092 for both
; check prefixes, in the non-returning form: no destination register and no
; sc0 or th:TH_ATOMIC_RETURN modifier.
define void @flat_atomic_fadd_v2f16_intrinsic_noret__posoffset(ptr %ptr, <2 x half> %data) {
550+
; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__posoffset:
551+
; GFX940: ; %bb.0:
552+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553+
; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:4092
554+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
555+
; GFX940-NEXT: s_setpc_b64 s[30:31]
556+
;
557+
; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__posoffset:
558+
; GFX12: ; %bb.0:
559+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
560+
; GFX12-NEXT: s_wait_expcnt 0x0
561+
; GFX12-NEXT: s_wait_samplecnt 0x0
562+
; GFX12-NEXT: s_wait_bvhcnt 0x0
563+
; GFX12-NEXT: s_wait_kmcnt 0x0
564+
; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:4092
565+
; GFX12-NEXT: s_wait_dscnt 0x0
566+
; GFX12-NEXT: s_setpc_b64 s[30:31]
567+
%gep = getelementptr <2 x half>, ptr %ptr, i64 1023
568+
%unused = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %gep, <2 x half> %data)
569+
ret void
570+
}
571+
572+
; Flat <2 x half> atomic fadd intrinsic at a negative constant offset whose
; result is never used, so the checks expect the non-returning instruction
; form. The address is %ptr moved back by 256 <2 x half> elements. The GFX12
; checks expect the immediate offset:-1024, while the GFX940 checks expect
; offset:64512, which equals -1024 wrapped into 16 bits (65536 - 1024).
; NOTE(review): the commit that added this test says these cases are matched
; incorrectly with signed offsets; presumably the GFX940 output is the wrong
; one and these checks only pin the current behavior until selection is
; fixed - confirm against the GFX940 flat offset encoding.
define void @flat_atomic_fadd_v2f16_intrinsic_noret__negoffset(ptr %ptr, <2 x half> %data) {
573+
; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__negoffset:
574+
; GFX940: ; %bb.0:
575+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576+
; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:64512
577+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
578+
; GFX940-NEXT: s_setpc_b64 s[30:31]
579+
;
580+
; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__negoffset:
581+
; GFX12: ; %bb.0:
582+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
583+
; GFX12-NEXT: s_wait_expcnt 0x0
584+
; GFX12-NEXT: s_wait_samplecnt 0x0
585+
; GFX12-NEXT: s_wait_bvhcnt 0x0
586+
; GFX12-NEXT: s_wait_kmcnt 0x0
587+
; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:-1024
588+
; GFX12-NEXT: s_wait_dscnt 0x0
589+
; GFX12-NEXT: s_setpc_b64 s[30:31]
590+
%gep = getelementptr <2 x half>, ptr %ptr, i64 -256
591+
%unused = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %gep, <2 x half> %data)
592+
ret void
593+
}
594+
595+
; Returning flat v2bf16 atomic fadd intrinsic (operands typed as <2 x i16> in
; this IR) at a positive constant offset. The address is %ptr advanced by 1023
; <2 x i16> elements; the checks expect flat_atomic_pk_add_bf16 with that
; displacement folded in as offset:4092 for both check prefixes, using the
; returning form (sc0 on GFX940, th:TH_ATOMIC_RETURN on GFX12) with v0 as the
; destination register.
define <2 x i16> @flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset(ptr %ptr, <2 x i16> %data) {
596+
; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset:
597+
; GFX940: ; %bb.0:
598+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599+
; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:4092 sc0
600+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
601+
; GFX940-NEXT: s_setpc_b64 s[30:31]
602+
;
603+
; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset:
604+
; GFX12: ; %bb.0:
605+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
606+
; GFX12-NEXT: s_wait_expcnt 0x0
607+
; GFX12-NEXT: s_wait_samplecnt 0x0
608+
; GFX12-NEXT: s_wait_bvhcnt 0x0
609+
; GFX12-NEXT: s_wait_kmcnt 0x0
610+
; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
611+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
612+
; GFX12-NEXT: s_setpc_b64 s[30:31]
613+
%gep = getelementptr <2 x i16>, ptr %ptr, i64 1023
614+
%result = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16(ptr %gep, <2 x i16> %data)
615+
ret <2 x i16> %result
616+
}
617+
618+
; Returning flat v2bf16 atomic fadd intrinsic (operands typed as <2 x i16> in
; this IR) at a negative constant offset. The address is %ptr moved back by
; 256 <2 x i16> elements. The GFX12 checks expect the immediate offset:-1024,
; while the GFX940 checks expect offset:64512, which equals -1024 wrapped into
; 16 bits (65536 - 1024).
; NOTE(review): the commit that added this test says these cases are matched
; incorrectly with signed offsets; presumably the GFX940 output is the wrong
; one and these checks only pin the current behavior until selection is
; fixed - confirm against the GFX940 flat offset encoding.
define <2 x i16> @flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset(ptr %ptr, <2 x i16> %data) {
619+
; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset:
620+
; GFX940: ; %bb.0:
621+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
622+
; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:64512 sc0
623+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
624+
; GFX940-NEXT: s_setpc_b64 s[30:31]
625+
;
626+
; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset:
627+
; GFX12: ; %bb.0:
628+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
629+
; GFX12-NEXT: s_wait_expcnt 0x0
630+
; GFX12-NEXT: s_wait_samplecnt 0x0
631+
; GFX12-NEXT: s_wait_bvhcnt 0x0
632+
; GFX12-NEXT: s_wait_kmcnt 0x0
633+
; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
634+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
635+
; GFX12-NEXT: s_setpc_b64 s[30:31]
636+
%gep = getelementptr <2 x i16>, ptr %ptr, i64 -256
637+
%result = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16(ptr %gep, <2 x i16> %data)
638+
ret <2 x i16> %result
639+
}
640+
641+
; Flat v2bf16 atomic fadd intrinsic (operands typed as <2 x i16> in this IR)
; at a positive constant offset whose result is never used. The address is
; %ptr advanced by 1023 <2 x i16> elements; the checks expect
; flat_atomic_pk_add_bf16 with offset:4092 for both check prefixes, in the
; non-returning form: no destination register and no sc0 or
; th:TH_ATOMIC_RETURN modifier.
define void @flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset(ptr %ptr, <2 x i16> %data) {
642+
; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset:
643+
; GFX940: ; %bb.0:
644+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645+
; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:4092
646+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
647+
; GFX940-NEXT: s_setpc_b64 s[30:31]
648+
;
649+
; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset:
650+
; GFX12: ; %bb.0:
651+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
652+
; GFX12-NEXT: s_wait_expcnt 0x0
653+
; GFX12-NEXT: s_wait_samplecnt 0x0
654+
; GFX12-NEXT: s_wait_bvhcnt 0x0
655+
; GFX12-NEXT: s_wait_kmcnt 0x0
656+
; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:4092
657+
; GFX12-NEXT: s_wait_dscnt 0x0
658+
; GFX12-NEXT: s_setpc_b64 s[30:31]
659+
%gep = getelementptr <2 x i16>, ptr %ptr, i64 1023
660+
%unused = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16(ptr %gep, <2 x i16> %data)
661+
ret void
662+
}
663+
664+
; Flat v2bf16 atomic fadd intrinsic (operands typed as <2 x i16> in this IR)
; at a negative constant offset whose result is never used, so the checks
; expect the non-returning instruction form. The address is %ptr moved back by
; 256 <2 x i16> elements. The GFX12 checks expect the immediate offset:-1024,
; while the GFX940 checks expect offset:64512, which equals -1024 wrapped into
; 16 bits (65536 - 1024).
; NOTE(review): the commit that added this test says these cases are matched
; incorrectly with signed offsets; presumably the GFX940 output is the wrong
; one and these checks only pin the current behavior until selection is
; fixed - confirm against the GFX940 flat offset encoding.
define void @flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset(ptr %ptr, <2 x i16> %data) {
665+
; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset:
666+
; GFX940: ; %bb.0:
667+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668+
; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:64512
669+
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
670+
; GFX940-NEXT: s_setpc_b64 s[30:31]
671+
;
672+
; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset:
673+
; GFX12: ; %bb.0:
674+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
675+
; GFX12-NEXT: s_wait_expcnt 0x0
676+
; GFX12-NEXT: s_wait_samplecnt 0x0
677+
; GFX12-NEXT: s_wait_bvhcnt 0x0
678+
; GFX12-NEXT: s_wait_kmcnt 0x0
679+
; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:-1024
680+
; GFX12-NEXT: s_wait_dscnt 0x0
681+
; GFX12-NEXT: s_setpc_b64 s[30:31]
682+
%gep = getelementptr <2 x i16>, ptr %ptr, i64 -256
683+
%unused = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16(ptr %gep, <2 x i16> %data)
684+
ret void
685+
}
686+
411687
; Attribute group #0: sets "denormal-fp-math-f32" to "ieee,ieee" for any
; function that references the group.
attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }

0 commit comments

Comments
 (0)