@@ -481,6 +481,136 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
481
481
ret void
482
482
}
483
483
484
+ ; GCN-LABEL: {{^}}multi_stage_recurse2:
485
+ ; GCN: .set multi_stage_recurse2.num_vgpr, max(43, multi_stage_recurse1.num_vgpr)
486
+ ; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
487
+ ; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
488
+ ; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
489
+ ; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
490
+ ; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch)
491
+ ; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
492
+ ; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
493
+ ; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
494
+ ; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
495
+ ; GCN: NumVgprs: max(43, multi_stage_recurse1.num_vgpr)
496
+ ; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
497
+ ; GCN-LABEL: {{^}}multi_stage_recurse1:
498
+ ; GCN: .set multi_stage_recurse1.num_vgpr, max(48, amdgpu.max_num_vgpr)
499
+ ; GCN: .set multi_stage_recurse1.num_agpr, max(0, amdgpu.max_num_agpr)
500
+ ; GCN: .set multi_stage_recurse1.numbered_sgpr, max(34, amdgpu.max_num_sgpr)
501
+ ; GCN: .set multi_stage_recurse1.private_seg_size, 16
502
+ ; GCN: .set multi_stage_recurse1.uses_vcc, 1
503
+ ; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
504
+ ; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
505
+ ; GCN: .set multi_stage_recurse1.has_recursion, 1
506
+ ; GCN: .set multi_stage_recurse1.has_indirect_call, 0
507
+ ; GCN: TotalNumSgprs: multi_stage_recurse1.numbered_sgpr+4
508
+ ; GCN: NumVgprs: max(48, amdgpu.max_num_vgpr)
509
+ ; GCN: ScratchSize: 16
510
+ ; First half of a two-function recursion cycle: calls @multi_stage_recurse2,
+ ; which in turn calls back into this function.
+ ; The empty inline asm only clobbers v47; the checks above expect
+ ; num_vgpr = max(48, amdgpu.max_num_vgpr) for this function.
+ ; The expected symbols here are written in terms of amdgpu.max_num_* and
+ ; constants, not in terms of multi_stage_recurse2's symbols.
+ ; NOTE(review): presumably this avoids a cyclic symbol definition - confirm.
+ ; NOTE(review): attribute groups #2 and #0 are defined outside this excerpt.
+ define void @multi_stage_recurse1 (i32 %val ) #2 {
511
+ call void @multi_stage_recurse2 (i32 %val )
512
+ call void asm sideeffect "" , "~{v47}" () #0
513
+ ret void
514
+ }
515
+ ; Second half of the recursion cycle: calls @multi_stage_recurse1, which
+ ; calls this function.
+ ; The empty inline asm only clobbers v42; the checks above expect
+ ; num_vgpr = max(43, multi_stage_recurse1.num_vgpr), i.e. this function's
+ ; symbols are expressed in terms of multi_stage_recurse1's symbols.
+ ; NOTE(review): attribute groups #2 and #0 are defined outside this excerpt.
+ define void @multi_stage_recurse2 (i32 %val ) #2 {
516
+ call void @multi_stage_recurse1 (i32 %val )
517
+ call void asm sideeffect "" , "~{v42}" () #0
518
+ ret void
519
+ }
520
+
521
+ ; GCN-LABEL: {{^}}usage_multi_stage_recurse:
522
+ ; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
523
+ ; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
524
+ ; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
525
+ ; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
526
+ ; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
527
+ ; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
528
+ ; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
529
+ ; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
530
+ ; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
531
+ ; GCN: TotalNumSgprs: usage_multi_stage_recurse.numbered_sgpr+6
532
+ ; GCN: NumVgprs: usage_multi_stage_recurse.num_vgpr
533
+ ; GCN: ScratchSize: 16
534
+ ; Kernel entry point whose only call is to @multi_stage_recurse1.
+ ; The checks above expect every resource symbol of this kernel to be a
+ ; max()/or() over multi_stage_recurse1's corresponding symbol, with
+ ; has_recursion = or(1, ...) and a ScratchSize of 16.
+ define amdgpu_kernel void @usage_multi_stage_recurse (i32 %n ) #0 {
535
+ call void @multi_stage_recurse1 (i32 %n )
536
+ ret void
537
+ }
538
+
539
+ ; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
540
+ ; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
541
+ ; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
542
+ ; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(54, multi_stage_recurse_noattr1.numbered_sgpr)
543
+ ; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
544
+ ; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
545
+ ; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
546
+ ; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
547
+ ; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
548
+ ; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
549
+ ; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1))
550
+ ; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
551
+ ; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
552
+ ; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
553
+ ; GCN: .set multi_stage_recurse_noattr1.num_vgpr, max(41, amdgpu.max_num_vgpr)
554
+ ; GCN: .set multi_stage_recurse_noattr1.num_agpr, max(0, amdgpu.max_num_agpr)
555
+ ; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, max(57, amdgpu.max_num_sgpr)
556
+ ; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
557
+ ; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
558
+ ; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
559
+ ; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
560
+ ; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
561
+ ; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
562
+ ; GCN: TotalNumSgprs: multi_stage_recurse_noattr1.numbered_sgpr+4
563
+ ; GCN: NumVgprs: max(41, amdgpu.max_num_vgpr)
564
+ ; GCN: ScratchSize: 16
565
+ ; Same call shape as @multi_stage_recurse1, but the definition uses
+ ; attribute group #0 instead of #2: calls @multi_stage_recurse_noattr2, which
+ ; calls back into this function.
+ ; The empty inline asm only clobbers s56; the checks above expect
+ ; numbered_sgpr = max(57, amdgpu.max_num_sgpr) and has_recursion = 0.
+ ; NOTE(review): #0 is defined outside this excerpt; the has_recursion = 0
+ ; expectation suggests it marks the function as non-recursive - confirm.
+ define void @multi_stage_recurse_noattr1 (i32 %val ) #0 {
566
+ call void @multi_stage_recurse_noattr2 (i32 %val )
567
+ call void asm sideeffect "" , "~{s56}" () #0
568
+ ret void
569
+ }
570
+ ; Counterpart of @multi_stage_recurse_noattr1: calls it, and is called by it.
+ ; The empty inline asm only clobbers s53; the checks above expect
+ ; numbered_sgpr = max(54, multi_stage_recurse_noattr1.numbered_sgpr) and
+ ; has_recursion = or(0, multi_stage_recurse_noattr1.has_recursion).
+ ; NOTE(review): attribute group #0 is defined outside this excerpt.
+ define void @multi_stage_recurse_noattr2 (i32 %val ) #0 {
571
+ call void @multi_stage_recurse_noattr1 (i32 %val )
572
+ call void asm sideeffect "" , "~{s53}" () #0
573
+ ret void
574
+ }
575
+
576
+ ; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
577
+ ; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr)
578
+ ; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
579
+ ; GCN: .set usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr)
580
+ ; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size))
581
+ ; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
582
+ ; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch)
583
+ ; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
584
+ ; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
585
+ ; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
586
+ ; GCN: TotalNumSgprs: usage_multi_stage_recurse_noattrs.numbered_sgpr+6
587
+ ; GCN: NumVgprs: usage_multi_stage_recurse_noattrs.num_vgpr
588
+ ; GCN: ScratchSize: 16
589
+ ; Kernel entry point whose only call is to @multi_stage_recurse_noattr1.
+ ; The checks above expect every resource symbol of this kernel to be a
+ ; max()/or() over multi_stage_recurse_noattr1's corresponding symbol, with
+ ; has_recursion = or(0, ...) and a ScratchSize of 16.
+ define amdgpu_kernel void @usage_multi_stage_recurse_noattrs (i32 %n ) #0 {
590
+ call void @multi_stage_recurse_noattr1 (i32 %n )
591
+ ret void
592
+ }
593
+
594
+ ; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
595
+ ; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
596
+ ; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
597
+ ; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
598
+ ; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
599
+ ; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
600
+ ; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
601
+ ; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
602
+ ; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
603
+ ; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
604
+ ; GCN: TotalNumSgprs: multi_call_with_multi_stage_recurse.numbered_sgpr+6
605
+ ; GCN: NumVgprs: multi_call_with_multi_stage_recurse.num_vgpr
606
+ ; GCN: ScratchSize: 2052
607
+ ; Kernel that calls two other functions (@use_stack0, @use_stack1) and then
+ ; the recursive @multi_stage_recurse1.
+ ; The checks above expect each resource symbol to be a max()/or() over the
+ ; corresponding symbols of all three callees, with has_recursion = or(1, ...)
+ ; and a ScratchSize of 2052.
+ ; NOTE(review): @use_stack0 and @use_stack1 are defined outside this excerpt.
+ define amdgpu_kernel void @multi_call_with_multi_stage_recurse (i32 %n ) #0 {
608
+ call void @use_stack0 ()
609
+ call void @use_stack1 ()
610
+ call void @multi_stage_recurse1 (i32 %n )
611
+ ret void
612
+ }
613
+
484
614
; Make sure there's no assert when a sgpr96 is used.
485
615
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
486
616
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
0 commit comments