@@ -481,6 +481,132 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
481
481
ret void
482
482
}
483
483
484
+ ; GCN-LABEL: {{^}}multi_stage_recurse2:
485
+ ; GCN: .set multi_stage_recurse2.num_vgpr, max(41, multi_stage_recurse1.num_vgpr)
486
+ ; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
487
+ ; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
488
+ ; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
489
+ ; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
490
+ ; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch)
491
+ ; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
492
+ ; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
493
+ ; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
494
+ ; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
495
+ ; GCN: NumVgprs: max(41, multi_stage_recurse1.num_vgpr)
496
+ ; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
497
+ ; GCN-LABEL: {{^}}multi_stage_recurse1:
498
+ ; GCN: .set multi_stage_recurse1.num_vgpr, 41
499
+ ; GCN: .set multi_stage_recurse1.num_agpr, 0
500
+ ; GCN: .set multi_stage_recurse1.numbered_sgpr, 34
501
+ ; GCN: .set multi_stage_recurse1.private_seg_size, 16
502
+ ; GCN: .set multi_stage_recurse1.uses_vcc, 1
503
+ ; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
504
+ ; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
505
+ ; GCN: .set multi_stage_recurse1.has_recursion, 1
506
+ ; GCN: .set multi_stage_recurse1.has_indirect_call, 0
507
+ ; GCN: TotalNumSgprs: 38
508
+ ; GCN: NumVgprs: 41
509
+ ; GCN: ScratchSize: 16
510
+ define void @multi_stage_recurse1 (i32 %val ) #2 { ; One half of a two-function call cycle; forwards %val unchanged. Attribute group #2 is defined outside this excerpt - presumably it is what makes the checks expect has_recursion = 1, TODO confirm.
511
+ call void @multi_stage_recurse2 (i32 %val ) ; @multi_stage_recurse2 calls straight back into this function, closing the 1 -> 2 -> 1 cycle
512
+ ret void
513
+ }
514
+ define void @multi_stage_recurse2 (i32 %val ) #2 { ; Other half of the two-function call cycle; its expected resource symbols are written in terms of multi_stage_recurse1.* in the checks for this function.
515
+ call void @multi_stage_recurse1 (i32 %val ) ; calls back into @multi_stage_recurse1, which in turn calls this function
516
+ ret void
517
+ }
518
+
519
+ ; GCN-LABEL: {{^}}usage_multi_stage_recurse:
520
+ ; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
521
+ ; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
522
+ ; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
523
+ ; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
524
+ ; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
525
+ ; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
526
+ ; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
527
+ ; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
528
+ ; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
529
+ ; GCN: TotalNumSgprs: 40
530
+ ; GCN: NumVgprs: 41
531
+ ; GCN: ScratchSize: 16
532
+ define amdgpu_kernel void @usage_multi_stage_recurse (i32 %n ) #0 { ; Kernel whose only call is into the multi_stage_recurse1/2 cycle; the checks for this kernel expect fully resolved totals (literal numbers).
533
+ call void @multi_stage_recurse1 (i32 %n ) ; enters the cycle through @multi_stage_recurse1 only, so the expected .set expressions reference multi_stage_recurse1.* symbols
534
+ ret void
535
+ }
536
+
537
+ ; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
538
+ ; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
539
+ ; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
540
+ ; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(34, multi_stage_recurse_noattr1.numbered_sgpr)
541
+ ; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
542
+ ; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
543
+ ; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
544
+ ; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
545
+ ; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
546
+ ; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
547
+ ; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1))
548
+ ; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
549
+ ; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
550
+ ; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
551
+ ; GCN: .set multi_stage_recurse_noattr1.num_vgpr, 41
552
+ ; GCN: .set multi_stage_recurse_noattr1.num_agpr, 0
553
+ ; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, 34
554
+ ; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
555
+ ; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
556
+ ; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
557
+ ; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
558
+ ; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
559
+ ; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
560
+ ; GCN: TotalNumSgprs: 38
561
+ ; GCN: NumVgprs: 41
562
+ ; GCN: ScratchSize: 16
563
+ define void @multi_stage_recurse_noattr1 (i32 %val ) #0 { ; Same call shape as @multi_stage_recurse1 but with attribute group #0 instead of #2; the checks for this variant expect has_recursion = 0. #0 is defined outside this excerpt - TODO confirm how it differs from #2.
564
+ call void @multi_stage_recurse_noattr2 (i32 %val ) ; @multi_stage_recurse_noattr2 calls straight back into this function
565
+ ret void
566
+ }
567
+ define void @multi_stage_recurse_noattr2 (i32 %val ) #0 { ; Other half of the #0-attributed call cycle; its expected resource symbols are written in terms of multi_stage_recurse_noattr1.* in the checks for this function.
568
+ call void @multi_stage_recurse_noattr1 (i32 %val ) ; calls back into @multi_stage_recurse_noattr1, which in turn calls this function
569
+ ret void
570
+ }
571
+
572
+ ; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
573
+ ; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr)
574
+ ; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
575
+ ; GCN: .set usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr)
576
+ ; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size))
577
+ ; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
578
+ ; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch)
579
+ ; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
580
+ ; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
581
+ ; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
582
+ ; GCN: TotalNumSgprs: 40
583
+ ; GCN: NumVgprs: 41
584
+ ; GCN: ScratchSize: 16
585
+ define amdgpu_kernel void @usage_multi_stage_recurse_noattrs (i32 %n ) #0 { ; Kernel whose only call is into the #0-attributed multi_stage_recurse_noattr1/2 cycle; the checks for this kernel expect has_recursion to be or(0, ...) rather than or(1, ...).
586
+ call void @multi_stage_recurse_noattr1 (i32 %n ) ; enters the cycle through @multi_stage_recurse_noattr1 only
587
+ ret void
588
+ }
589
+
590
+ ; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
591
+ ; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
592
+ ; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
593
+ ; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
594
+ ; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
595
+ ; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
596
+ ; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
597
+ ; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
598
+ ; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
599
+ ; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
600
+ ; GCN: TotalNumSgprs: 49
601
+ ; GCN: NumVgprs: 41
602
+ ; GCN: ScratchSize: 2052
603
+ define amdgpu_kernel void @multi_call_with_multi_stage_recurse (i32 %n ) #0 { ; Kernel that mixes two non-cycle callees with the multi_stage_recurse1/2 cycle; the checks expect each .set expression to list all three callees.
604
+ call void @use_stack0 () ; @use_stack0 is defined outside this excerpt
605
+ call void @use_stack1 () ; @use_stack1 is defined outside this excerpt
606
+ call void @multi_stage_recurse1 (i32 %n ) ; entry into the two-function call cycle
607
+ ret void
608
+ }
609
+
484
610
; Make sure there's no assert when an sgpr96 is used.
485
611
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
486
612
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
0 commit comments