@@ -357,6 +357,65 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24
357
357
ret void
358
358
}
359
359
360
+ define amdgpu_cs_chain void @alloca_and_call () { ; Chain function that stores to a private alloca and then calls @use; the checks below expect s32 to be set before the call (16 on GFX11, 0x200 on GFX10).
361
+ ; GISEL-GFX11-LABEL: alloca_and_call:
362
+ ; GISEL-GFX11: ; %bb.0: ; %.entry
363
+ ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364
+ ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42
365
+ ; GISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo
366
+ ; GISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi
367
+ ; GISEL-GFX11-NEXT: s_mov_b32 s32, 16
368
+ ; GISEL-GFX11-NEXT: scratch_store_b32 off, v0, off offset:4
369
+ ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 4
370
+ ; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
371
+ ; GISEL-GFX11-NEXT: s_endpgm
372
+ ;
373
+ ; GISEL-GFX10-LABEL: alloca_and_call:
374
+ ; GISEL-GFX10: ; %bb.0: ; %.entry
375
+ ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
376
+ ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42
377
+ ; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
378
+ ; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
379
+ ; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
380
+ ; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
381
+ ; GISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
382
+ ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
383
+ ; GISEL-GFX10-NEXT: s_movk_i32 s32, 0x200
384
+ ; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
385
+ ; GISEL-GFX10-NEXT: s_endpgm
386
+ ;
387
+ ; DAGISEL-GFX11-LABEL: alloca_and_call:
388
+ ; DAGISEL-GFX11: ; %bb.0: ; %.entry
389
+ ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390
+ ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42
391
+ ; DAGISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi
392
+ ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo
393
+ ; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 16
394
+ ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v0, off offset:4
395
+ ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 4
396
+ ; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
397
+ ; DAGISEL-GFX11-NEXT: s_endpgm
398
+ ;
399
+ ; DAGISEL-GFX10-LABEL: alloca_and_call:
400
+ ; DAGISEL-GFX10: ; %bb.0: ; %.entry
401
+ ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402
+ ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42
403
+ ; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
404
+ ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
405
+ ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
406
+ ; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
407
+ ; DAGISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
408
+ ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
409
+ ; DAGISEL-GFX10-NEXT: s_movk_i32 s32, 0x200
410
+ ; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
411
+ ; DAGISEL-GFX10-NEXT: s_endpgm
412
+ .entry:
413
+ %v = alloca [3 x i32 ], addrspace (5 ) ; three-i32 object allocated in addrspace(5)
414
+ store i32 42 , ptr addrspace (5 ) %v ; all four check prefixes expect this store of 42 at offset:4
415
+ call amdgpu_gfx void @use (ptr addrspace (5 ) %v ) ; amdgpu_gfx call taking %v; the checks load 4 into v0 just before the call (presumably the pointer argument - confirm)
416
+ ret void ; the checks expect s_endpgm as the final instruction
417
+ }
418
+
360
419
define amdgpu_cs void @cs_to_chain (<3 x i32 > inreg %a , <3 x i32 > %b ) {
361
420
; GISEL-GFX11-LABEL: cs_to_chain:
362
421
; GISEL-GFX11: ; %bb.0:
@@ -807,9 +866,8 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_dont_realign_stack(i32 %idx) {
807
866
; GISEL-GFX11-NEXT: s_mov_b32 s1, 2
808
867
; GISEL-GFX11-NEXT: s_mov_b32 s0, 1
809
868
; GISEL-GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v8
810
- ; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
811
- ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
812
- ; GISEL-GFX11-NEXT: v_add_nc_u32_e32 v4, s32, v0
869
+ ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
870
+ ; GISEL-GFX11-NEXT: v_add_nc_u32_e32 v4, 32, v0
813
871
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
814
872
; GISEL-GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
815
873
; GISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
@@ -819,14 +877,12 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_dont_realign_stack(i32 %idx) {
819
877
; GISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
820
878
; GISEL-GFX10: ; %bb.0:
821
879
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
822
- ; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
823
880
; GISEL-GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v8
824
- ; GISEL-GFX10-NEXT: v_lshrrev_b32_e64 v2, 5, s32
825
881
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 1
882
+ ; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, 2
826
883
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, 3
827
884
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, 4
828
- ; GISEL-GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
829
- ; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, 2
885
+ ; GISEL-GFX10-NEXT: v_add_nc_u32_e32 v0, 32, v0
830
886
; GISEL-GFX10-NEXT: buffer_store_dword v1, v0, s[48:51], 0 offen
831
887
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
832
888
; GISEL-GFX10-NEXT: buffer_store_dword v2, v0, s[48:51], 0 offen offset:4
@@ -840,24 +896,21 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_dont_realign_stack(i32 %idx) {
840
896
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
841
897
; DAGISEL-GFX11: ; %bb.0:
842
898
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
843
- ; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
844
899
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
845
900
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
846
- ; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v4, v8, 4, s32
901
+ ; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v4, v8, 4, 32
847
902
; DAGISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
848
903
; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
849
904
; DAGISEL-GFX11-NEXT: s_endpgm
850
905
;
851
906
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
852
907
; DAGISEL-GFX10: ; %bb.0:
853
908
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
854
- ; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
855
909
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
856
- ; DAGISEL-GFX10-NEXT: v_lshrrev_b32_e64 v2, 5, s32
910
+ ; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v1, v8, 4, 32
911
+ ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 3
857
912
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, 2
858
913
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, 1
859
- ; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v1, v8, 4, v2
860
- ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 3
861
914
; DAGISEL-GFX10-NEXT: buffer_store_dword v0, v1, s[48:51], 0 offen offset:12
862
915
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
863
916
; DAGISEL-GFX10-NEXT: buffer_store_dword v2, v1, s[48:51], 0 offen offset:8
0 commit comments