@@ -3990,6 +3990,116 @@ bb:
3990
3990
ret void
3991
3991
}
3992
3992
3993
+ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset (ptr addrspace (5 ) inreg %sgpr_base , i32 inreg %sidx , i32 %vidx ) { ; Volatile i32 store of 15 to addrspace(5) at %sgpr_base + (%sidx + %vidx - 16); %sgpr_base and %sidx are inreg, %vidx is not.
3994
+ ; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
3995
+ ; GFX9: ; %bb.0: ; %bb
3996
+ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
3997
+ ; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
3998
+ ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
3999
+ ; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4000
+ ; GFX9-NEXT: v_mov_b32_e32 v1, 15
4001
+ ; GFX9-NEXT: scratch_store_dword v0, v1, off
4002
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
4003
+ ; GFX9-NEXT: s_endpgm
4004
+ ;
4005
+ ; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4006
+ ; GFX10: ; %bb.0: ; %bb
4007
+ ; GFX10-NEXT: s_add_u32 s0, s0, s5
4008
+ ; GFX10-NEXT: s_addc_u32 s1, s1, 0
4009
+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4010
+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4011
+ ; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4012
+ ; GFX10-NEXT: v_mov_b32_e32 v1, 15
4013
+ ; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4014
+ ; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4015
+ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4016
+ ; GFX10-NEXT: s_endpgm
4017
+ ;
4018
+ ; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4019
+ ; GFX940: ; %bb.0: ; %bb
4020
+ ; GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4021
+ ; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4022
+ ; GFX940-NEXT: v_mov_b32_e32 v1, 15
4023
+ ; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4024
+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
4025
+ ; GFX940-NEXT: s_endpgm
4026
+ ;
4027
+ ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4028
+ ; GFX11: ; %bb.0: ; %bb
4029
+ ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4030
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4031
+ ; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4032
+ ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4033
+ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4034
+ ; GFX11-NEXT: s_endpgm
4035
+ ;
4036
+ ; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4037
+ ; GFX12: ; %bb.0: ; %bb
4038
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4039
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4040
+ ; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4041
+ ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4042
+ ; GFX12-NEXT: s_wait_storecnt 0x0
4043
+ ; GFX12-NEXT: s_endpgm
4044
+ ;
4045
+ ; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4046
+ ; UNALIGNED_GFX9: ; %bb.0: ; %bb
4047
+ ; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4048
+ ; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4049
+ ; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4050
+ ; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4051
+ ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
4052
+ ; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
4053
+ ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
4054
+ ; UNALIGNED_GFX9-NEXT: s_endpgm
4055
+ ;
4056
+ ; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4057
+ ; UNALIGNED_GFX10: ; %bb.0: ; %bb
4058
+ ; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5
4059
+ ; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0
4060
+ ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4061
+ ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4062
+ ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4063
+ ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
4064
+ ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4065
+ ; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4066
+ ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4067
+ ; UNALIGNED_GFX10-NEXT: s_endpgm
4068
+ ;
4069
+ ; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4070
+ ; UNALIGNED_GFX940: ; %bb.0: ; %bb
4071
+ ; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4072
+ ; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4073
+ ; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15
4074
+ ; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4075
+ ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
4076
+ ; UNALIGNED_GFX940-NEXT: s_endpgm
4077
+ ;
4078
+ ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4079
+ ; UNALIGNED_GFX11: ; %bb.0: ; %bb
4080
+ ; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4081
+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4082
+ ; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4083
+ ; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4084
+ ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4085
+ ; UNALIGNED_GFX11-NEXT: s_endpgm
4086
+ ;
4087
+ ; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4088
+ ; UNALIGNED_GFX12: ; %bb.0: ; %bb
4089
+ ; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4090
+ ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4091
+ ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4092
+ ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4093
+ ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
4094
+ ; UNALIGNED_GFX12-NEXT: s_endpgm
4095
+ bb:
4096
+ %add1 = add nsw i32 %sidx , %vidx ; variable part of the index, inreg %sidx plus non-inreg %vidx
4097
+ %add2 = add nsw i32 %add1 , -16 ; apply the constant -16; the checks above show it either as the third v_add3_u32 operand or as offset:-16 on the scratch store
4098
+ %gep = getelementptr inbounds [16 x i8 ], ptr addrspace (5 ) %sgpr_base , i32 0 , i32 %add2 ; i8 element type, so %add2 indexes in single-byte steps from %sgpr_base
4099
+ store volatile i32 15 , ptr addrspace (5 ) %gep , align 4 ; volatile 4-byte store of the constant 15 through %gep
4100
+ ret void
4101
+ }
4102
+
3993
4103
define amdgpu_gs void @sgpr_base_negative_offset (ptr addrspace (1 ) %out , ptr addrspace (5 ) inreg %scevgep ) {
3994
4104
; GFX9-LABEL: sgpr_base_negative_offset:
3995
4105
; GFX9: ; %bb.0: ; %entry
0 commit comments