@@ -4000,6 +4000,116 @@ bb:
4000
4000
ret void
4001
4001
}
4002
4002
4003
+ ; Scratch addressing test: a volatile i32 store of 15 is made to the address
+ ; %sgpr_base + %sidx + %vidx + (-16), i.e. an inreg base pointer, an inreg
+ ; index, a non-inreg index and a negative constant offset.
+ ; In the expected output below, the GFX9 and GFX940 blocks (and their
+ ; UNALIGNED variants) fold the -16 into v_add3_u32 and issue the store with no
+ ; immediate offset, while the GFX10, GFX11 and GFX12 blocks (and their
+ ; UNALIGNED variants) carry it in the store's immediate field (offset:-16).
+ ; NOTE(review): the check lines look autogenerated (presumably by
+ ; update_llc_test_checks.py) -- regenerate rather than hand-edit; confirm.
+ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset (ptr addrspace (5 ) inreg %sgpr_base , i32 inreg %sidx , i32 %vidx ) {
4004
+ ; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4005
+ ; GFX9: ; %bb.0: ; %bb
4006
+ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4007
+ ; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4008
+ ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4009
+ ; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4010
+ ; GFX9-NEXT: v_mov_b32_e32 v1, 15
4011
+ ; GFX9-NEXT: scratch_store_dword v0, v1, off
4012
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
4013
+ ; GFX9-NEXT: s_endpgm
4014
+ ;
4015
+ ; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4016
+ ; GFX10: ; %bb.0: ; %bb
4017
+ ; GFX10-NEXT: s_add_u32 s0, s0, s5
4018
+ ; GFX10-NEXT: s_addc_u32 s1, s1, 0
4019
+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4020
+ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4021
+ ; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4022
+ ; GFX10-NEXT: v_mov_b32_e32 v1, 15
4023
+ ; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4024
+ ; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4025
+ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4026
+ ; GFX10-NEXT: s_endpgm
4027
+ ;
4028
+ ; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4029
+ ; GFX940: ; %bb.0: ; %bb
4030
+ ; GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4031
+ ; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4032
+ ; GFX940-NEXT: v_mov_b32_e32 v1, 15
4033
+ ; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4034
+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
4035
+ ; GFX940-NEXT: s_endpgm
4036
+ ;
4037
+ ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4038
+ ; GFX11: ; %bb.0: ; %bb
4039
+ ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4040
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4041
+ ; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4042
+ ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4043
+ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4044
+ ; GFX11-NEXT: s_endpgm
4045
+ ;
4046
+ ; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4047
+ ; GFX12: ; %bb.0: ; %bb
4048
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4049
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4050
+ ; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4051
+ ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4052
+ ; GFX12-NEXT: s_wait_storecnt 0x0
4053
+ ; GFX12-NEXT: s_endpgm
4054
+ ;
4055
+ ; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4056
+ ; UNALIGNED_GFX9: ; %bb.0: ; %bb
4057
+ ; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4058
+ ; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4059
+ ; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4060
+ ; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4061
+ ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
4062
+ ; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
4063
+ ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
4064
+ ; UNALIGNED_GFX9-NEXT: s_endpgm
4065
+ ;
4066
+ ; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4067
+ ; UNALIGNED_GFX10: ; %bb.0: ; %bb
4068
+ ; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5
4069
+ ; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0
4070
+ ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4071
+ ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4072
+ ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4073
+ ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
4074
+ ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4075
+ ; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4076
+ ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4077
+ ; UNALIGNED_GFX10-NEXT: s_endpgm
4078
+ ;
4079
+ ; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4080
+ ; UNALIGNED_GFX940: ; %bb.0: ; %bb
4081
+ ; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4082
+ ; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4083
+ ; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15
4084
+ ; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4085
+ ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
4086
+ ; UNALIGNED_GFX940-NEXT: s_endpgm
4087
+ ;
4088
+ ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4089
+ ; UNALIGNED_GFX11: ; %bb.0: ; %bb
4090
+ ; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4091
+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4092
+ ; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4093
+ ; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4094
+ ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4095
+ ; UNALIGNED_GFX11-NEXT: s_endpgm
4096
+ ;
4097
+ ; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4098
+ ; UNALIGNED_GFX12: ; %bb.0: ; %bb
4099
+ ; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4100
+ ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4101
+ ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4102
+ ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4103
+ ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
4104
+ ; UNALIGNED_GFX12-NEXT: s_endpgm
4105
+ bb:
4106
+ %add1 = add nsw i32 %sidx , %vidx ; sum of the inreg index and the non-inreg index
4107
+ %add2 = add nsw i32 %add1 , -16 ; apply the negative constant part of the offset
4108
+ %gep = getelementptr inbounds [16 x i8 ], ptr addrspace (5 ) %sgpr_base , i32 0 , i32 %add2 ; i8 elements, so %add2 is a byte offset from %sgpr_base
4109
+ store volatile i32 15 , ptr addrspace (5 ) %gep , align 4 ; volatile 4-byte store of the constant 15
4110
+ ret void
4111
+ }
4112
+
4003
4113
define amdgpu_gs void @sgpr_base_negative_offset (ptr addrspace (1 ) %out , ptr addrspace (5 ) inreg %scevgep ) {
4004
4114
; GFX9-LABEL: sgpr_base_negative_offset:
4005
4115
; GFX9: ; %bb.0: ; %entry
0 commit comments