@@ -4000,116 +4000,6 @@ bb:
4000
4000
ret void
4001
4001
}
4002
4002
4003
- define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset (ptr addrspace (5 ) inreg %sgpr_base , i32 inreg %sidx , i32 %vidx ) {
4004
- ; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4005
- ; GFX9: ; %bb.0: ; %bb
4006
- ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4007
- ; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4008
- ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4009
- ; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4010
- ; GFX9-NEXT: v_mov_b32_e32 v1, 15
4011
- ; GFX9-NEXT: scratch_store_dword v0, v1, off
4012
- ; GFX9-NEXT: s_waitcnt vmcnt(0)
4013
- ; GFX9-NEXT: s_endpgm
4014
- ;
4015
- ; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4016
- ; GFX10: ; %bb.0: ; %bb
4017
- ; GFX10-NEXT: s_add_u32 s0, s0, s5
4018
- ; GFX10-NEXT: s_addc_u32 s1, s1, 0
4019
- ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4020
- ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4021
- ; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4022
- ; GFX10-NEXT: v_mov_b32_e32 v1, 15
4023
- ; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4024
- ; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4025
- ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4026
- ; GFX10-NEXT: s_endpgm
4027
- ;
4028
- ; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4029
- ; GFX940: ; %bb.0: ; %bb
4030
- ; GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4031
- ; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4032
- ; GFX940-NEXT: v_mov_b32_e32 v1, 15
4033
- ; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4034
- ; GFX940-NEXT: s_waitcnt vmcnt(0)
4035
- ; GFX940-NEXT: s_endpgm
4036
- ;
4037
- ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4038
- ; GFX11: ; %bb.0: ; %bb
4039
- ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4040
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4041
- ; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4042
- ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4043
- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4044
- ; GFX11-NEXT: s_endpgm
4045
- ;
4046
- ; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4047
- ; GFX12: ; %bb.0: ; %bb
4048
- ; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4049
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4050
- ; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4051
- ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4052
- ; GFX12-NEXT: s_wait_storecnt 0x0
4053
- ; GFX12-NEXT: s_endpgm
4054
- ;
4055
- ; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4056
- ; UNALIGNED_GFX9: ; %bb.0: ; %bb
4057
- ; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4058
- ; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4059
- ; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4060
- ; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4061
- ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
4062
- ; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
4063
- ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
4064
- ; UNALIGNED_GFX9-NEXT: s_endpgm
4065
- ;
4066
- ; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4067
- ; UNALIGNED_GFX10: ; %bb.0: ; %bb
4068
- ; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5
4069
- ; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0
4070
- ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4071
- ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4072
- ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4073
- ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
4074
- ; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4075
- ; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4076
- ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4077
- ; UNALIGNED_GFX10-NEXT: s_endpgm
4078
- ;
4079
- ; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4080
- ; UNALIGNED_GFX940: ; %bb.0: ; %bb
4081
- ; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4082
- ; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4083
- ; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15
4084
- ; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4085
- ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
4086
- ; UNALIGNED_GFX940-NEXT: s_endpgm
4087
- ;
4088
- ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4089
- ; UNALIGNED_GFX11: ; %bb.0: ; %bb
4090
- ; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4091
- ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4092
- ; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4093
- ; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4094
- ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4095
- ; UNALIGNED_GFX11-NEXT: s_endpgm
4096
- ;
4097
- ; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4098
- ; UNALIGNED_GFX12: ; %bb.0: ; %bb
4099
- ; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4100
- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4101
- ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4102
- ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4103
- ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
4104
- ; UNALIGNED_GFX12-NEXT: s_endpgm
4105
- bb:
4106
- %add1 = add nsw i32 %sidx , %vidx
4107
- %add2 = add nsw i32 %add1 , -16
4108
- %gep = getelementptr inbounds [16 x i8 ], ptr addrspace (5 ) %sgpr_base , i32 0 , i32 %add2
4109
- store volatile i32 15 , ptr addrspace (5 ) %gep , align 4
4110
- ret void
4111
- }
4112
-
4113
4003
define amdgpu_gs void @sgpr_base_negative_offset (ptr addrspace (1 ) %out , ptr addrspace (5 ) inreg %scevgep ) {
4114
4004
; GFX9-LABEL: sgpr_base_negative_offset:
4115
4005
; GFX9: ; %bb.0: ; %entry
0 commit comments