@@ -2090,69 +2090,69 @@ define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
2090
2090
; GCN-LABEL: dyn_extract_v16f64_s_s:
2091
2091
; GCN: ; %bb.0: ; %entry
2092
2092
; GCN-NEXT: s_mov_b32 s66, 0
2093
+ ; GCN-NEXT: s_mov_b32 s64, 0
2094
+ ; GCN-NEXT: s_mov_b32 s62, 0
2095
+ ; GCN-NEXT: s_mov_b32 s60, 0
2096
+ ; GCN-NEXT: s_mov_b32 s58, 0
2097
+ ; GCN-NEXT: s_mov_b32 s56, 0
2098
+ ; GCN-NEXT: s_mov_b32 s54, 0
2099
+ ; GCN-NEXT: s_mov_b32 s52, 0
2100
+ ; GCN-NEXT: s_mov_b32 s50, 0
2101
+ ; GCN-NEXT: s_mov_b32 s48, 0
2102
+ ; GCN-NEXT: s_mov_b32 s46, 0
2103
+ ; GCN-NEXT: s_mov_b32 s44, 0
2104
+ ; GCN-NEXT: s_mov_b32 s40, 0
2093
2105
; GCN-NEXT: s_mov_b64 s[36:37], 1.0
2094
2106
; GCN-NEXT: s_mov_b32 m0, s2
2095
2107
; GCN-NEXT: s_mov_b32 s67, 0x40300000
2096
2108
; GCN-NEXT: s_mov_b32 s65, 0x402e0000
2097
- ; GCN-NEXT: s_mov_b32 s64, s66
2098
2109
; GCN-NEXT: s_mov_b32 s63, 0x402c0000
2099
- ; GCN-NEXT: s_mov_b32 s62, s66
2100
2110
; GCN-NEXT: s_mov_b32 s61, 0x402a0000
2101
- ; GCN-NEXT: s_mov_b32 s60, s66
2102
2111
; GCN-NEXT: s_mov_b32 s59, 0x40280000
2103
- ; GCN-NEXT: s_mov_b32 s58, s66
2104
2112
; GCN-NEXT: s_mov_b32 s57, 0x40260000
2105
- ; GCN-NEXT: s_mov_b32 s56, s66
2106
2113
; GCN-NEXT: s_mov_b32 s55, 0x40240000
2107
- ; GCN-NEXT: s_mov_b32 s54, s66
2108
2114
; GCN-NEXT: s_mov_b32 s53, 0x40220000
2109
- ; GCN-NEXT: s_mov_b32 s52, s66
2110
2115
; GCN-NEXT: s_mov_b32 s51, 0x40200000
2111
- ; GCN-NEXT: s_mov_b32 s50, s66
2112
2116
; GCN-NEXT: s_mov_b32 s49, 0x401c0000
2113
- ; GCN-NEXT: s_mov_b32 s48, s66
2114
2117
; GCN-NEXT: s_mov_b32 s47, 0x40180000
2115
- ; GCN-NEXT: s_mov_b32 s46, s66
2116
2118
; GCN-NEXT: s_mov_b32 s45, 0x40140000
2117
- ; GCN-NEXT: s_mov_b32 s44, s66
2118
2119
; GCN-NEXT: s_mov_b64 s[42:43], 4.0
2119
2120
; GCN-NEXT: s_mov_b32 s41, 0x40080000
2120
- ; GCN-NEXT: s_mov_b32 s40, s66
2121
2121
; GCN-NEXT: s_mov_b64 s[38:39], 2.0
2122
2122
; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37]
2123
2123
; GCN-NEXT: ; return to shader part epilog
2124
2124
;
2125
2125
; GFX10PLUS-LABEL: dyn_extract_v16f64_s_s:
2126
2126
; GFX10PLUS: ; %bb.0: ; %entry
2127
- ; GFX10PLUS-NEXT: s_mov_b32 s66, 0
2128
2127
; GFX10PLUS-NEXT: s_mov_b64 s[36:37], 1.0
2129
2128
; GFX10PLUS-NEXT: s_mov_b32 m0, s2
2129
+ ; GFX10PLUS-NEXT: s_mov_b32 s66, 0
2130
+ ; GFX10PLUS-NEXT: s_mov_b32 s64, 0
2131
+ ; GFX10PLUS-NEXT: s_mov_b32 s62, 0
2132
+ ; GFX10PLUS-NEXT: s_mov_b32 s60, 0
2133
+ ; GFX10PLUS-NEXT: s_mov_b32 s58, 0
2134
+ ; GFX10PLUS-NEXT: s_mov_b32 s56, 0
2135
+ ; GFX10PLUS-NEXT: s_mov_b32 s54, 0
2136
+ ; GFX10PLUS-NEXT: s_mov_b32 s52, 0
2137
+ ; GFX10PLUS-NEXT: s_mov_b32 s50, 0
2138
+ ; GFX10PLUS-NEXT: s_mov_b32 s48, 0
2139
+ ; GFX10PLUS-NEXT: s_mov_b32 s46, 0
2140
+ ; GFX10PLUS-NEXT: s_mov_b32 s44, 0
2141
+ ; GFX10PLUS-NEXT: s_mov_b32 s40, 0
2130
2142
; GFX10PLUS-NEXT: s_mov_b32 s67, 0x40300000
2131
2143
; GFX10PLUS-NEXT: s_mov_b32 s65, 0x402e0000
2132
- ; GFX10PLUS-NEXT: s_mov_b32 s64, s66
2133
2144
; GFX10PLUS-NEXT: s_mov_b32 s63, 0x402c0000
2134
- ; GFX10PLUS-NEXT: s_mov_b32 s62, s66
2135
2145
; GFX10PLUS-NEXT: s_mov_b32 s61, 0x402a0000
2136
- ; GFX10PLUS-NEXT: s_mov_b32 s60, s66
2137
2146
; GFX10PLUS-NEXT: s_mov_b32 s59, 0x40280000
2138
- ; GFX10PLUS-NEXT: s_mov_b32 s58, s66
2139
2147
; GFX10PLUS-NEXT: s_mov_b32 s57, 0x40260000
2140
- ; GFX10PLUS-NEXT: s_mov_b32 s56, s66
2141
2148
; GFX10PLUS-NEXT: s_mov_b32 s55, 0x40240000
2142
- ; GFX10PLUS-NEXT: s_mov_b32 s54, s66
2143
2149
; GFX10PLUS-NEXT: s_mov_b32 s53, 0x40220000
2144
- ; GFX10PLUS-NEXT: s_mov_b32 s52, s66
2145
2150
; GFX10PLUS-NEXT: s_mov_b32 s51, 0x40200000
2146
- ; GFX10PLUS-NEXT: s_mov_b32 s50, s66
2147
2151
; GFX10PLUS-NEXT: s_mov_b32 s49, 0x401c0000
2148
- ; GFX10PLUS-NEXT: s_mov_b32 s48, s66
2149
2152
; GFX10PLUS-NEXT: s_mov_b32 s47, 0x40180000
2150
- ; GFX10PLUS-NEXT: s_mov_b32 s46, s66
2151
2153
; GFX10PLUS-NEXT: s_mov_b32 s45, 0x40140000
2152
- ; GFX10PLUS-NEXT: s_mov_b32 s44, s66
2153
2154
; GFX10PLUS-NEXT: s_mov_b64 s[42:43], 4.0
2154
2155
; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40080000
2155
- ; GFX10PLUS-NEXT: s_mov_b32 s40, s66
2156
2156
; GFX10PLUS-NEXT: s_mov_b64 s[38:39], 2.0
2157
2157
; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[36:37]
2158
2158
; GFX10PLUS-NEXT: ; return to shader part epilog
@@ -3085,10 +3085,10 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
3085
3085
; GPRIDX-NEXT: ; %bb.0: ; %entry
3086
3086
; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3087
3087
; GPRIDX-NEXT: s_load_dword s8, s[4:5], 0x8
3088
+ ; GPRIDX-NEXT: s_mov_b32 s4, 0
3089
+ ; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000
3088
3090
; GPRIDX-NEXT: s_mov_b32 s2, 0
3089
3091
; GPRIDX-NEXT: s_mov_b32 s3, 0x40140000
3090
- ; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000
3091
- ; GPRIDX-NEXT: s_mov_b32 s4, s2
3092
3092
; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
3093
3093
; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1
3094
3094
; GPRIDX-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0
@@ -3176,10 +3176,10 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
3176
3176
; MOVREL-NEXT: ; %bb.0: ; %entry
3177
3177
; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3178
3178
; MOVREL-NEXT: s_load_dword s8, s[4:5], 0x8
3179
+ ; MOVREL-NEXT: s_mov_b32 s4, 0
3180
+ ; MOVREL-NEXT: s_mov_b32 s5, 0x40080000
3179
3181
; MOVREL-NEXT: s_mov_b32 s2, 0
3180
3182
; MOVREL-NEXT: s_mov_b32 s3, 0x40140000
3181
- ; MOVREL-NEXT: s_mov_b32 s5, 0x40080000
3182
- ; MOVREL-NEXT: s_mov_b32 s4, s2
3183
3183
; MOVREL-NEXT: s_waitcnt lgkmcnt(0)
3184
3184
; MOVREL-NEXT: s_cmp_eq_u32 s8, 1
3185
3185
; MOVREL-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0
@@ -3207,7 +3207,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
3207
3207
; GFX10-NEXT: kernel_code_entry_byte_offset = 256
3208
3208
; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
3209
3209
; GFX10-NEXT: granulated_workitem_vgpr_count = 0
3210
- ; GFX10-NEXT: granulated_wavefront_sgpr_count = 1
3210
+ ; GFX10-NEXT: granulated_wavefront_sgpr_count = 0
3211
3211
; GFX10-NEXT: priority = 0
3212
3212
; GFX10-NEXT: float_mode = 240
3213
3213
; GFX10-NEXT: priv = 0
@@ -3250,7 +3250,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
3250
3250
; GFX10-NEXT: gds_segment_byte_size = 0
3251
3251
; GFX10-NEXT: kernarg_segment_byte_size = 12
3252
3252
; GFX10-NEXT: workgroup_fbarrier_count = 0
3253
- ; GFX10-NEXT: wavefront_sgpr_count = 9
3253
+ ; GFX10-NEXT: wavefront_sgpr_count = 7
3254
3254
; GFX10-NEXT: workitem_vgpr_count = 3
3255
3255
; GFX10-NEXT: reserved_vgpr_first = 0
3256
3256
; GFX10-NEXT: reserved_vgpr_count = 0
@@ -3267,22 +3267,22 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
3267
3267
; GFX10-NEXT: .end_amd_kernel_code_t
3268
3268
; GFX10-NEXT: ; %bb.0: ; %entry
3269
3269
; GFX10-NEXT: s_clause 0x1
3270
- ; GFX10-NEXT: s_load_dword s8, s[4:5], 0x8
3270
+ ; GFX10-NEXT: s_load_dword s6, s[4:5], 0x8
3271
3271
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3272
3272
; GFX10-NEXT: s_mov_b32 s2, 0
3273
- ; GFX10-NEXT: s_mov_b32 s3, 0x40140000
3274
- ; GFX10-NEXT: s_mov_b32 s5, 0x40080000
3275
- ; GFX10-NEXT: s_mov_b32 s4, s2
3273
+ ; GFX10-NEXT: s_mov_b32 s3, 0x40080000
3276
3274
; GFX10-NEXT: v_mov_b32_e32 v2, 0
3277
3275
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
3278
- ; GFX10-NEXT: s_cmp_eq_u32 s8, 1
3279
- ; GFX10-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0
3280
- ; GFX10-NEXT: s_cmp_eq_u32 s8, 2
3281
- ; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
3282
- ; GFX10-NEXT: s_cmp_eq_u32 s8, 3
3283
- ; GFX10-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5]
3284
- ; GFX10-NEXT: s_cmp_eq_u32 s8, 4
3276
+ ; GFX10-NEXT: s_cmp_eq_u32 s6, 1
3277
+ ; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
3278
+ ; GFX10-NEXT: s_cmp_eq_u32 s6, 2
3285
3279
; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3280
+ ; GFX10-NEXT: s_cmp_eq_u32 s6, 3
3281
+ ; GFX10-NEXT: s_mov_b32 s4, 0
3282
+ ; GFX10-NEXT: s_mov_b32 s5, 0x40140000
3283
+ ; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
3284
+ ; GFX10-NEXT: s_cmp_eq_u32 s6, 4
3285
+ ; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
3286
3286
; GFX10-NEXT: v_mov_b32_e32 v0, s2
3287
3287
; GFX10-NEXT: v_mov_b32_e32 v1, s3
3288
3288
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -3299,7 +3299,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
3299
3299
; GFX11-NEXT: kernel_code_entry_byte_offset = 256
3300
3300
; GFX11-NEXT: kernel_code_prefetch_byte_size = 0
3301
3301
; GFX11-NEXT: granulated_workitem_vgpr_count = 0
3302
- ; GFX11-NEXT: granulated_wavefront_sgpr_count = 1
3302
+ ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0
3303
3303
; GFX11-NEXT: priority = 0
3304
3304
; GFX11-NEXT: float_mode = 240
3305
3305
; GFX11-NEXT: priv = 0
@@ -3342,7 +3342,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
3342
3342
; GFX11-NEXT: gds_segment_byte_size = 0
3343
3343
; GFX11-NEXT: kernarg_segment_byte_size = 12
3344
3344
; GFX11-NEXT: workgroup_fbarrier_count = 0
3345
- ; GFX11-NEXT: wavefront_sgpr_count = 9
3345
+ ; GFX11-NEXT: wavefront_sgpr_count = 7
3346
3346
; GFX11-NEXT: workitem_vgpr_count = 3
3347
3347
; GFX11-NEXT: reserved_vgpr_first = 0
3348
3348
; GFX11-NEXT: reserved_vgpr_count = 0
@@ -3359,22 +3359,22 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
3359
3359
; GFX11-NEXT: .end_amd_kernel_code_t
3360
3360
; GFX11-NEXT: ; %bb.0: ; %entry
3361
3361
; GFX11-NEXT: s_clause 0x1
3362
- ; GFX11-NEXT: s_load_b32 s8, s[0:1], 0x8
3362
+ ; GFX11-NEXT: s_load_b32 s6, s[0:1], 0x8
3363
3363
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
3364
3364
; GFX11-NEXT: s_mov_b32 s2, 0
3365
- ; GFX11-NEXT: s_mov_b32 s3, 0x40140000
3366
- ; GFX11-NEXT: s_mov_b32 s5, 0x40080000
3367
- ; GFX11-NEXT: s_mov_b32 s4, s2
3365
+ ; GFX11-NEXT: s_mov_b32 s3, 0x40080000
3368
3366
; GFX11-NEXT: v_mov_b32_e32 v2, 0
3369
3367
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3370
- ; GFX11-NEXT: s_cmp_eq_u32 s8, 1
3371
- ; GFX11-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0
3372
- ; GFX11-NEXT: s_cmp_eq_u32 s8, 2
3373
- ; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
3374
- ; GFX11-NEXT: s_cmp_eq_u32 s8, 3
3375
- ; GFX11-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5]
3376
- ; GFX11-NEXT: s_cmp_eq_u32 s8, 4
3368
+ ; GFX11-NEXT: s_cmp_eq_u32 s6, 1
3369
+ ; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
3370
+ ; GFX11-NEXT: s_cmp_eq_u32 s6, 2
3377
3371
; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
3372
+ ; GFX11-NEXT: s_cmp_eq_u32 s6, 3
3373
+ ; GFX11-NEXT: s_mov_b32 s4, 0
3374
+ ; GFX11-NEXT: s_mov_b32 s5, 0x40140000
3375
+ ; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
3376
+ ; GFX11-NEXT: s_cmp_eq_u32 s6, 4
3377
+ ; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
3378
3378
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
3379
3379
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
3380
3380
; GFX11-NEXT: s_nop 0
@@ -4784,11 +4784,8 @@ define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) {
4784
4784
; MOVREL-LABEL: v_extract_v64i32_32:
4785
4785
; MOVREL: ; %bb.0:
4786
4786
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4787
- ; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80
4788
- ; MOVREL-NEXT: v_mov_b32_e32 v2, s4
4789
- ; MOVREL-NEXT: v_mov_b32_e32 v3, s5
4790
- ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
4791
- ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
4787
+ ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0
4788
+ ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4792
4789
; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
4793
4790
; MOVREL-NEXT: s_waitcnt vmcnt(0)
4794
4791
; MOVREL-NEXT: s_setpc_b64 s[30:31]
@@ -4823,11 +4820,8 @@ define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) {
4823
4820
; MOVREL-LABEL: v_extract_v64i32_33:
4824
4821
; MOVREL: ; %bb.0:
4825
4822
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4826
- ; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80
4827
- ; MOVREL-NEXT: v_mov_b32_e32 v2, s4
4828
- ; MOVREL-NEXT: v_mov_b32_e32 v3, s5
4829
- ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
4830
- ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
4823
+ ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0
4824
+ ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
4831
4825
; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
4832
4826
; MOVREL-NEXT: s_waitcnt vmcnt(0)
4833
4827
; MOVREL-NEXT: v_mov_b32_e32 v0, v1
0 commit comments