@@ -1114,19 +1114,23 @@ define amdgpu_kernel void @f64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1)
1114
1114
; GCN-NEXT: s_waitcnt lgkmcnt(0)
1115
1115
; GCN-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
1116
1116
; GCN-NEXT: s_mov_b32 s3, 0xf000
1117
- ; GCN-NEXT: s_mov_b32 s2, -1
1118
1117
; GCN-NEXT: s_waitcnt lgkmcnt(0)
1119
1118
; GCN-NEXT: v_add_f64 v[0:1], s[4:5], 1.0
1120
- ; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v1
1121
- ; GCN-NEXT: v_add_i32_e32 v1, vcc, 2, v1
1122
- ; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
1123
- ; GCN-NEXT: v_add_i32_e32 v0, vcc, 2, v0
1124
- ; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
1125
- ; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
1126
- ; GCN-NEXT: v_or_b32_e32 v1, v2, v1
1127
- ; GCN-NEXT: v_or_b32_e32 v0, v3, v0
1128
- ; GCN-NEXT: v_add_i32_e32 v1, vcc, 0x20000, v1
1129
- ; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x20000, v0
1119
+ ; GCN-NEXT: v_readfirstlane_b32 s2, v0
1120
+ ; GCN-NEXT: v_readfirstlane_b32 s4, v1
1121
+ ; GCN-NEXT: s_and_b32 s5, s4, 0xffff0000
1122
+ ; GCN-NEXT: s_add_i32 s4, s4, 2
1123
+ ; GCN-NEXT: s_and_b32 s6, s2, 0xffff0000
1124
+ ; GCN-NEXT: s_add_i32 s2, s2, 2
1125
+ ; GCN-NEXT: s_and_b32 s4, s4, 0xffff
1126
+ ; GCN-NEXT: s_and_b32 s2, s2, 0xffff
1127
+ ; GCN-NEXT: s_or_b32 s4, s5, s4
1128
+ ; GCN-NEXT: s_or_b32 s2, s6, s2
1129
+ ; GCN-NEXT: s_add_i32 s4, s4, 0x20000
1130
+ ; GCN-NEXT: s_add_i32 s5, s2, 0x20000
1131
+ ; GCN-NEXT: s_mov_b32 s2, -1
1132
+ ; GCN-NEXT: v_mov_b32_e32 v0, s5
1133
+ ; GCN-NEXT: v_mov_b32_e32 v1, s4
1130
1134
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1131
1135
; GCN-NEXT: s_endpgm
1132
1136
;
@@ -1139,14 +1143,20 @@ define amdgpu_kernel void @f64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1)
1139
1143
; VI-NEXT: v_mov_b32_e32 v3, s1
1140
1144
; VI-NEXT: s_waitcnt lgkmcnt(0)
1141
1145
; VI-NEXT: v_add_f64 v[0:1], s[2:3], 1.0
1142
- ; VI-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
1143
- ; VI-NEXT: v_add_u32_e32 v0, vcc, 2, v0
1144
- ; VI-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
1145
- ; VI-NEXT: v_add_u32_e32 v1, vcc, 2, v1
1146
- ; VI-NEXT: v_or_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1147
- ; VI-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1148
- ; VI-NEXT: v_add_u32_e32 v1, vcc, 0x20000, v1
1149
- ; VI-NEXT: v_add_u32_e32 v0, vcc, 0x20000, v0
1146
+ ; VI-NEXT: v_readfirstlane_b32 s0, v1
1147
+ ; VI-NEXT: v_readfirstlane_b32 s1, v0
1148
+ ; VI-NEXT: s_and_b32 s2, s1, 0xffff0000
1149
+ ; VI-NEXT: s_add_i32 s1, s1, 2
1150
+ ; VI-NEXT: s_and_b32 s3, s0, 0xffff0000
1151
+ ; VI-NEXT: s_add_i32 s0, s0, 2
1152
+ ; VI-NEXT: s_and_b32 s0, s0, 0xffff
1153
+ ; VI-NEXT: s_and_b32 s1, s1, 0xffff
1154
+ ; VI-NEXT: s_or_b32 s0, s3, s0
1155
+ ; VI-NEXT: s_or_b32 s1, s2, s1
1156
+ ; VI-NEXT: s_add_i32 s0, s0, 0x20000
1157
+ ; VI-NEXT: s_add_i32 s1, s1, 0x20000
1158
+ ; VI-NEXT: v_mov_b32_e32 v0, s1
1159
+ ; VI-NEXT: v_mov_b32_e32 v1, s0
1150
1160
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1151
1161
; VI-NEXT: s_endpgm
1152
1162
;
0 commit comments