@@ -332,3 +332,44 @@ define amdgpu_kernel void @scalar_to_vector_test6(ptr addrspace(1) %out, i8 zero
332
332
store <2 x half > %bc , ptr addrspace (1 ) %out
333
333
ret void
334
334
}
335
+
336
+ ; bitcast (scalar_to_vector x) -> any_extend x: widens an i16 to <8 x i8> in which only lane 0 stays defined, then bitcasts the vector to i64; the SI/VI/GFX9 check lines below pin the expected instruction sequence for each check prefix.
337
+ define i64 @bitcast_combine_scalar_to_vector_v4i16 (i16 %arg ) {
338
+ ; SI-LABEL: bitcast_combine_scalar_to_vector_v4i16:
339
+ ; SI: ; %bb.0:
340
+ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341
+ ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0
342
+ ; SI-NEXT: v_and_b32_e32 v2, 0xff00, v0
343
+ ; SI-NEXT: v_bfe_u32 v0, v0, 8, 8
344
+ ; SI-NEXT: v_or_b32_e32 v2, v0, v2
345
+ ; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v2
346
+ ; SI-NEXT: v_or_b32_e32 v0, v1, v3
347
+ ; SI-NEXT: v_or_b32_e32 v1, v2, v3
348
+ ; SI-NEXT: s_setpc_b64 s[30:31]
349
+ ;
350
+ ; VI-LABEL: bitcast_combine_scalar_to_vector_v4i16:
351
+ ; VI: ; %bb.0:
352
+ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353
+ ; VI-NEXT: v_and_b32_e32 v1, 0xffffff00, v0
354
+ ; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
355
+ ; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v1
356
+ ; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
357
+ ; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
358
+ ; VI-NEXT: s_setpc_b64 s[30:31]
359
+ ;
360
+ ; GFX9-LABEL: bitcast_combine_scalar_to_vector_v4i16:
361
+ ; GFX9: ; %bb.0:
362
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363
+ ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v0
364
+ ; GFX9-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
365
+ ; GFX9-NEXT: s_mov_b32 s4, 0xffff
366
+ ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v1
367
+ ; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
368
+ ; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v2
369
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
370
+ %arg.cast = bitcast i16 %arg to <2 x i8 > ; reinterpret the 16-bit argument as a two-element byte vector
371
+ %tmp1 = shufflevector <2 x i8 > %arg.cast , <2 x i8 > poison, <8 x i32 > <i32 0 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 > ; widen to 8 lanes: lane 0 = element 0, lanes 1-7 all replicate element 1
372
+ %tmp2 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > poison, <8 x i32 > <i32 0 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> ; keep only lane 0; the mask leaves lanes 1-7 as poison
373
+ %cast = bitcast <8 x i8 > %tmp2 to i64 ; reinterpret the 8-byte vector as the i64 return value
374
+ ret i64 %cast
375
+ }
0 commit comments