1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2
2
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
3
3
4
; Scalar case: fptrunc a single float to bfloat and store the result through
; the addrspace(1) pointer %p. The CHECK lines below are autogenerated and pin
; the exact instruction sequence expected from llc for this function.
define void @scalar(float %num, ptr addrspace(1) %p) {
; CHECK-LABEL: scalar:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    v_mov_b32_e32 v2, v1
; CHECK-NEXT:    v_bfe_u32 v1, v0, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v1, v1, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v4, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
; CHECK-NEXT:    global_store_short_d16_hi v[2:3], v0, off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc float %num to bfloat
  store bfloat %conv, ptr addrspace(1) %p, align 8
  ret void
}
24
+
4
25
define void @v2 (<2 x float > %num , ptr addrspace (1 ) %p ) {
5
26
; CHECK-LABEL: v2:
6
27
; CHECK: ; %bb.0: ; %entry
@@ -27,6 +48,40 @@ entry:
27
48
ret void
28
49
}
29
50
51
; Three-element vector case: fptrunc <3 x float> to <3 x bfloat> and store the
; result through the addrspace(1) pointer %p. In the expected output the first
; two converted elements are combined with v_perm_b32 and written with a single
; global_store_dword, while the third is written separately with a 16-bit
; store at offset 4. The CHECK lines are autogenerated.
define void @v3(<3 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v3:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v5, v4
; CHECK-NEXT:    v_mov_b32_e32 v4, v3
; CHECK-NEXT:    v_bfe_u32 v3, v0, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v3, v3, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v6, vcc
; CHECK-NEXT:    v_bfe_u32 v3, v1, 16, 1
; CHECK-NEXT:    v_add3_u32 v3, v3, v1, s4
; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v1
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
; CHECK-NEXT:    v_perm_b32 v0, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v1, v2, 16, 1
; CHECK-NEXT:    v_add3_u32 v1, v1, v2, s4
; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v2
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; CHECK-NEXT:    global_store_short_d16_hi v[4:5], v1, off offset:4
; CHECK-NEXT:    global_store_dword v[4:5], v0, off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc <3 x float> %num to <3 x bfloat>
  store <3 x bfloat> %conv, ptr addrspace(1) %p, align 8
  ret void
}
84
+
30
85
define void @v4 (<4 x float > %num , ptr addrspace (1 ) %p ) {
31
86
; CHECK-LABEL: v4:
32
87
; CHECK: ; %bb.0: ; %entry
0 commit comments