|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
1 | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
2 | 3 |
|
3 |
| -; SI-LABEL: {{^}}s_clear_msb: |
4 |
| -; SI: s_bitset0_b32 s{{[0-9]+}}, 31 |
; Scalar case: ANDs the kernel argument %in with 2147483647 (0x7fffffff),
; i.e. clears bit 31, and stores the result through %out. The SI assertions
; in this function expect the AND to be emitted as s_bitset0_b32 with bit
; index 31.
5 | 4 | define amdgpu_kernel void @s_clear_msb(ptr addrspace(1) %out, i32 %in) {
|
| 5 | +; SI-LABEL: s_clear_msb: |
| 6 | +; SI: ; %bb.0: |
| 7 | +; SI-NEXT: s_load_dword s4, s[2:3], 0xb |
| 8 | +; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 |
| 9 | +; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 10 | +; SI-NEXT: s_mov_b32 s2, -1 |
| 11 | +; SI-NEXT: s_waitcnt lgkmcnt(0) |
| 12 | +; SI-NEXT: s_bitset0_b32 s4, 31 |
| 13 | +; SI-NEXT: v_mov_b32_e32 v0, s4 |
| 14 | +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| 15 | +; SI-NEXT: s_endpgm |
6 | 16 | %x = and i32 %in, 2147483647
|
7 | 17 | store i32 %x, ptr addrspace(1) %out
|
8 | 18 | ret void
|
9 | 19 | }
|
10 | 20 |
|
11 |
| -; SI-LABEL: {{^}}s_set_msb: |
12 |
| -; SI: s_bitset1_b32 s{{[0-9]+}}, 31 |
; Scalar case: ORs the kernel argument %in with 2147483648 (0x80000000),
; i.e. sets bit 31, and stores the result through %out. The SI assertions
; in this function expect the OR to be emitted as s_bitset1_b32 with bit
; index 31.
13 | 21 | define amdgpu_kernel void @s_set_msb(ptr addrspace(1) %out, i32 %in) {
|
| 22 | +; SI-LABEL: s_set_msb: |
| 23 | +; SI: ; %bb.0: |
| 24 | +; SI-NEXT: s_load_dword s4, s[2:3], 0xb |
| 25 | +; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 |
| 26 | +; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 27 | +; SI-NEXT: s_mov_b32 s2, -1 |
| 28 | +; SI-NEXT: s_waitcnt lgkmcnt(0) |
| 29 | +; SI-NEXT: s_bitset1_b32 s4, 31 |
| 30 | +; SI-NEXT: v_mov_b32_e32 v0, s4 |
| 31 | +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| 32 | +; SI-NEXT: s_endpgm |
14 | 33 | %x = or i32 %in, 2147483648
|
15 | 34 | store i32 %x, ptr addrspace(1) %out
|
16 | 35 | ret void
|
17 | 36 | }
|
18 | 37 |
|
19 |
| -; SI-LABEL: {{^}}s_clear_lsb: |
20 |
| -; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, -2 |
; Scalar case: ANDs the kernel argument %in with 4294967294 (0xfffffffe),
; i.e. clears bit 0, and stores the result through %out. Unlike the bit-31
; and bit-8 cases, the SI assertions here expect an ordinary s_and_b32 with
; the immediate -2, not s_bitset0_b32.
; NOTE(review): presumably because -2 can be encoded as an inline immediate,
; so a bitset form brings no benefit - confirm against the backend.
21 | 38 | define amdgpu_kernel void @s_clear_lsb(ptr addrspace(1) %out, i32 %in) {
|
| 39 | +; SI-LABEL: s_clear_lsb: |
| 40 | +; SI: ; %bb.0: |
| 41 | +; SI-NEXT: s_load_dword s4, s[2:3], 0xb |
| 42 | +; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 |
| 43 | +; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 44 | +; SI-NEXT: s_mov_b32 s2, -1 |
| 45 | +; SI-NEXT: s_waitcnt lgkmcnt(0) |
| 46 | +; SI-NEXT: s_and_b32 s4, s4, -2 |
| 47 | +; SI-NEXT: v_mov_b32_e32 v0, s4 |
| 48 | +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| 49 | +; SI-NEXT: s_endpgm |
22 | 50 | %x = and i32 %in, 4294967294
|
23 | 51 | store i32 %x, ptr addrspace(1) %out
|
24 | 52 | ret void
|
25 | 53 | }
|
26 | 54 |
|
27 |
| -; SI-LABEL: {{^}}s_set_lsb: |
28 |
| -; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1 |
; Scalar case: ORs the kernel argument %in with 1, i.e. sets bit 0, and
; stores the result through %out. The SI assertions here expect an ordinary
; s_or_b32 with the immediate 1, not s_bitset1_b32.
; NOTE(review): presumably because 1 can be encoded as an inline immediate,
; so a bitset form brings no benefit - confirm against the backend.
29 | 55 | define amdgpu_kernel void @s_set_lsb(ptr addrspace(1) %out, i32 %in) {
|
| 56 | +; SI-LABEL: s_set_lsb: |
| 57 | +; SI: ; %bb.0: |
| 58 | +; SI-NEXT: s_load_dword s4, s[2:3], 0xb |
| 59 | +; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 |
| 60 | +; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 61 | +; SI-NEXT: s_mov_b32 s2, -1 |
| 62 | +; SI-NEXT: s_waitcnt lgkmcnt(0) |
| 63 | +; SI-NEXT: s_or_b32 s4, s4, 1 |
| 64 | +; SI-NEXT: v_mov_b32_e32 v0, s4 |
| 65 | +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| 66 | +; SI-NEXT: s_endpgm |
30 | 67 | %x = or i32 %in, 1
|
31 | 68 | store i32 %x, ptr addrspace(1) %out
|
32 | 69 | ret void
|
33 | 70 | }
|
34 | 71 |
|
35 |
| -; SI-LABEL: {{^}}s_clear_midbit: |
36 |
| -; SI: s_bitset0_b32 s{{[0-9]+}}, 8 |
; Scalar case: ANDs the kernel argument %in with 4294967039 (0xfffffeff),
; i.e. clears bit 8, and stores the result through %out. The SI assertions
; in this function expect the AND to be emitted as s_bitset0_b32 with bit
; index 8.
37 | 72 | define amdgpu_kernel void @s_clear_midbit(ptr addrspace(1) %out, i32 %in) {
|
| 73 | +; SI-LABEL: s_clear_midbit: |
| 74 | +; SI: ; %bb.0: |
| 75 | +; SI-NEXT: s_load_dword s4, s[2:3], 0xb |
| 76 | +; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 |
| 77 | +; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 78 | +; SI-NEXT: s_mov_b32 s2, -1 |
| 79 | +; SI-NEXT: s_waitcnt lgkmcnt(0) |
| 80 | +; SI-NEXT: s_bitset0_b32 s4, 8 |
| 81 | +; SI-NEXT: v_mov_b32_e32 v0, s4 |
| 82 | +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| 83 | +; SI-NEXT: s_endpgm |
38 | 84 | %x = and i32 %in, 4294967039
|
39 | 85 | store i32 %x, ptr addrspace(1) %out
|
40 | 86 | ret void
|
41 | 87 | }
|
42 | 88 |
|
43 |
| -; SI-LABEL: {{^}}s_set_midbit: |
44 |
| -; SI: s_bitset1_b32 s{{[0-9]+}}, 8 |
; Scalar case: ORs the kernel argument %in with 256 (0x100), i.e. sets
; bit 8, and stores the result through %out. The SI assertions in this
; function expect the OR to be emitted as s_bitset1_b32 with bit index 8.
45 | 89 | define amdgpu_kernel void @s_set_midbit(ptr addrspace(1) %out, i32 %in) {
|
| 90 | +; SI-LABEL: s_set_midbit: |
| 91 | +; SI: ; %bb.0: |
| 92 | +; SI-NEXT: s_load_dword s4, s[2:3], 0xb |
| 93 | +; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 |
| 94 | +; SI-NEXT: s_mov_b32 s3, 0xf000 |
| 95 | +; SI-NEXT: s_mov_b32 s2, -1 |
| 96 | +; SI-NEXT: s_waitcnt lgkmcnt(0) |
| 97 | +; SI-NEXT: s_bitset1_b32 s4, 8 |
| 98 | +; SI-NEXT: v_mov_b32_e32 v0, s4 |
| 99 | +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| 100 | +; SI-NEXT: s_endpgm |
46 | 101 | %x = or i32 %in, 256
|
47 | 102 | store i32 %x, ptr addrspace(1) %out
|
48 | 103 | ret void
|
49 | 104 | }
|
50 | 105 |
|
; External i32 global in address space 1. The assertions visible for
; bitset_verifier_error reach it through gv@gotpcrel32.
51 | 106 | @gv = external addrspace(1) global i32
|
52 | 107 |
|
53 |
| -; Make sure there's no verifier error with an undef source. |
54 |
| -; SI-LABEL: {{^}}bitset_verifier_error: |
55 |
| -; SI-NOT: %bb.1: |
56 |
| -; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff |
57 | 108 | define void @bitset_verifier_error() local_unnamed_addr #0 {
|
| 109 | +; SI-LABEL: bitset_verifier_error: |
| 110 | +; SI: ; %bb.0: ; %bb |
| 111 | +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 112 | +; SI-NEXT: s_getpc_b64 s[4:5] |
| 113 | +; SI-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 |
| 114 | +; SI-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 |
| 115 | +; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| 116 | +; SI-NEXT: s_mov_b32 s7, 0xf000 |
| 117 | +; SI-NEXT: s_mov_b32 s6, -1 |
| 118 | +; SI-NEXT: s_waitcnt lgkmcnt(0) |
| 119 | +; SI-NEXT: s_and_b32 s8, s4, 0x7fffffff |
| 120 | +; SI-NEXT: v_mov_b32_e32 v0, s8 |
| 121 | +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| 122 | +; SI-NEXT: s_waitcnt expcnt(0) |
| 123 | +; SI-NEXT: v_mov_b32_e32 v0, 0x3f7fbe77 |
| 124 | +; SI-NEXT: v_cmp_ge_f32_e64 s[4:5], |s4|, v0 |
| 125 | +; SI-NEXT: s_and_b64 vcc, exec, s[4:5] |
| 126 | +; SI-NEXT: s_cbranch_vccnz .LBB6_2 |
| 127 | +; SI-NEXT: ; %bb.1: ; %bb5 |
| 128 | +; SI-NEXT: .LBB6_2: ; %bb6 |
58 | 129 | bb:
|
59 | 130 | %i = call float @llvm.fabs.f32(float undef) #0
|
60 | 131 | %i1 = bitcast float %i to i32
|
|
0 commit comments