|
3 | 3 | ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
|
4 | 4 | ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
|
5 | 5 | ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s
|
6 |
| -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11 %s |
| 6 | +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s |
| 7 | +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s |
7 | 8 |
|
8 | 9 | define i8 @v_saddsat_i8(i8 %lhs, i8 %rhs) {
|
9 | 10 | ; GFX6-LABEL: v_saddsat_i8:
|
@@ -34,14 +35,32 @@ define i8 @v_saddsat_i8(i8 %lhs, i8 %rhs) {
|
34 | 35 | ; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0
|
35 | 36 | ; GFX9-NEXT: s_setpc_b64 s[30:31]
|
36 | 37 | ;
|
37 |
| -; GFX10PLUS-LABEL: v_saddsat_i8: |
38 |
| -; GFX10PLUS: ; %bb.0: |
39 |
| -; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
40 |
| -; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1 |
41 |
| -; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0 |
42 |
| -; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp |
43 |
| -; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 8, v0 |
44 |
| -; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] |
| 38 | +; GFX10-LABEL: v_saddsat_i8: |
| 39 | +; GFX10: ; %bb.0: |
| 40 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 41 | +; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 |
| 42 | +; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0 |
| 43 | +; GFX10-NEXT: v_add_nc_i16 v0, v0, v1 clamp |
| 44 | +; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 |
| 45 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 46 | +; |
| 47 | +; GFX11-TRUE16-LABEL: v_saddsat_i8: |
| 48 | +; GFX11-TRUE16: ; %bb.0: |
| 49 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 50 | +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l |
| 51 | +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.l, 8, v0.l |
| 52 | +; GFX11-TRUE16-NEXT: v_add_nc_i16 v0.l, v0.l, v0.h clamp |
| 53 | +; GFX11-TRUE16-NEXT: v_ashrrev_i16 v0.l, 8, v0.l |
| 54 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 55 | +; |
| 56 | +; GFX11-FAKE16-LABEL: v_saddsat_i8: |
| 57 | +; GFX11-FAKE16: ; %bb.0: |
| 58 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 59 | +; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 |
| 60 | +; GFX11-FAKE16-NEXT: v_lshlrev_b16 v0, 8, v0 |
| 61 | +; GFX11-FAKE16-NEXT: v_add_nc_i16 v0, v0, v1 clamp |
| 62 | +; GFX11-FAKE16-NEXT: v_ashrrev_i16 v0, 8, v0 |
| 63 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
45 | 64 | %result = call i8 @llvm.sadd.sat.i8(i8 %lhs, i8 %rhs)
|
46 | 65 | ret i8 %result
|
47 | 66 | }
|
@@ -76,11 +95,24 @@ define i16 @v_saddsat_i16(i16 %lhs, i16 %rhs) {
|
76 | 95 | ; GFX9-NEXT: v_add_i16 v0, v0, v1 clamp
|
77 | 96 | ; GFX9-NEXT: s_setpc_b64 s[30:31]
|
78 | 97 | ;
|
79 |
| -; GFX10PLUS-LABEL: v_saddsat_i16: |
80 |
| -; GFX10PLUS: ; %bb.0: |
81 |
| -; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
82 |
| -; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp |
83 |
| -; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] |
| 98 | +; GFX10-LABEL: v_saddsat_i16: |
| 99 | +; GFX10: ; %bb.0: |
| 100 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 101 | +; GFX10-NEXT: v_add_nc_i16 v0, v0, v1 clamp |
| 102 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 103 | +; |
| 104 | +; GFX11-TRUE16-LABEL: v_saddsat_i16: |
| 105 | +; GFX11-TRUE16: ; %bb.0: |
| 106 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 107 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l |
| 108 | +; GFX11-TRUE16-NEXT: v_add_nc_i16 v0.l, v0.l, v0.h clamp |
| 109 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 110 | +; |
| 111 | +; GFX11-FAKE16-LABEL: v_saddsat_i16: |
| 112 | +; GFX11-FAKE16: ; %bb.0: |
| 113 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 114 | +; GFX11-FAKE16-NEXT: v_add_nc_i16 v0, v0, v1 clamp |
| 115 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
84 | 116 | %result = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs)
|
85 | 117 | ret i16 %result
|
86 | 118 | }
|
|
0 commit comments