-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[NFC][AMDGPU] Autogenerate tests for uniform i32 promo in ISel #106382
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu. Author: Pierre van Houtryve (Pierre-vh). Changes: Many tests were easy to update, but these are quite big and I think it's better to autogenerate them to see the difference well. Patch is 147.35 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/106382.diff. 3 files affected:
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
index ee1df9aa0d6cea..0a2cac5a3e26ba 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
@@ -1,415 +1,729 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
-; GCN-LABEL: {{^}}float4_extelt:
-; GCN-NOT: buffer_
-; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
-; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
-; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
-; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2.0, [[V1]], [[C2]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4.0, [[V2]], [[C3]]
-; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
define amdgpu_kernel void @float4_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: float4_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_eq_u32 s4, 1
+; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 2
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[2:3]
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 3
+; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <4 x float> <float 0.0, float 1.0, float 2.0, float 4.0>, i32 %sel
store float %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}int4_extelt:
-; GCN-NOT: buffer_
-; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 2
-; GCN-DAG: s_cmp_eq_u32 [[IDX]], 1
-; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1, [[C1]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2, [[V1]], vcc
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4, [[V2]], vcc
-; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
define amdgpu_kernel void @int4_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: int4_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_eq_u32 s4, 1
+; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 2
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 3
+; GCN-NEXT: v_cndmask_b32_e32 v0, 2, v0, vcc
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: v_cndmask_b32_e32 v2, 4, v0, vcc
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 4>, i32 %sel
store i32 %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}double4_extelt:
-; GCN-NOT: buffer_
-; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b
-; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0xe147ae14, s{{[0-9]+}}
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x4000147a, s{{[0-9]+}}
-; GCN-DAG: s_cmp_eq_u32 s{{[[0-9]+}}, 3
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40100a3d, s{{[0-9]+}}
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x70a3d70a, s{{[0-9]+}}
-; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double4_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: double4_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_mov_b32 s2, 0x3ff028f5
+; GCN-NEXT: s_mov_b32 s3, 0xc28f5c29
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_eq_u32 s4, 1
+; GCN-NEXT: s_cselect_b32 s2, s2, 0x3f847ae1
+; GCN-NEXT: s_cselect_b32 s3, s3, 0x47ae147b
+; GCN-NEXT: s_cmp_eq_u32 s4, 2
+; GCN-NEXT: s_cselect_b32 s3, 0xe147ae14, s3
+; GCN-NEXT: s_cselect_b32 s2, 0x4000147a, s2
+; GCN-NEXT: s_cmp_eq_u32 s4, 3
+; GCN-NEXT: s_cselect_b32 s2, 0x40100a3d, s2
+; GCN-NEXT: s_cselect_b32 s3, 0x70a3d70a, s3
+; GCN-NEXT: v_mov_b32_e32 v3, s1
+; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <4 x double> <double 0.01, double 1.01, double 2.01, double 4.01>, i32 %sel
store double %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}double5_extelt:
-; GCN-NOT: buffer_
-; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b
-; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0xe147ae14, s{{[0-9]+}}
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x4000147a, s{{[0-9]+}}
-; GCN-DAG: s_cmp_eq_u32 s{{[[0-9]+}}, 3
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40100a3d, s{{[0-9]+}}
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x70a3d70a, s{{[0-9]+}}
-; GCN-DAG: s_cmp_eq_u32 s{{[[0-9]+}}, 4
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40140a3d, s{{[0-9]+}}
-; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double5_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: double5_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s6, s[2:3], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_mov_b32 s2, 0x3ff028f5
+; GCN-NEXT: s_mov_b32 s3, 0xc28f5c29
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_eq_u32 s6, 1
+; GCN-NEXT: s_cselect_b32 s2, s2, 0x3f847ae1
+; GCN-NEXT: s_cselect_b32 s3, s3, 0x47ae147b
+; GCN-NEXT: s_cmp_eq_u32 s6, 2
+; GCN-NEXT: s_cselect_b32 s8, 0xe147ae14, s3
+; GCN-NEXT: s_cselect_b32 s7, 0x4000147a, s2
+; GCN-NEXT: s_cmp_eq_u32 s6, 3
+; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT: s_and_b64 s[4:5], s[2:3], exec
+; GCN-NEXT: s_cselect_b32 s9, 0x40100a3d, s7
+; GCN-NEXT: s_cmp_eq_u32 s6, 4
+; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
+; GCN-NEXT: s_and_b64 s[6:7], s[4:5], exec
+; GCN-NEXT: s_cselect_b32 s6, 0x40140a3d, s9
+; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
+; GCN-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; GCN-NEXT: s_cselect_b32 s2, 0x70a3d70a, s8
+; GCN-NEXT: v_mov_b32_e32 v3, s1
+; GCN-NEXT: v_mov_b32_e32 v0, s2
+; GCN-NEXT: v_mov_b32_e32 v1, s6
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <5 x double> <double 0.01, double 1.01, double 2.01, double 4.01, double 5.01>, i32 %sel
store double %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}half4_extelt:
-; GCN-NOT: buffer_
-; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
-; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
-; GCN-DAG: s_lshl_b32 [[SEL:s[0-p]+]], s{{[0-9]+}}, 4
-; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
-; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
-; GCN: store_short v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @half4_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: half4_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_mov_b32 s2, 0x40003c00
+; GCN-NEXT: s_mov_b32 s3, 0x44004200
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshl_b32 s4, s4, 4
+; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s4
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: v_mov_b32_e32 v2, s2
+; GCN-NEXT: flat_store_short v[0:1], v2
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <4 x half> <half 1.0, half 2.0, half 3.0, half 4.0>, i32 %sel
store half %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}float2_extelt:
-; GCN-NOT: buffer_
-; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
-; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
-; GCN: store_dword v[{{[0-9:]+}}], [[V1]]
define amdgpu_kernel void @float2_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: float2_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_eq_u32 s4, 1
+; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s[2:3]
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <2 x float> <float 0.0, float 1.0>, i32 %sel
store float %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}double2_extelt:
-; GCN-NOT: buffer_
-; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1
-; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b
-; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double2_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: double2_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_mov_b32 s2, 0x3ff028f5
+; GCN-NEXT: s_mov_b32 s3, 0xc28f5c29
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_eq_u32 s4, 1
+; GCN-NEXT: s_cselect_b32 s2, s2, 0x3f847ae1
+; GCN-NEXT: s_cselect_b32 s3, s3, 0x47ae147b
+; GCN-NEXT: v_mov_b32_e32 v3, s1
+; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <2 x double> <double 0.01, double 1.01>, i32 %sel
store double %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}half8_extelt:
-; GCN-NOT: buffer_
-; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
-; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
-; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
-; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
-; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
-; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
-; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
-; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
-; GCN: store_short v[{{[0-9:]+}}], [[V7]]
define amdgpu_kernel void @half8_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: half8_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0x3c00
+; GCN-NEXT: v_mov_b32_e32 v1, 0x4000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_eq_u32 s4, 1
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 2
+; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0x4200
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 3
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0x4400
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 4
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0x4500
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 5
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0x4600
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 6
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0x4700
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 7
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0x4800
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_store_short v[0:1], v2
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <8 x half> <half 1.0, half 2.0, half 3.0, half 4.0, half 5.0, half 6.0, half 7.0, half 8.0>, i32 %sel
store half %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}short8_extelt:
-; GCN-NOT: buffer_
-; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
-; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
-; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
-; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
-; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
-; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
-; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
-; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
-; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
-; GCN: store_short v[{{[0-9:]+}}], [[V7]]
define amdgpu_kernel void @short8_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: short8_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_eq_u32 s4, 1
+; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 2
+; GCN-NEXT: v_cndmask_b32_e64 v0, 1, 2, s[2:3]
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 3
+; GCN-NEXT: v_cndmask_b32_e32 v0, 3, v0, vcc
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 4
+; GCN-NEXT: v_cndmask_b32_e32 v0, 4, v0, vcc
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 5
+; GCN-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 6
+; GCN-NEXT: v_cndmask_b32_e32 v0, 6, v0, vcc
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: s_cmp_lg_u32 s4, 7
+; GCN-NEXT: v_cndmask_b32_e32 v0, 7, v0, vcc
+; GCN-NEXT: s_cselect_b64 vcc, -1, 0
+; GCN-NEXT: v_cndmask_b32_e32 v2, 8, v0, vcc
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_store_short v[0:1], v2
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i32 %sel
store i16 %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}float8_extelt:
-; GCN-DAG: s_load_dwordx2 s[0:1], s[2:3], 0x24
-; GCN-DAG: s_load_dword [[S0:s[0-9]+]], s[2:3], 0x2c
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
-; GCN-DAG: s_waitcnt lgkmcnt(0)
-; GCN-DAG: s_mov_b32 m0, [[S0]]
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
-; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], v{{[0-9]+}}
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @float8_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: float8_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_load_dword s2, s[2:3], 0x2c
+; GCN-NEXT: v_mov_b32_e32 v0, 1.0
+; GCN-NEXT: v_mov_b32_e32 v1, 2.0
+; GCN-NEXT: v_mov_b32_e32 v2, 0x40400000
+; GCN-NEXT: v_mov_b32_e32 v3, 4.0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 m0, s2
+; GCN-NEXT: v_mov_b32_e32 v4, 0x40a00000
+; GCN-NEXT: v_mov_b32_e32 v5, 0x40c00000
+; GCN-NEXT: v_mov_b32_e32 v6, 0x40e00000
+; GCN-NEXT: v_mov_b32_e32 v7, 0x41000000
+; GCN-NEXT: v_movrels_b32_e32 v2, v0
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
store float %ext, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}double8_extelt:
-; GCN-NOT: buffer_
-; GCN-NOT: s_or_b32
-; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
-; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
-; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
-; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
-; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
-; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double8_extelt(ptr addrspace(1) %out, i32 %sel) {
+; GCN-LABEL: double8_extelt:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x24
+; GCN-NEXT: s_load_dword s18, s[2:3], 0x2c
+; GCN-NEXT: s_mov_b32 s0, 0
+; GCN-NEXT: s_mov_b32 s15, 0x40200000
+; GCN-NEXT: s_mov_b32 s13, 0x401c0000
+; GCN-NEXT: s_mov_b32 s11, 0x40180000
+; GCN-NEXT: s_mov_b32 s9, 0x40140000
+; GCN-NEXT: s_mov_b32 s7, 0x40100000
+; GCN-NEXT: s_mov_b32 s5, 0x40080000
+; GCN-NEXT: s_mov_b32 s3, 2.0
+; GCN-NEXT: s_mov_b32 s1, 0x3ff00000
+; GCN-NEXT: s_mov_b32 s2, s0
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s6, s0
+; GCN-NEXT: s_mov_b32 s8, s0
+; GCN-NEXT: s_mov_b32 s10, s0
+; GCN-NEXT: s_mov_b32 s12, s0
+; GCN-NEXT: s_mov_b32 s14, s0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshl_b32 s18, s18, 1
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: v_mov_b32_e32 v15, s15
+; GCN-NEXT: s_mov_b32 m0, s18
+; GCN-NEXT: v_mov_b32_e32 v2, s2
+; GCN-NEXT: v_mov_b32_e32 v3, s3
+; GCN-NEXT: v_mov_b32_e32 v4, s4
+; GCN-NEXT: v_mov_b32_e32 v5, s5
+; GCN-NEXT: v_mov_b32_e32 v6, s6
+; GCN-NEXT: v_mov_b32_e32 v7, s7
+; GCN-NEXT: v_mov_b32_e32 v8, s8
+; GCN-NEXT: v_mov_b32_e32 v9, s9
+; GCN-NEXT: v_mov_b32_e32 v10, s10
+; GCN-NEXT: v_mov_b32_e32 v11, s11
+; GCN-NEXT: v_mov_b32_e32 v12, s12
+; GCN-NEXT: v_mov_b32_e32 v13, s13
+; GCN-NEXT: v_mov_b32_e32 v14, s14
+; GCN-NEXT: v_movrels_b32_e32 v16, v1
+; GCN-NEXT: v_movrels_b32_e32 v15, v0
+; GCN-NEXT: v_mov_b32_e32 v0, s16
+; GCN-NEXT: v_mov_b32_e32 v1, s17
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[15:16]
+; GCN-NEXT: s_endpgm
entry:
%ext = extractelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5....
[truncated]
|
define amdgpu_kernel void @extract_vector_elt_v2i8(ptr addrspace(1) %out, <2 x i8> %foo) #0 { | ||
; SI-LABEL: extract_vector_elt_v2i8: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you want to do deeper maintenance, you can stop using amdgpu_kernel and use inreg arguments to control SGPR inputs, which avoids all the kernel boilerplate. Plus, use return values.
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,VI %s | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=SI %s | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=VI %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can also drop the FileCheck arguments.
; CI-NEXT: v_or_b32_e32 v2, v3, v2 | ||
; CI-NEXT: v_add_i32_e32 v2, vcc, 0x20000, v2 | ||
; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 | ||
; CI-NEXT: s_endpgm | ||
%tid = call i32 @llvm.amdgcn.workitem.id.x() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like the other file, these really should just use a regular function to get VGPR arguments; all this other noise is just to get a divergent VGPR in a kernel.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/4525. Here is the relevant piece of the build log for reference:
|
Many tests were easy to update, but these are quite big and I think it's better to autogenerate them to see the difference well.