Skip to content

Commit feedabf

Browse files
committed
AMDGPU: Break 64-bit arguments into 32-bit pieces
llvm-svn: 338421
1 parent 05220a9 commit feedabf

File tree

2 files changed

+59
-10
lines changed

2 files changed

+59
-10
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -701,9 +701,12 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
701701
if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
702702
EVT ScalarVT = VT.getScalarType();
703703
unsigned Size = ScalarVT.getSizeInBits();
704-
if (Size == 32 || Size == 64)
704+
if (Size == 32)
705705
return ScalarVT.getSimpleVT();
706706

707+
if (Size == 64)
708+
return MVT::i32;
709+
707710
if (Size == 16 &&
708711
Subtarget->has16BitInsts() &&
709712
isPowerOf2_32(VT.getVectorNumElements()))
@@ -721,9 +724,12 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
721724
EVT ScalarVT = VT.getScalarType();
722725
unsigned Size = ScalarVT.getSizeInBits();
723726

724-
if (Size == 32 || Size == 64)
727+
if (Size == 32)
725728
return NumElts;
726729

730+
if (Size == 64)
731+
return 2 * NumElts;
732+
727733
// FIXME: Fails to break down as we want with v3.
728734
if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts))
729735
return VT.getVectorNumElements() / 2;
@@ -740,13 +746,20 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
740746
unsigned NumElts = VT.getVectorNumElements();
741747
EVT ScalarVT = VT.getScalarType();
742748
unsigned Size = ScalarVT.getSizeInBits();
743-
if (Size == 32 || Size == 64) {
749+
if (Size == 32) {
744750
RegisterVT = ScalarVT.getSimpleVT();
745751
IntermediateVT = RegisterVT;
746752
NumIntermediates = NumElts;
747753
return NumIntermediates;
748754
}
749755

756+
if (Size == 64) {
757+
RegisterVT = MVT::i32;
758+
IntermediateVT = RegisterVT;
759+
NumIntermediates = 2 * NumElts;
760+
return NumIntermediates;
761+
}
762+
750763
// FIXME: We should fix the ABI to be the same on targets without 16-bit
751764
// support, but unless we can properly handle 3-vectors, it will still be
752765
// inconsistent.

llvm/test/CodeGen/AMDGPU/call-argument-types.ll

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ declare void @external_void_func_f16(half) #0
2525
declare void @external_void_func_f32(float) #0
2626
declare void @external_void_func_f64(double) #0
2727
declare void @external_void_func_v2f32(<2 x float>) #0
28+
declare void @external_void_func_v2f64(<2 x double>) #0
29+
declare void @external_void_func_v3f64(<3 x double>) #0
2830

2931
declare void @external_void_func_v2i16(<2 x i16>) #0
3032
declare void @external_void_func_v2f16(<2 x half>) #0
@@ -274,10 +276,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
274276
ret void
275277
}
276278

279+
; GCN-LABEL: {{^}}test_call_external_void_func_v2i64_imm:
280+
; GCN-DAG: v_mov_b32_e32 v0, 1
281+
; GCN-DAG: v_mov_b32_e32 v1, 2
282+
; GCN-DAG: v_mov_b32_e32 v2, 3
283+
; GCN-DAG: v_mov_b32_e32 v3, 4
284+
; GCN: s_swappc_b64
285+
define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
286+
call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
287+
ret void
288+
}
289+
277290
; GCN-LABEL: {{^}}test_call_external_void_func_v3i64:
278291
; GCN: buffer_load_dwordx4 v[0:3]
279-
; GCN: v_mov_b32_e32 v4, s
280-
; GCN: v_mov_b32_e32 v5, s
292+
; GCN: v_mov_b32_e32 v4, 1
293+
; GCN: v_mov_b32_e32 v5, 2
281294
; GCN: s_waitcnt
282295
; GCN-NEXT: s_swappc_b64
283296
define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
@@ -288,13 +301,12 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
288301
ret void
289302
}
290303

291-
; FIXME: Immediates should fold directly into v_mov_b32s
292304
; GCN-LABEL: {{^}}test_call_external_void_func_v4i64:
293305
; GCN: buffer_load_dwordx4 v[0:3]
294-
; GCN-DAG: v_mov_b32_e32 v4, s
295-
; GCN-DAG: v_mov_b32_e32 v5, s
296-
; GCN-DAG: v_mov_b32_e32 v6, s
297-
; GCN-DAG: v_mov_b32_e32 v7, s
306+
; GCN-DAG: v_mov_b32_e32 v4, 1
307+
; GCN-DAG: v_mov_b32_e32 v5, 2
308+
; GCN-DAG: v_mov_b32_e32 v6, 3
309+
; GCN-DAG: v_mov_b32_e32 v7, 4
298310

299311
; GCN: s_waitcnt
300312
; GCN-NEXT: s_swappc_b64
@@ -342,6 +354,30 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
342354
ret void
343355
}
344356

357+
; GCN-LABEL: {{^}}test_call_external_void_func_v2f64_imm:
358+
; GCN: v_mov_b32_e32 v0, 0{{$}}
359+
; GCN: v_mov_b32_e32 v1, 2.0
360+
; GCN: v_mov_b32_e32 v2, 0{{$}}
361+
; GCN: v_mov_b32_e32 v3, 0x40100000
362+
; GCN: s_swappc_b64
363+
define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
364+
call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
365+
ret void
366+
}
367+
368+
; GCN-LABEL: {{^}}test_call_external_void_func_v3f64_imm:
369+
; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
370+
; GCN-DAG: v_mov_b32_e32 v1, 2.0
371+
; GCN-DAG: v_mov_b32_e32 v2, 0{{$}}
372+
; GCN-DAG: v_mov_b32_e32 v3, 0x40100000
373+
; GCN-DAG: v_mov_b32_e32 v4, 0{{$}}
374+
; GCN-DAG: v_mov_b32_e32 v5, 0x40200000
375+
; GCN-DAG: s_swappc_b64
376+
define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
377+
call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
378+
ret void
379+
}
380+
345381
; GCN-LABEL: {{^}}test_call_external_void_func_v2i16:
346382
; GFX9: buffer_load_dword v0
347383
; GFX9-NOT: v0

0 commit comments

Comments
 (0)