Skip to content

Commit f65e3af

Browse files
authored
[flang] Implement COMPLEX(10) passing and return ABI for X86-64 linux (#74094)
COMPLEX(10) passing by value and returning follow the C complex passing/returning ABI. Cover the COMPLEX(10) case (X87 / _Complex long double on X86-64). Implements System V ABI for AMD64 version 1.0. The LLVM signatures match the ones generated by clang for the _Complex long double case. Note that a FIXME is added for the COMPLEX(8) case, which is incorrect in a corner case. This will be fixed when dealing with passing derived types by value in a BIND(C) context.
1 parent 71f2ec2 commit f65e3af

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

flang/lib/Optimizer/CodeGen/Target.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,20 @@ struct TargetX86_64 : public GenericTarget<TargetX86_64> {
261261
// <2 x t> vector of 2 eleTy
262262
marshal.emplace_back(fir::VectorType::get(2, eleTy), AT{});
263263
} else if (sem == &llvm::APFloat::IEEEdouble()) {
264+
// FIXME: In case of SSE register exhaustion, the ABI here may be
265+
// incorrect since LLVM may pass the real via register and the imaginary
266+
// part via the stack while per the ABI it should be all in register or all
267+
// in memory. Register occupancy must be analyzed here.
264268
// two distinct double arguments
265269
marshal.emplace_back(eleTy, AT{});
266270
marshal.emplace_back(eleTy, AT{});
271+
} else if (sem == &llvm::APFloat::x87DoubleExtended()) {
272+
// Use a type that will be translated into LLVM as:
273+
// { x86_fp80, x86_fp80 } struct of 2 x86_fp80, byval, align 16
274+
marshal.emplace_back(
275+
fir::ReferenceType::get(mlir::TupleType::get(
276+
eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
277+
AT{/*align=*/16, /*byval=*/true});
267278
} else if (sem == &llvm::APFloat::IEEEquad()) {
268279
// Use a type that will be translated into LLVM as:
269280
// { fp128, fp128 } struct of 2 fp128, byval, align 16
@@ -290,6 +301,11 @@ struct TargetX86_64 : public GenericTarget<TargetX86_64> {
290301
marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
291302
mlir::TypeRange{eleTy, eleTy}),
292303
AT{});
304+
} else if (sem == &llvm::APFloat::x87DoubleExtended()) {
305+
// { x86_fp80, x86_fp80 }
306+
marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
307+
mlir::TypeRange{eleTy, eleTy}),
308+
AT{});
293309
} else if (sem == &llvm::APFloat::IEEEquad()) {
294310
// Use a type that will be translated into LLVM as:
295311
// { fp128, fp128 } struct of 2 fp128, sret, align 16
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Test COMPLEX(10) passing and returning on X86
2+
// REQUIRES: x86-registered-target
3+
// RUN: fir-opt --target-rewrite="target=x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefix=AMD64
4+
// RUN: tco -target="x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefix=AMD64_LLVM
5+
6+
func.func @returncomplex10() -> !fir.complex<10> {
7+
%1 = fir.zero_bits !fir.complex<10>
8+
return %1 : !fir.complex<10>
9+
}
10+
// AMD64-LABEL: func.func @returncomplex10() -> tuple<!fir.real<10>, !fir.real<10>> {
11+
// AMD64: %[[VAL_0:.*]] = fir.zero_bits !fir.complex<10>
12+
// AMD64: %[[VAL_1:.*]] = fir.alloca tuple<!fir.real<10>, !fir.real<10>>
13+
// AMD64: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<tuple<!fir.real<10>, !fir.real<10>>>) -> !fir.ref<!fir.complex<10>>
14+
// AMD64: fir.store %[[VAL_0]] to %[[VAL_2]] : !fir.ref<!fir.complex<10>>
15+
// AMD64: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<tuple<!fir.real<10>, !fir.real<10>>>
16+
// AMD64: return %[[VAL_3]] : tuple<!fir.real<10>, !fir.real<10>>
17+
18+
// AMD64_LLVM: define { x86_fp80, x86_fp80 } @returncomplex10()
19+
20+
func.func @takecomplex10(%z: !fir.complex<10>) {
21+
%0 = fir.alloca !fir.complex<10>
22+
fir.store %z to %0 : !fir.ref<!fir.complex<10>>
23+
return
24+
}
25+
// AMD64-LABEL: func.func @takecomplex10(
26+
// AMD64-SAME: %[[VAL_0:.*]]: !fir.ref<tuple<!fir.real<10>, !fir.real<10>>> {llvm.align = 16 : i32, llvm.byval = tuple<!fir.real<10>, !fir.real<10>>}) {
27+
// AMD64: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (!fir.ref<tuple<!fir.real<10>, !fir.real<10>>>) -> !fir.ref<!fir.complex<10>>
28+
// AMD64: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.complex<10>>
29+
// AMD64: %[[VAL_3:.*]] = fir.alloca !fir.complex<10>
30+
// AMD64: fir.store %[[VAL_2]] to %[[VAL_3]] : !fir.ref<!fir.complex<10>>
31+
32+
// AMD64_LLVM: define void @takecomplex10(ptr byval({ x86_fp80, x86_fp80 }) align 16 %0)

0 commit comments

Comments
 (0)