Skip to content

Commit ee06122

Browse files
committed
[LLVM] Fix incorrect alignment on AMDGPU variadics
Summary: The variadics lowering for AMDGPU puts all the arguments into a void pointer struct. The current logic dictates that the minimum alignment is four regardless of what the underlying type is. This is incorrect in the following case. ```c void foo(int, ...); void bar() { int x; void *p; foo(0, x, p); } ``` Here, because the minimum alignment is 4, we will only increment the buffer by 4, resulting in an incorrect alignment when we then try to access the void pointer. We need to set a minimum of 4, but increase it to 8 in cases like this.
1 parent dc27ff1 commit ee06122

File tree

4 files changed

+316
-307
lines changed

4 files changed

+316
-307
lines changed

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
121121
RValue AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
122122
QualType Ty, AggValueSlot Slot) const {
123123
const bool IsIndirect = false;
124-
const bool AllowHigherAlign = false;
124+
const bool AllowHigherAlign = true;
125125
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
126126
getContext().getTypeInfoInChars(Ty),
127127
CharUnits::fromQuantity(4), AllowHigherAlign, Slot);
@@ -212,13 +212,8 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
212212

213213
Ty = useFirstFieldIfTransparentUnion(Ty);
214214

215-
if (Variadic) {
216-
return ABIArgInfo::getDirect(/*T=*/nullptr,
217-
/*Offset=*/0,
218-
/*Padding=*/nullptr,
219-
/*CanBeFlattened=*/false,
220-
/*Align=*/0);
221-
}
215+
if (Variadic)
216+
return ABIArgInfo::getDirect();
222217

223218
if (isAggregateTypeForABI(Ty)) {
224219
// Records with non-trivial destructors/copy-constructors should not be

clang/test/CodeGen/amdgpu-variadic-call.c

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// REQUIRES: amdgpu-registered-target
21
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
32
// RUN: %clang_cc1 -cc1 -std=c23 -triple amdgcn-amd-amdhsa -emit-llvm -O1 %s -o - | FileCheck %s
43

@@ -179,11 +178,9 @@ typedef struct
179178
// CHECK-LABEL: define {{[^@]+}}@one_pair_f64
180179
// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
181180
// CHECK-NEXT: entry:
182-
// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0
183-
// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] [[DOTFCA_0_INSERT]], double [[V0_COERCE1]], 1
184-
// CHECK-NEXT: tail call void (...) @sink_0([[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]]) #[[ATTR2]]
185-
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]]) #[[ATTR2]]
186-
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]]) #[[ATTR2]]
181+
// CHECK-NEXT: tail call void (...) @sink_0(double [[V0_COERCE0]], double [[V0_COERCE1]]) #[[ATTR2]]
182+
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], double [[V0_COERCE0]], double [[V0_COERCE1]]) #[[ATTR2]]
183+
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], double [[V0_COERCE0]], double [[V0_COERCE1]]) #[[ATTR2]]
187184
// CHECK-NEXT: ret void
188185
//
189186
void one_pair_f64(int f0, double f1, pair_f64 v0)
@@ -220,10 +217,9 @@ typedef union
220217
// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
221218
// CHECK-NEXT: entry:
222219
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[V0_COERCE]] to float
223-
// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
224-
// CHECK-NEXT: tail call void (...) @sink_0([[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
225-
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
226-
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
220+
// CHECK-NEXT: tail call void (...) @sink_0(float [[TMP0]]) #[[ATTR2]]
221+
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], float [[TMP0]]) #[[ATTR2]]
222+
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], float [[TMP0]]) #[[ATTR2]]
227223
// CHECK-NEXT: ret void
228224
//
229225
void one_pair_union_f32_i32(int f0, double f1, union_f32_i32 v0)
@@ -242,10 +238,9 @@ typedef union
242238
// CHECK-LABEL: define {{[^@]+}}@one_pair_transparent_union_f32_i32
243239
// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
244240
// CHECK-NEXT: entry:
245-
// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_TRANSPARENT_UNION_F32_I32:%.*]] poison, i32 [[V0_COERCE]], 0
246-
// CHECK-NEXT: tail call void (...) @sink_0([[UNION_TRANSPARENT_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
247-
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[UNION_TRANSPARENT_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
248-
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[UNION_TRANSPARENT_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
241+
// CHECK-NEXT: tail call void (...) @sink_0(i32 [[V0_COERCE]]) #[[ATTR2]]
242+
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], i32 [[V0_COERCE]]) #[[ATTR2]]
243+
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], i32 [[V0_COERCE]]) #[[ATTR2]]
249244
// CHECK-NEXT: ret void
250245
//
251246
void one_pair_transparent_union_f32_i32(int f0, double f1, transparent_union_f32_i32 v0)
@@ -277,12 +272,9 @@ void multiple_one(int f0, double f1, int v0, double v1)
277272
// CHECK-NEXT: entry:
278273
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[V2_COERCE]] to float
279274
// CHECK-NEXT: [[CONV:%.*]] = fpext float [[V1]] to double
280-
// CHECK-NEXT: [[DOTFCA_0_INSERT16:%.*]] = insertvalue [[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0
281-
// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] [[DOTFCA_0_INSERT16]], double [[V0_COERCE1]], 1
282-
// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
283-
// CHECK-NEXT: tail call void (...) @sink_0([[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
284-
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
285-
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
275+
// CHECK-NEXT: tail call void (...) @sink_0(double [[V0_COERCE0]], double [[V0_COERCE1]], double noundef [[CONV]], float [[TMP0]], i32 noundef [[V3]]) #[[ATTR2]]
276+
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], double [[V0_COERCE0]], double [[V0_COERCE1]], double noundef [[CONV]], float [[TMP0]], i32 noundef [[V3]]) #[[ATTR2]]
277+
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], double [[V0_COERCE0]], double [[V0_COERCE1]], double noundef [[CONV]], float [[TMP0]], i32 noundef [[V3]]) #[[ATTR2]]
286278
// CHECK-NEXT: ret void
287279
//
288280
void multiple_two(int f0, double f1, pair_f64 v0, float v1, union_f32_i32 v2, int v3)

llvm/lib/Transforms/IPO/ExpandVariadics.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,11 @@ struct Amdgpu final : public VariadicABIInfo {
934934
}
935935

936936
VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
937-
return {Align(4), false};
937+
const unsigned MinAlign = 1;
938+
Align A = DL.getABITypeAlign(Parameter);
939+
if (A < MinAlign)
940+
A = Align(MinAlign);
941+
return {A, false};
938942
}
939943
};
940944

0 commit comments

Comments
 (0)