[LLVM] Fix incorrect alignment on AMDGPU variadics

jhuber6 · jhuber6 · commit ee06122c86d0 · 2024-06-25T10:05:36.000-05:00
Summary:
The variadics lowering for AMDGPU puts all the arguments into a void
pointer struct. The current logic dictates that the minimum alignment is
four regardless of what  the underlying type is. This is incorrect in
the following case.

```c
void foo(int, ...);

void bar() {
  int x;
  void *p;
  foo(0, x, p);
}
```
Here, because the minimum alignment is 4, we will only increment the
buffer by 4, resulting in an incorrect alignment when we then try to
access the void pointer. We need to set a minimum of 4, but increase it
to 8 in cases like this.
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -121,7 +121,7 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
 RValue AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                 QualType Ty, AggValueSlot Slot) const {
   const bool IsIndirect = false;
-  const bool AllowHigherAlign = false;
+  const bool AllowHigherAlign = true;
   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                           getContext().getTypeInfoInChars(Ty),
                           CharUnits::fromQuantity(4), AllowHigherAlign, Slot);
@@ -212,13 +212,8 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
 
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
-  if (Variadic) {
-    return ABIArgInfo::getDirect(/*T=*/nullptr,
-                                 /*Offset=*/0,
-                                 /*Padding=*/nullptr,
-                                 /*CanBeFlattened=*/false,
-                                 /*Align=*/0);
-  }
+  if (Variadic)
+    return ABIArgInfo::getDirect();
 
   if (isAggregateTypeForABI(Ty)) {
     // Records with non-trivial destructors/copy-constructors should not be
diff --git a/clang/test/CodeGen/amdgpu-variadic-call.c b/clang/test/CodeGen/amdgpu-variadic-call.c
@@ -1,4 +1,3 @@
-// REQUIRES: amdgpu-registered-target
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
 // RUN: %clang_cc1 -cc1 -std=c23 -triple amdgcn-amd-amdhsa -emit-llvm -O1 %s -o - | FileCheck %s
 
@@ -179,11 +178,9 @@ typedef struct
 // CHECK-LABEL: define {{[^@]+}}@one_pair_f64
 // CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] [[DOTFCA_0_INSERT]], double [[V0_COERCE1]], 1
-// CHECK-NEXT:    tail call void (...) @sink_0([[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]]) #[[ATTR2]]
-// CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]]) #[[ATTR2]]
-// CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (...) @sink_0(double [[V0_COERCE0]], double [[V0_COERCE1]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], double [[V0_COERCE0]], double [[V0_COERCE1]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], double [[V0_COERCE0]], double [[V0_COERCE1]]) #[[ATTR2]]
 // CHECK-NEXT:    ret void
 //
 void one_pair_f64(int f0, double f1, pair_f64 v0)
@@ -220,10 +217,9 @@ typedef union
 // CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V0_COERCE]] to float
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
-// CHECK-NEXT:    tail call void (...) @sink_0([[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
-// CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
-// CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (...) @sink_0(float [[TMP0]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], float [[TMP0]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], float [[TMP0]]) #[[ATTR2]]
 // CHECK-NEXT:    ret void
 //
 void one_pair_union_f32_i32(int f0, double f1, union_f32_i32 v0)
@@ -242,10 +238,9 @@ typedef union
 // CHECK-LABEL: define {{[^@]+}}@one_pair_transparent_union_f32_i32
 // CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_TRANSPARENT_UNION_F32_I32:%.*]] poison, i32 [[V0_COERCE]], 0
-// CHECK-NEXT:    tail call void (...) @sink_0([[UNION_TRANSPARENT_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
-// CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[UNION_TRANSPARENT_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
-// CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[UNION_TRANSPARENT_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (...) @sink_0(i32 [[V0_COERCE]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], i32 [[V0_COERCE]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], i32 [[V0_COERCE]]) #[[ATTR2]]
 // CHECK-NEXT:    ret void
 //
 void one_pair_transparent_union_f32_i32(int f0, double f1, transparent_union_f32_i32 v0)
@@ -277,12 +272,9 @@ void multiple_one(int f0, double f1, int v0, double v1)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V2_COERCE]] to float
 // CHECK-NEXT:    [[CONV:%.*]] = fpext float [[V1]] to double
-// CHECK-NEXT:    [[DOTFCA_0_INSERT16:%.*]] = insertvalue [[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] [[DOTFCA_0_INSERT16]], double [[V0_COERCE1]], 1
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
-// CHECK-NEXT:    tail call void (...) @sink_0([[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
-// CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
-// CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (...) @sink_0(double [[V0_COERCE0]], double [[V0_COERCE1]], double noundef [[CONV]], float [[TMP0]], i32 noundef [[V3]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], double [[V0_COERCE0]], double [[V0_COERCE1]], double noundef [[CONV]], float [[TMP0]], i32 noundef [[V3]]) #[[ATTR2]]
+// CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], double [[V0_COERCE0]], double [[V0_COERCE1]], double noundef [[CONV]], float [[TMP0]], i32 noundef [[V3]]) #[[ATTR2]]
 // CHECK-NEXT:    ret void
 //
 void multiple_two(int f0, double f1, pair_f64 v0, float v1, union_f32_i32 v2, int v3)
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -934,7 +934,11 @@ struct Amdgpu final : public VariadicABIInfo {
   }
 
   VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
-    return {Align(4), false};
+    const unsigned MinAlign = 1;
+    Align A = DL.getABITypeAlign(Parameter);
+    if (A < MinAlign)
+      A = Align(MinAlign);
+    return {A, false};
   }
 };
 
diff --git a/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll b/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll

Original file line number	Diff line number	Diff line change
`@@ -934,7 +934,11 @@ struct Amdgpu final : public VariadicABIInfo {`
`934`	`934`	`}`
`935`	`935`
`936`	`936`	`VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {`
`937`		`- return {Align(4), false};`
	`937`	`+ const unsigned MinAlign = 1;`
	`938`	`+ Align A = DL.getABITypeAlign(Parameter);`
	`939`	`+ if (A < MinAlign)`
	`940`	`+ A = Align(MinAlign);`
	`941`	`+ return {A, false};`
`938`	`942`	`}`
`939`	`943`	`};`
`940`	`944`