-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86_64] Fix empty field error in vaarg of C++. #101639
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-backend-x86 Author: Longsheng Mou (CoTinker) ChangesSuch struct types:
For such structures, Lo is NoClass and Hi is Integer/SSE. And when this structure argument is passed, the high part is passed at offset 8 in memory. So we should do special handling for these types in EmitVAArg.Fix #79790 and #86371. Full diff: https://github.com/llvm/llvm-project/pull/101639.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index 26ff4e4ac0a3b..c7e84f70ae15f 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -3124,26 +3124,40 @@ RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
RegAddr = Tmp.withElementType(LTy);
- } else if (neededInt) {
- RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset),
- LTy, CharUnits::fromQuantity(8));
-
+ } else if (neededInt || neededSSE == 1) {
// Copy to a temporary if necessary to ensure the appropriate alignment.
auto TInfo = getContext().getTypeInfoInChars(Ty);
uint64_t TySize = TInfo.Width.getQuantity();
CharUnits TyAlign = TInfo.Align;
+ llvm::Value *GpOrFpOffset = neededInt ? gp_offset : fp_offset;
+ uint64_t Alignment = neededInt ? 8 : 16;
+ if (auto Offset = AI.getDirectOffset()) {
+ Address Tmp = CGF.CreateMemTemp(Ty);
+ llvm::Type *TyHi = AI.getCoerceToType();
+ llvm::Value *Addr =
+ CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset);
+ llvm::Value *Src = CGF.Builder.CreateAlignedLoad(TyHi, Addr, TyAlign);
+ llvm::Value *PtrOffset = llvm::ConstantInt::get(CGF.Int32Ty, Offset);
+ Address Dst = Address(
+ CGF.Builder.CreateGEP(CGF.Int8Ty, Tmp.getBasePointer(), PtrOffset),
+ LTy, TyAlign);
+ CGF.Builder.CreateStore(Src, Dst);
+ RegAddr = Tmp.withElementType(LTy);
+ } else {
+ RegAddr =
+ Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset),
+ LTy, CharUnits::fromQuantity(Alignment));
+ }
+
// Copy into a temporary if the type is more aligned than the
// register save area.
- if (TyAlign.getQuantity() > 8) {
+ if (neededInt && TyAlign.getQuantity() > 8) {
Address Tmp = CGF.CreateMemTemp(Ty);
CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
RegAddr = Tmp;
}
- } else if (neededSSE == 1) {
- RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
- LTy, CharUnits::fromQuantity(16));
} else {
assert(neededSSE == 2 && "Invalid number of needed registers!");
// SSE registers are spaced 16 bytes apart in the register save
diff --git a/clang/test/CodeGenCXX/x86_64-vaarg.cpp b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
index c4616a97e2055..1703b74ad10a2 100644
--- a/clang/test/CodeGenCXX/x86_64-vaarg.cpp
+++ b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
@@ -29,6 +29,7 @@ typedef struct {
// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S1]], align 8
// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
@@ -41,8 +42,11 @@ typedef struct {
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]]
-// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[FP_OFFSET]], 16
-// CHECK-NEXT: store i32 [[TMP2]], ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT: store double [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[FP_OFFSET]], 16
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[FP_OFFSET_P]], align 4
// CHECK-NEXT: br label [[VAARG_END:%.*]]
// CHECK: vaarg.in_mem:
// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
@@ -51,14 +55,209 @@ typedef struct {
// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8
// CHECK-NEXT: br label [[VAARG_END]]
// CHECK: vaarg.end:
-// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP1]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false)
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8
-// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8
-// CHECK-NEXT: ret double [[TMP4]]
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8
+// CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8
+// CHECK-NEXT: ret double [[TMP6]]
//
s1 f(int z, ...) {
__builtin_va_list list;
__builtin_va_start(list, z);
return __builtin_va_arg(list, s1);
}
+
+typedef struct {
+ struct{} a[5];
+ float b;
+ float c;
+} s2;
+
+// CHECK-LABEL: @_Z2f2iz(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S2:%.*]], align 4
+// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S2]], align 4
+// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1
+// CHECK-NEXT: [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT: [[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160
+// CHECK-NEXT: br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]]
+// CHECK: vaarg.in_reg:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[FP_OFFSET]], 16
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT: br label [[VAARG_END:%.*]]
+// CHECK: vaarg.in_mem:
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT: br label [[VAARG_END]]
+// CHECK: vaarg.end:
+// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL]], ptr align 4 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8
+// CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, ptr [[TMP5]], align 4
+// CHECK-NEXT: ret <2 x float> [[TMP6]]
+//
+s2 f2(int z, ...) {
+ __builtin_va_list list;
+ __builtin_va_start(list, z);
+ return __builtin_va_arg(list, s2);
+}
+
+typedef struct {
+ struct{} a;
+ long long b;
+} s3;
+
+// CHECK-LABEL: @_Z2f3iz(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S3:%.*]], align 8
+// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S3]], align 8
+// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT: [[GP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0
+// CHECK-NEXT: [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT: [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40
+// CHECK-NEXT: br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]]
+// CHECK: vaarg.in_reg:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[GP_OFFSET]]
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT: br label [[VAARG_END:%.*]]
+// CHECK: vaarg.in_mem:
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT: br label [[VAARG_END]]
+// CHECK: vaarg.end:
+// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// CHECK-NEXT: ret i64 [[TMP6]]
+//
+s3 f3(int z, ...) {
+ __builtin_va_list list;
+ __builtin_va_start(list, z);
+ return __builtin_va_arg(list, s3);
+}
+
+typedef struct {
+ struct{} a[7];
+ short b;
+ int c;
+} s4;
+
+// CHECK-LABEL: @_Z2f4iz(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S4:%.*]], align 4
+// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S4]], align 4
+// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT: [[GP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0
+// CHECK-NEXT: [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT: [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40
+// CHECK-NEXT: br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]]
+// CHECK: vaarg.in_reg:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[GP_OFFSET]]
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT: br label [[VAARG_END:%.*]]
+// CHECK: vaarg.in_mem:
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT: br label [[VAARG_END]]
+// CHECK: vaarg.end:
+// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL]], ptr align 4 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4
+// CHECK-NEXT: ret i64 [[TMP6]]
+//
+s4 f4(int z, ...) {
+ __builtin_va_list list;
+ __builtin_va_start(list, z);
+ return __builtin_va_arg(list, s4);
+}
+
+typedef struct {
+ struct{} a[5];
+ float b;
+ int c;
+} s5;
+
+// CHECK-LABEL: @_Z2f5iz(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S5:%.*]], align 4
+// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S5]], align 4
+// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT: [[GP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0
+// CHECK-NEXT: [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT: [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40
+// CHECK-NEXT: br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]]
+// CHECK: vaarg.in_reg:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[GP_OFFSET]]
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT: br label [[VAARG_END:%.*]]
+// CHECK: vaarg.in_mem:
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT: br label [[VAARG_END]]
+// CHECK: vaarg.end:
+// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL]], ptr align 4 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4
+// CHECK-NEXT: ret i64 [[TMP6]]
+//
+s5 f5(int z, ...) {
+ __builtin_va_list list;
+ __builtin_va_start(list, z);
+ return __builtin_va_arg(list, s5);
+}
|
Ping~ |
clang/lib/CodeGen/Targets/X86.cpp
Outdated
// Copy into a temporary if the type is more aligned than the | ||
// register save area. | ||
if (TyAlign.getQuantity() > 8) { | ||
if (neededInt && TyAlign.getQuantity() > 8) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we refactor the code so it's clear we never create two temporaries for the same va_arg? Use an else-if instead of a separate if.
It looks it's possible that we end up reading past the end of the register save area for something like:
struct {
long long b;
struct{} a;
};
Can you also fix that while you're here? (Not sure that actually has any visible effect, but better to be on the safe side.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am glad to fix it.
f43535f
to
58f1942
Compare
clang/lib/CodeGen/Targets/X86.cpp
Outdated
// ``` | ||
// struct { | ||
// long long a; | ||
// int b; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This example doesn't seem right; the size of the register save area has to be a multiple of 8.
Given that, we can simplify the "RegSize" to just neededInt ? neededInt * 8 : 16
, I think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, you're right.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SSE size is 8 and SSEUp is 8?
Consider such structure:
struct {
double a;
struct {} b;
}
Maybe RegSize = neededInt ? neededInt * 8 : 8
, I think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The relevant "size" here is the size of the slots in the register save area. Which is 16 bytes for fp registers.
9eca8c1
to
203285a
Compare
Such struct types: ``` struct { struct{} a; long long b; }; stuct { struct{} a; double b; }; ``` For such structures, Lo is NoClass and Hi is Integer/SSE. And when this structure argument is passed, the high part is passed at offset 8 in memory. So we should do special handling for these types in EmitVAArg.
Fix edge cases that may reading past the end of the register save area. Such as: ``` struct { long long a; struct {} b; }; ```
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Thanks. |
Such struct types:
For such structures, Lo is NoClass and Hi is Integer/SSE. And when this structure argument is passed, the high part is passed at offset 8 in memory. So we should do special handling for these types in EmitVAArg.Fix #79790 and fix #86371.