Skip to content

Commit a5d290d

Browse files
authored
[SYCL][FPGA] Fix invalid memory copying of struct using fpga_reg (#3865)
When the fpga_reg builtin is called with an object of record type, an extra memcpy call is generated with an invalid size parameter value. Remove the redundant memcpy call. Signed-off-by: Mikhail Lychkov <[email protected]>
1 parent a06bd1f commit a5d290d

File tree

2 files changed

+27
-50
lines changed

2 files changed

+27
-50
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18325,22 +18325,18 @@ RValue CodeGenFunction::EmitIntelFPGARegBuiltin(const CallExpr *E,
1832518325
ReturnValueSlot ReturnValue) {
1832618326
const Expr *PtrArg = E->getArg(0);
1832718327
QualType ArgType = PtrArg->getType();
18328-
llvm::Value *V = nullptr;
1832918328
StringRef AnnotStr = "__builtin_intel_fpga_reg";
1833018329

18331-
if (ArgType->isStructureOrClassType() || ArgType->isUnionType()) {
18332-
RValue RV = EmitAnyExpr(PtrArg);
18333-
Address A = EmitIntelFPGAFieldAnnotations(E->getExprLoc(),
18334-
RV.getAggregateAddress(),
18335-
AnnotStr);
18336-
llvm::Type *VTy = ReturnValue.getValue().getPointer()->getType();
18337-
uint64_t SizeVal = CGM.getDataLayout().getTypeAllocSize(VTy);
18338-
Builder.CreateMemCpy(ReturnValue.getValue(), A, SizeVal, false);
18330+
if (ArgType->isRecordType()) {
18331+
Address DstAddr = ReturnValue.getValue();
18332+
EmitAnyExprToMem(PtrArg, DstAddr, ArgType.getQualifiers(), true);
18333+
Address A =
18334+
EmitIntelFPGAFieldAnnotations(E->getExprLoc(), DstAddr, AnnotStr);
1833918335
return RValue::getAggregate(A);
1834018336
}
1834118337

1834218338
// if scalar type
18343-
V = EmitScalarExpr(PtrArg);
18339+
llvm::Value *V = EmitScalarExpr(PtrArg);
1834418340

1834518341
// llvm.annotation does not accept anything but integer types.
1834618342
llvm::Type *OrigVType = V->getType();

clang/test/CodeGenSYCL/intel-fpga-reg.cpp

Lines changed: 21 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
struct st {
44
int a;
55
float b;
6+
char c;
67
};
7-
// CHECK: [[T_ST:%struct[a-zA-Z0-9_.]*.st]] = type { i32, float }
8+
// CHECK: [[T_ST:%struct[a-zA-Z0-9_.]*.st]] = type { i32, float, i8 }
89

910
union un {
1011
int a;
@@ -67,41 +68,31 @@ void structs() {
6768
// CHECK-NEXT: [[S1_ASCAST:%.*]] = addrspacecast [[T_ST]]* [[S1]] to [[T_ST]] addrspace(4)*
6869
// CHECK-NEXT: [[S2:%.*]] = alloca [[T_ST]], align 4
6970
// CHECK-NEXT: [[S2_ASCAST:%.*]] = addrspacecast [[T_ST]]* [[S2]] to [[T_ST]] addrspace(4)*
70-
// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [[T_ST]], align 4
71-
// CHECK-NEXT: [[AGG_TEMP_ASCAST:%.*]] = addrspacecast [[T_ST]]* [[AGG_TEMP]] to [[T_ST]] addrspace(4)*
7271
// CHECK-NEXT: [[S3:%.*]] = alloca [[T_ST]], align 4
7372
// CHECK-NEXT: [[S3_ASCAST:%.*]] = addrspacecast [[T_ST]]* [[S3]] to [[T_ST]] addrspace(4)*
7473
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[T_ST]], align 4
7574
// CHECK-NEXT: [[REF_TMP_ASCAST:%.*]] = addrspacecast [[T_ST]]* [[REF_TMP]] to [[T_ST]] addrspace(4)*
76-
// CHECK-NEXT: [[AGG_TEMP2:%.*]] = alloca [[T_ST]], align 4
77-
// CHECK-NEXT: [[AGG_TEMP2_ASCAST:%.*]] = addrspacecast [[T_ST]]* [[AGG_TEMP2]] to [[T_ST]] addrspace(4)*
7875
struct st s1;
7976

8077
struct st s2 = __builtin_intel_fpga_reg(s1);
81-
// CHECK: [[TMP_S1:%.*]] = bitcast [[T_ST]] addrspace(4)* [[AGG_TEMP_ASCAST]] to i8 addrspace(4)*
78+
// CHECK: [[TMP_S1:%.*]] = bitcast [[T_ST]] addrspace(4)* [[S2_ASCAST]] to i8 addrspace(4)*
8279
// CHECK-NEXT: [[TMP_S2:%.*]] = bitcast [[T_ST]] addrspace(4)* [[S1_ASCAST]] to i8 addrspace(4)*
83-
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_S1]], i8 addrspace(4)* align 4 [[TMP_S2]], i64 8, i1 false)
84-
// CHECK-NEXT: [[TMP_S3:%.*]] = bitcast [[T_ST]] addrspace(4)* [[AGG_TEMP_ASCAST]] to i8 addrspace(4)*
80+
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_S1]], i8 addrspace(4)* align 4 [[TMP_S2]], i64 12, i1 false)
81+
// CHECK-NEXT: [[TMP_S3:%.*]] = bitcast [[T_ST]] addrspace(4)* [[S2_ASCAST]] to i8 addrspace(4)*
8582
// CHECK-NEXT: [[TMP_S4:%.*]] = call i8 addrspace(4)* @llvm.ptr.annotation.p4i8(i8 addrspace(4)* [[TMP_S3]], [[BIFR_STR]]
8683
// CHECK-NEXT: [[TMP_S5:%.*]] = bitcast i8 addrspace(4)* [[TMP_S4]] to [[T_ST]] addrspace(4)*
87-
// CHECK-NEXT: [[TMP_S6:%.*]] = bitcast [[T_ST]] addrspace(4)* [[S2_ASCAST]] to i8 addrspace(4)*
88-
// CHECK-NEXT: [[TMP_S7:%.*]] = bitcast [[T_ST]] addrspace(4)* [[TMP_S5]] to i8 addrspace(4)*
89-
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_S6]], i8 addrspace(4)* align 4 [[TMP_S7]], i64 8, i1 false)
9084

9185
struct st s3;
9286
s3 = __builtin_intel_fpga_reg(s2);
93-
// CHECK: [[TMP_S8:%.*]] = bitcast [[T_ST]] addrspace(4)* [[AGG_TEMP2_ASCAST]] to i8 addrspace(4)*
94-
// CHECK-NEXT: [[TMP_S9:%.*]] = bitcast [[T_ST]] addrspace(4)* [[S2_ASCAST]] to i8 addrspace(4)*
95-
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_S8]], i8 addrspace(4)* align 4 [[TMP_S9]], i64 8, i1 false)
96-
// CHECK-NEXT: [[TMP_S10:%.*]] = bitcast [[T_ST]] addrspace(4)* [[AGG_TEMP2_ASCAST]] to i8 addrspace(4)*
97-
// CHECK-NEXT: [[TMP_S11:%.*]] = call i8 addrspace(4)* @llvm.ptr.annotation.p4i8(i8 addrspace(4)* [[TMP_S10]], [[BIFR_STR]]
98-
// CHECK-NEXT: [[TMP_S12:%.*]] = bitcast i8 addrspace(4)* [[TMP_S11]] to [[T_ST]] addrspace(4)*
99-
// CHECK-NEXT: [[TMP_S13:%.*]] = bitcast [[T_ST]] addrspace(4)* [[REF_TMP_ASCAST]] to i8 addrspace(4)*
100-
// CHECK-NEXT: [[TMP_S14:%.*]] = bitcast [[T_ST]] addrspace(4)* [[TMP_S12]] to i8 addrspace(4)*
101-
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_S13]], i8 addrspace(4)* align 4 [[TMP_S14]], i64 8, i1 false)
102-
// CHECK-NEXT: [[TMP_S15:%.*]] = bitcast [[T_ST]] addrspace(4)* [[S3_ASCAST]] to i8 addrspace(4)*
103-
// CHECK-NEXT: [[TMP_S16:%.*]] = bitcast [[T_ST]] addrspace(4)* [[REF_TMP_ASCAST]] to i8 addrspace(4)*
104-
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_S15]], i8 addrspace(4)* align 4 [[TMP_S16]], i64 8, i1 false)
87+
// CHECK: [[TMP_S6:%.*]] = bitcast [[T_ST]] addrspace(4)* [[REF_TMP_ASCAST]] to i8 addrspace(4)*
88+
// CHECK-NEXT: [[TMP_S7:%.*]] = bitcast [[T_ST]] addrspace(4)* [[S2_ASCAST]] to i8 addrspace(4)*
89+
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_S6]], i8 addrspace(4)* align 4 [[TMP_S7]], i64 12, i1 false)
90+
// CHECK-NEXT: [[TMP_S8:%.*]] = bitcast [[T_ST]] addrspace(4)* [[REF_TMP_ASCAST]] to i8 addrspace(4)*
91+
// CHECK-NEXT: [[TMP_S9:%.*]] = call i8 addrspace(4)* @llvm.ptr.annotation.p4i8(i8 addrspace(4)* [[TMP_S8]], [[BIFR_STR]]
92+
// CHECK-NEXT: [[TMP_S10:%.*]] = bitcast i8 addrspace(4)* [[TMP_S9]] to [[T_ST]] addrspace(4)*
93+
// CHECK-NEXT: [[TMP_S11:%.*]] = bitcast [[T_ST]] addrspace(4)* [[S3_ASCAST]] to i8 addrspace(4)*
94+
// CHECK-NEXT: [[TMP_S12:%.*]] = bitcast [[T_ST]] addrspace(4)* [[REF_TMP_ASCAST]] to i8 addrspace(4)*
95+
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_S11]], i8 addrspace(4)* align 4 [[TMP_S12]], i64 12, i1 false)
10596
}
10697

10798
void unions() {
@@ -111,45 +102,35 @@ void unions() {
111102
// CHECK-NEXT: [[U2_ASCAST:%.*]] = addrspacecast [[T_UN]]* [[U2]] to [[T_UN]] addrspace(4)*
112103
// CHECK-NEXT: [[REF_TMP2:%.*]] = alloca [[T_UN]], align 4
113104
// CHECK-NEXT: [[REF_TMP2_ASCAST:%.*]] = addrspacecast [[T_UN]]* [[REF_TMP2]] to [[T_UN]] addrspace(4)*
114-
// CHECK-NEXT: [[AGG_TEMP3:%.*]] = alloca [[T_UN]], align 4
115-
// CHECK-NEXT: [[AGG_TEMP3_ASCAST:%.*]] = addrspacecast [[T_UN]]* [[AGG_TEMP3]] to [[T_UN]] addrspace(4)*
116105
union un u1;
117106
union un u2;
118107

119108
u2 = __builtin_intel_fpga_reg(u1);
120-
// CHECK: [[TMP_U1:%.*]] = bitcast [[T_UN]] addrspace(4)* [[AGG_TEMP3_ASCAST]] to i8 addrspace(4)*
109+
// CHECK: [[TMP_U1:%.*]] = bitcast [[T_UN]] addrspace(4)* [[REF_TMP2_ASCAST]] to i8 addrspace(4)*
121110
// CHECK-NEXT: [[TMP_U2:%.*]] = bitcast [[T_UN]] addrspace(4)* [[U1_ASCAST]] to i8 addrspace(4)*
122111
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_U1]], i8 addrspace(4)* align 4 [[TMP_U2]], i64 4, i1 false)
123-
// CHECK-NEXT: [[TMP_U3:%.*]] = bitcast [[T_UN]] addrspace(4)* [[AGG_TEMP3_ASCAST]] to i8 addrspace(4)*
112+
// CHECK-NEXT: [[TMP_U3:%.*]] = bitcast [[T_UN]] addrspace(4)* [[REF_TMP2_ASCAST]] to i8 addrspace(4)*
124113
// CHECK-NEXT: [[TMP_U4:%.*]] = call i8 addrspace(4)* @llvm.ptr.annotation.p4i8(i8 addrspace(4)* [[TMP_U3]], [[BIFR_STR]]
125114
// CHECK-NEXT: [[TMP_U5:%.*]] = bitcast i8 addrspace(4)* [[TMP_U4]] to [[T_UN]] addrspace(4)*
126-
// CHECK-NEXT: [[TMP_U6:%.*]] = bitcast [[T_UN]] addrspace(4)* [[REF_TMP2_ASCAST]] to i8 addrspace(4)*
127-
// CHECK-NEXT: [[TMP_U7:%.*]] = bitcast [[T_UN]] addrspace(4)* [[TMP_U5]] to i8 addrspace(4)*
128-
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_U6]], i8 addrspace(4)* align 4 [[TMP_U7]], i64 8, i1 false)
129-
// CHECK-NEXT: [[TMP_U8:%.*]] = bitcast [[T_UN]] addrspace(4)* [[U2_ASCAST]] to i8 addrspace(4)*
130-
// CHECK-NEXT: [[TMP_U9:%.*]] = bitcast [[T_UN]] addrspace(4)* [[REF_TMP2_ASCAST]] to i8 addrspace(4)*
131-
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_U8]], i8 addrspace(4)* align 4 [[TMP_U9]], i64 4, i1 false)
115+
// CHECK-NEXT: [[TMP_U6:%.*]] = bitcast [[T_UN]] addrspace(4)* [[U2_ASCAST]] to i8 addrspace(4)*
116+
// CHECK-NEXT: [[TMP_U7:%.*]] = bitcast [[T_UN]] addrspace(4)* [[REF_TMP2_ASCAST]] to i8 addrspace(4)*
117+
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_U6]], i8 addrspace(4)* align 4 [[TMP_U7]], i64 4, i1 false)
132118
}
133119

134120
void classes() {
135121
// CHECK: [[CA:%.*]] = alloca [[T_CL:%.*]], align 4
136122
// CHECK-NEXT: [[CA_ASCAST:%.*]] = addrspacecast [[T_CL]]* [[CA]] to [[T_CL]] addrspace(4)*
137123
// CHECK-NEXT: [[CB:%.*]] = alloca [[T_CL]], align 4
138124
// CHECK-NEXT: [[CB_ASCAST:%.*]] = addrspacecast [[T_CL]]* [[CB]] to [[T_CL]] addrspace(4)*
139-
// CHECK-NEXT: [[AGG_TEMP5:%.*]] = alloca [[T_CL]], align 4
140-
// CHECK-NEXT: [[AGG_TEMP5_ASCAST:%.*]] = addrspacecast [[T_CL]]*
141125
A ca(213);
142126

143127
A cb = __builtin_intel_fpga_reg(ca);
144-
// CHECK: [[TMP_C1:%.*]] = bitcast [[T_CL]] addrspace(4)* [[AGG_TEMP5_ASCAST]] to i8 addrspace(4)*
128+
// CHECK: [[TMP_C1:%.*]] = bitcast [[T_CL]] addrspace(4)* [[CB_ASCAST]] to i8 addrspace(4)*
145129
// CHECK-NEXT: [[TMP_C2:%.*]] = bitcast [[T_CL]] addrspace(4)* [[CA_ASCAST]] to i8 addrspace(4)*
146130
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_C1]], i8 addrspace(4)* align 4 [[TMP_C2]], i64 4, i1 false)
147-
// CHECK-NEXT: [[TMP_C3:%.*]] = bitcast [[T_CL]] addrspace(4)* [[AGG_TEMP5_ASCAST]] to i8 addrspace(4)*
131+
// CHECK-NEXT: [[TMP_C3:%.*]] = bitcast [[T_CL]] addrspace(4)* [[CB_ASCAST]] to i8 addrspace(4)*
148132
// CHECK-NEXT: [[TMP_C4:%.*]] = call i8 addrspace(4)* @llvm.ptr.annotation.p4i8(i8 addrspace(4)* [[TMP_C3]], [[BIFR_STR]]
149133
// CHECK-NEXT: [[TMP_C5:%.*]] = bitcast i8 addrspace(4)* [[TMP_C4]] to [[T_CL]] addrspace(4)*
150-
// CHECK-NEXT: [[TMP_C6:%.*]] = bitcast [[T_CL]] addrspace(4)* [[CB_ASCAST]] to i8 addrspace(4)*
151-
// CHECK-NEXT: [[TMP_C7:%.*]] = bitcast [[T_CL]] addrspace(4)* [[TMP_C5]] to i8 addrspace(4)*
152-
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[TMP_C6]], i8 addrspace(4)* align 4 [[TMP_C7]], i64 8, i1 false)
153134
}
154135

155136
void pointers() {

0 commit comments

Comments
 (0)