Skip to content

Commit 921bd29

Browse files
committed
[OpenMP] Remove alignment for global <-> local reduction functions
The alignment likely did not help much, but it increases the memory requirement. Note that half of the affected accesses are all performed by a single thread in each block; the reads are performed by consecutive threads in a single block.
1 parent abe71b7 commit 921bd29

File tree

4 files changed

+61
-83
lines changed

4 files changed

+61
-83
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -85,18 +85,6 @@ class ExecutionRuntimeModesRAII {
8585
~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }
8686
};
8787

88-
/// GPU Configuration: This information can be derived from cuda registers,
89-
/// however, providing compile time constants helps generate more efficient
90-
/// code. For all practical purposes this is fine because the configuration
91-
/// is the same for all known NVPTX architectures.
92-
enum MachineConfiguration : unsigned {
93-
/// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
94-
/// specific Grid Values like GV_Warp_Size, GV_Slot_Size
95-
96-
/// Global memory alignment for performance.
97-
GlobalMemoryAlignment = 128,
98-
};
99-
10088
static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
10189
RefExpr = RefExpr->IgnoreParens();
10290
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
@@ -119,31 +107,23 @@ static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
119107
return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
120108
}
121109

122-
123110
static RecordDecl *buildRecordForGlobalizedVars(
124111
ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
125112
ArrayRef<const ValueDecl *> EscapedDeclsForTeams,
126113
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
127-
&MappedDeclsFields, int BufSize) {
114+
&MappedDeclsFields,
115+
int BufSize) {
128116
using VarsDataTy = std::pair<CharUnits /*Align*/, const ValueDecl *>;
129117
if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
130118
return nullptr;
131119
SmallVector<VarsDataTy, 4> GlobalizedVars;
132120
for (const ValueDecl *D : EscapedDecls)
133-
GlobalizedVars.emplace_back(
134-
CharUnits::fromQuantity(std::max(
135-
C.getDeclAlign(D).getQuantity(),
136-
static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))),
137-
D);
121+
GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
138122
for (const ValueDecl *D : EscapedDeclsForTeams)
139123
GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
140-
llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) {
141-
return L.first > R.first;
142-
});
143124

144125
// Build struct _globalized_locals_ty {
145-
// /* globalized vars */[WarpSize] align (max(decl_align,
146-
// GlobalMemoryAlignment))
126+
// /* globalized vars */[WarpSize] align (decl_align)
147127
// /* globalized vars */ for EscapedDeclsForTeams
148128
// };
149129
RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
@@ -182,9 +162,7 @@ static RecordDecl *buildRecordForGlobalizedVars(
182162
/*BW=*/nullptr, /*Mutable=*/false,
183163
/*InitStyle=*/ICIS_NoInit);
184164
Field->setAccess(AS_public);
185-
llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(),
186-
static_cast<CharUnits::QuantityType>(
187-
GlobalMemoryAlignment)));
165+
llvm::APInt Align(32, Pair.first.getQuantity());
188166
Field->addAttr(AlignedAttr::CreateImplicit(
189167
C, /*IsAlignmentExpr=*/true,
190168
IntegerLiteral::Create(C, Align,

clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ int bar(int n){
253253
// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
254254
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
255255
// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8
256-
// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128
256+
// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP8]], align 8
257257
// CHECK1-NEXT: ret void
258258
//
259259
//
@@ -294,7 +294,7 @@ int bar(int n){
294294
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
295295
// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
296296
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
297-
// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128
297+
// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8
298298
// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
299299
// CHECK1-NEXT: ret void
300300
//
@@ -583,13 +583,13 @@ int bar(int n){
583583
// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
584584
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
585585
// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
586-
// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128
586+
// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 4
587587
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
588588
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
589589
// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
590590
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
591591
// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
592-
// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128
592+
// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4
593593
// CHECK1-NEXT: ret void
594594
//
595595
//
@@ -634,13 +634,13 @@ int bar(int n){
634634
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
635635
// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
636636
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
637-
// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128
637+
// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4
638638
// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
639639
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
640640
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
641641
// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
642642
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
643-
// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128
643+
// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
644644
// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4
645645
// CHECK1-NEXT: ret void
646646
//
@@ -1156,13 +1156,13 @@ int bar(int n){
11561156
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0
11571157
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
11581158
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
1159-
// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128
1159+
// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4
11601160
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
11611161
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
11621162
// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1
11631163
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
11641164
// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2
1165-
// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128
1165+
// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 4
11661166
// CHECK1-NEXT: ret void
11671167
//
11681168
//
@@ -1207,13 +1207,13 @@ int bar(int n){
12071207
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
12081208
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0
12091209
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
1210-
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128
1210+
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
12111211
// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
12121212
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
12131213
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
12141214
// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1
12151215
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
1216-
// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128
1216+
// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4
12171217
// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2
12181218
// CHECK1-NEXT: ret void
12191219
//
@@ -1446,7 +1446,7 @@ int bar(int n){
14461446
// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
14471447
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
14481448
// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8
1449-
// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128
1449+
// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP8]], align 8
14501450
// CHECK2-NEXT: ret void
14511451
//
14521452
//
@@ -1487,7 +1487,7 @@ int bar(int n){
14871487
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
14881488
// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
14891489
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]]
1490-
// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128
1490+
// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8
14911491
// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
14921492
// CHECK2-NEXT: ret void
14931493
//
@@ -1776,13 +1776,13 @@ int bar(int n){
17761776
// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
17771777
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
17781778
// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
1779-
// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128
1779+
// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 4
17801780
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
17811781
// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
17821782
// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
17831783
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
17841784
// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
1785-
// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128
1785+
// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4
17861786
// CHECK2-NEXT: ret void
17871787
//
17881788
//
@@ -1827,13 +1827,13 @@ int bar(int n){
18271827
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
18281828
// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
18291829
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
1830-
// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128
1830+
// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4
18311831
// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
18321832
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
18331833
// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
18341834
// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
18351835
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]]
1836-
// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128
1836+
// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
18371837
// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4
18381838
// CHECK2-NEXT: ret void
18391839
//
@@ -2349,13 +2349,13 @@ int bar(int n){
23492349
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0
23502350
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
23512351
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
2352-
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128
2352+
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4
23532353
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
23542354
// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
23552355
// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1
23562356
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
23572357
// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2
2358-
// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128
2358+
// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 4
23592359
// CHECK2-NEXT: ret void
23602360
//
23612361
//
@@ -2400,13 +2400,13 @@ int bar(int n){
24002400
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
24012401
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0
24022402
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
2403-
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128
2403+
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
24042404
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
24052405
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
24062406
// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
24072407
// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1
24082408
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
2409-
// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128
2409+
// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4
24102410
// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2
24112411
// CHECK2-NEXT: ret void
24122412
//
@@ -2639,7 +2639,7 @@ int bar(int n){
26392639
// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
26402640
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]]
26412641
// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8
2642-
// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128
2642+
// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP8]], align 8
26432643
// CHECK3-NEXT: ret void
26442644
//
26452645
//
@@ -2680,7 +2680,7 @@ int bar(int n){
26802680
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
26812681
// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
26822682
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]]
2683-
// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128
2683+
// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8
26842684
// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
26852685
// CHECK3-NEXT: ret void
26862686
//
@@ -2969,13 +2969,13 @@ int bar(int n){
29692969
// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
29702970
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
29712971
// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
2972-
// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128
2972+
// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 4
29732973
// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
29742974
// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
29752975
// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
29762976
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]]
29772977
// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
2978-
// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128
2978+
// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4
29792979
// CHECK3-NEXT: ret void
29802980
//
29812981
//
@@ -3020,13 +3020,13 @@ int bar(int n){
30203020
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
30213021
// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0
30223022
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]]
3023-
// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128
3023+
// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4
30243024
// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
30253025
// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
30263026
// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
30273027
// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1
30283028
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]]
3029-
// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128
3029+
// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
30303030
// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4
30313031
// CHECK3-NEXT: ret void
30323032
//
@@ -3542,13 +3542,13 @@ int bar(int n){
35423542
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0
35433543
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
35443544
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
3545-
// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128
3545+
// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4
35463546
// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
35473547
// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
35483548
// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1
35493549
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
35503550
// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2
3551-
// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128
3551+
// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 4
35523552
// CHECK3-NEXT: ret void
35533553
//
35543554
//
@@ -3593,13 +3593,13 @@ int bar(int n){
35933593
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
35943594
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0
35953595
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]]
3596-
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128
3596+
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
35973597
// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
35983598
// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
35993599
// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
36003600
// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1
36013601
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]]
3602-
// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128
3602+
// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4
36033603
// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2
36043604
// CHECK3-NEXT: ret void
36053605
//

0 commit comments

Comments
 (0)