Skip to content

Commit fca6992

Browse files
authored
[AArch64] Fix a minor issue with AArch64LoopIdiomTransform (#78136)
I found another case where in the end block we could have a PHI that we deal with incorrectly. The two incoming values are unique - one of them is the induction variable and another one is a value defined outside the loop, e.g. %final_val = phi i32 [ %inc, %while.body ], [ %d, %while.cond ] We won't correctly select between the two values in the new end block that we create and so we will get the wrong result.
1 parent fc02532 commit fca6992

File tree

2 files changed

+100
-5
lines changed

2 files changed

+100
-5
lines changed

llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -367,11 +367,12 @@ bool AArch64LoopIdiomTransform::recognizeByteCompare() {
367367
Value *WhileBodyVal = EndPN.getIncomingValueForBlock(WhileBB);
368368

369369
// The value of the index when leaving the while.cond block is always the
370-
// same as the end value (MaxLen) so we permit either. Otherwise for any
371-
// other value defined outside the loop we only allow values that are the
372-
// same as the exit value for while.body.
373-
if (WhileCondVal != Index && WhileCondVal != MaxLen &&
374-
WhileCondVal != WhileBodyVal)
370+
// same as the end value (MaxLen) so we permit either. The value when
371+
// leaving the while.body block should only be the index. Otherwise for
372+
// any other values we only allow ones that are same for both blocks.
373+
if (WhileCondVal != WhileBodyVal &&
374+
((WhileCondVal != Index && WhileCondVal != MaxLen) ||
375+
(WhileBodyVal != Index)))
375376
return false;
376377
}
377378
}

llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,6 +1190,100 @@ while.end:
11901190
}
11911191

11921192

1193+
; Similar to @compare_bytes_simple, except in the while.end block we have an extra PHI
1194+
; with unique values for each incoming block from the loop.
1195+
define i32 @compare_bytes_simple3(ptr %a, ptr %b, ptr %c, i32 %d, i32 %len, i32 %n) {
1196+
; CHECK-LABEL: define i32 @compare_bytes_simple3(
1197+
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1198+
; CHECK-NEXT: entry:
1199+
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
1200+
; CHECK: while.cond:
1201+
; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
1202+
; CHECK-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
1203+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
1204+
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
1205+
; CHECK: while.body:
1206+
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
1207+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1208+
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1209+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1210+
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1211+
; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
1212+
; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
1213+
; CHECK: while.end:
1214+
; CHECK-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
1215+
; CHECK-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4
1216+
; CHECK-NEXT: ret i32 [[FINAL_VAL]]
1217+
;
1218+
; LOOP-DEL-LABEL: define i32 @compare_bytes_simple3(
1219+
; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1220+
; LOOP-DEL-NEXT: entry:
1221+
; LOOP-DEL-NEXT: br label [[WHILE_COND:%.*]]
1222+
; LOOP-DEL: while.cond:
1223+
; LOOP-DEL-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
1224+
; LOOP-DEL-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
1225+
; LOOP-DEL-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
1226+
; LOOP-DEL-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
1227+
; LOOP-DEL: while.body:
1228+
; LOOP-DEL-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
1229+
; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1230+
; LOOP-DEL-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1231+
; LOOP-DEL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1232+
; LOOP-DEL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1233+
; LOOP-DEL-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
1234+
; LOOP-DEL-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
1235+
; LOOP-DEL: while.end:
1236+
; LOOP-DEL-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
1237+
; LOOP-DEL-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4
1238+
; LOOP-DEL-NEXT: ret i32 [[FINAL_VAL]]
1239+
;
1240+
; NO-TRANSFORM-LABEL: define i32 @compare_bytes_simple3(
1241+
; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[D:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) {
1242+
; NO-TRANSFORM-NEXT: entry:
1243+
; NO-TRANSFORM-NEXT: br label [[WHILE_COND:%.*]]
1244+
; NO-TRANSFORM: while.cond:
1245+
; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
1246+
; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
1247+
; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
1248+
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
1249+
; NO-TRANSFORM: while.body:
1250+
; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
1251+
; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
1252+
; NO-TRANSFORM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1253+
; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
1254+
; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
1255+
; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
1256+
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
1257+
; NO-TRANSFORM: while.end:
1258+
; NO-TRANSFORM-NEXT: [[FINAL_VAL:%.*]] = phi i32 [ [[D]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
1259+
; NO-TRANSFORM-NEXT: store i32 [[FINAL_VAL]], ptr [[C]], align 4
1260+
; NO-TRANSFORM-NEXT: ret i32 [[FINAL_VAL]]
1261+
;
1262+
entry:
1263+
br label %while.cond
1264+
1265+
while.cond:
1266+
%len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ]
1267+
%inc = add i32 %len.addr, 1
1268+
%cmp.not = icmp eq i32 %inc, %n
1269+
br i1 %cmp.not, label %while.end, label %while.body
1270+
1271+
while.body:
1272+
%idxprom = zext i32 %inc to i64
1273+
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
1274+
%0 = load i8, ptr %arrayidx
1275+
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
1276+
%1 = load i8, ptr %arrayidx2
1277+
%cmp.not2 = icmp eq i8 %0, %1
1278+
br i1 %cmp.not2, label %while.cond, label %while.end
1279+
1280+
while.end:
1281+
%final_val = phi i32 [ %d, %while.body ], [ %inc, %while.cond ]
1282+
store i32 %final_val, ptr %c
1283+
ret i32 %final_val
1284+
}
1285+
1286+
11931287
define i32 @compare_bytes_sign_ext(ptr %a, ptr %b, i32 %len, i32 %n) {
11941288
; CHECK-LABEL: define i32 @compare_bytes_sign_ext(
11951289
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {

0 commit comments

Comments
 (0)