Skip to content

Commit 4dfea22

Browse files
authored
[ExpandMemCmp][AArch64][PowerPC][RISCV][X86] Use llvm.ucmp instead of (sub (zext (icmp ugt)), (zext (icmp ult))). (#121530)
AArch64 and PowerPC look like improvements. RISC-V is neutral. X86 trades a dependency-breaking xor before a seta for a movsx after an sbbb. Depending on how the result is used, this movsx might go away.
1 parent 1cade86 commit 4dfea22

18 files changed

+133
-189
lines changed

llvm/lib/CodeGen/ExpandMemCmp.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -696,17 +696,9 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
696696
}
697697
}
698698

699-
// The result of memcmp is negative, zero, or positive, so produce that by
700-
// subtracting 2 extended compare bits: sub (ugt, ult).
701-
// If a target prefers to use selects to get -1/0/1, they should be able
702-
// to transform this later. The inverse transform (going from selects to math)
703-
// may not be possible in the DAG because the selects got converted into
704-
// branches before we got there.
705-
Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
706-
Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
707-
Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
708-
Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
709-
return Builder.CreateSub(ZextUGT, ZextULT);
699+
// The result of memcmp is negative, zero, or positive.
700+
return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::ucmp,
701+
{Loads.Lhs, Loads.Rhs});
710702
}
711703

712704
// This function expands the memcmp call into an inline expansion and returns

llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,9 +313,8 @@ define void @one_dimensional_with_store(ptr %a, ptr %b, ptr %c, i32 %N) {
313313
; CHECK-NEXT: rev w9, w9
314314
; CHECK-NEXT: cmp w9, w10
315315
; CHECK-NEXT: cset w9, hi
316-
; CHECK-NEXT: cset w10, lo
316+
; CHECK-NEXT: csinv w9, w9, wzr, hs
317317
; CHECK-NEXT: subs x8, x8, #1
318-
; CHECK-NEXT: sub w9, w9, w10
319318
; CHECK-NEXT: strb w9, [x2], #1
320319
; CHECK-NEXT: b.ne .LBB4_1
321320
; CHECK-NEXT: // %bb.2: // %for.exit

llvm/test/CodeGen/AArch64/memcmp.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,7 @@ define i32 @length3(ptr %X, ptr %Y) nounwind {
162162
; CHECK-NEXT: rev w9, w9
163163
; CHECK-NEXT: cmp w8, w9
164164
; CHECK-NEXT: cset w8, hi
165-
; CHECK-NEXT: cset w9, lo
166-
; CHECK-NEXT: sub w0, w8, w9
165+
; CHECK-NEXT: csinv w0, w8, wzr, hs
167166
; CHECK-NEXT: ret
168167
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
169168
ret i32 %m
@@ -194,8 +193,7 @@ define i32 @length4(ptr %X, ptr %Y) nounwind {
194193
; CHECK-NEXT: rev w9, w9
195194
; CHECK-NEXT: cmp w8, w9
196195
; CHECK-NEXT: cset w8, hi
197-
; CHECK-NEXT: cset w9, lo
198-
; CHECK-NEXT: sub w0, w8, w9
196+
; CHECK-NEXT: csinv w0, w8, wzr, hs
199197
; CHECK-NEXT: ret
200198
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
201199
ret i32 %m
@@ -286,8 +284,7 @@ define i32 @length5(ptr %X, ptr %Y) nounwind {
286284
; CHECK-NEXT: rev x9, x9
287285
; CHECK-NEXT: cmp x8, x9
288286
; CHECK-NEXT: cset w8, hi
289-
; CHECK-NEXT: cset w9, lo
290-
; CHECK-NEXT: sub w0, w8, w9
287+
; CHECK-NEXT: csinv w0, w8, wzr, hs
291288
; CHECK-NEXT: ret
292289
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
293290
ret i32 %m
@@ -341,8 +338,7 @@ define i32 @length6(ptr %X, ptr %Y) nounwind {
341338
; CHECK-NEXT: rev x9, x9
342339
; CHECK-NEXT: cmp x8, x9
343340
; CHECK-NEXT: cset w8, hi
344-
; CHECK-NEXT: cset w9, lo
345-
; CHECK-NEXT: sub w0, w8, w9
341+
; CHECK-NEXT: csinv w0, w8, wzr, hs
346342
; CHECK-NEXT: ret
347343
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
348344
ret i32 %m
@@ -450,8 +446,7 @@ define i32 @length8(ptr %X, ptr %Y) nounwind {
450446
; CHECK-NEXT: rev x9, x9
451447
; CHECK-NEXT: cmp x8, x9
452448
; CHECK-NEXT: cset w8, hi
453-
; CHECK-NEXT: cset w9, lo
454-
; CHECK-NEXT: sub w0, w8, w9
449+
; CHECK-NEXT: csinv w0, w8, wzr, hs
455450
; CHECK-NEXT: ret
456451
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
457452
ret i32 %m

llvm/test/CodeGen/PowerPC/memcmp.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,12 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: ldbrx 3, 0, 3
88
; CHECK-NEXT: ldbrx 4, 0, 4
9-
; CHECK-NEXT: subc 5, 4, 3
10-
; CHECK-NEXT: subfe 5, 4, 4
11-
; CHECK-NEXT: subc 4, 3, 4
12-
; CHECK-NEXT: subfe 3, 3, 3
13-
; CHECK-NEXT: neg 5, 5
9+
; CHECK-NEXT: cmpld 3, 4
10+
; CHECK-NEXT: subc 3, 4, 3
11+
; CHECK-NEXT: subfe 3, 4, 4
12+
; CHECK-NEXT: li 4, -1
1413
; CHECK-NEXT: neg 3, 3
15-
; CHECK-NEXT: sub 3, 5, 3
14+
; CHECK-NEXT: isellt 3, 4, 3
1615
; CHECK-NEXT: extsw 3, 3
1716
; CHECK-NEXT: blr
1817
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8)
@@ -24,12 +23,11 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado
2423
; CHECK: # %bb.0:
2524
; CHECK-NEXT: lwbrx 3, 0, 3
2625
; CHECK-NEXT: lwbrx 4, 0, 4
26+
; CHECK-NEXT: cmplw 3, 4
2727
; CHECK-NEXT: sub 5, 4, 3
28-
; CHECK-NEXT: sub 3, 3, 4
28+
; CHECK-NEXT: li 3, -1
2929
; CHECK-NEXT: rldicl 5, 5, 1, 63
30-
; CHECK-NEXT: rldicl 3, 3, 1, 63
31-
; CHECK-NEXT: sub 3, 5, 3
32-
; CHECK-NEXT: extsw 3, 3
30+
; CHECK-NEXT: isellt 3, 3, 5
3331
; CHECK-NEXT: blr
3432
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
3533
ret i32 %call

llvm/test/CodeGen/PowerPC/memcmpIR.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,22 +59,14 @@ define signext i32 @test2(ptr nocapture readonly %buffer1, ptr nocapture readonl
5959
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
6060
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
6161
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
62-
; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[BSWAP1]], [[BSWAP2]]
63-
; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]]
64-
; CHECK-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
65-
; CHECK-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
66-
; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
67-
; CHECK-NEXT: ret i32 [[SUB]]
62+
; CHECK-NEXT: [[UCMP:%[0-9]+]] = call i32 @llvm.ucmp.i32.i32(i32 [[BSWAP1]], i32 [[BSWAP2]])
63+
; CHECK-NEXT: ret i32 [[UCMP]]
6864

6965
; CHECK-BE-LABEL: @test2(
7066
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr
7167
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
72-
; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]]
73-
; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]
74-
; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
75-
; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
76-
; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
77-
; CHECK-BE-NEXT: ret i32 [[SUB]]
68+
; CHECK-BE-NEXT: [[UCMP:%[0-9]+]] = call i32 @llvm.ucmp.i32.i32(i32 [[LOAD1]], i32 [[LOAD2]])
69+
; CHECK-BE-NEXT: ret i32 [[UCMP]]
7870

7971
entry:
8072
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)

llvm/test/CodeGen/RISCV/memcmp-optsize.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2648,9 +2648,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
26482648
; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 0(a1)
26492649
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0
26502650
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1
2651-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a1, a0
2652-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a0, a1
2653-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a2, a0
2651+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1
2652+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a1, a0
2653+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a2
26542654
; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
26552655
;
26562656
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_4:
@@ -2661,9 +2661,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
26612661
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
26622662
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32
26632663
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32
2664-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
2665-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
2666-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
2664+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
2665+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
2666+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
26672667
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
26682668
;
26692669
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_4:
@@ -2672,9 +2672,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
26722672
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 0(a1)
26732673
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0
26742674
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1
2675-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a1, a0
2676-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a0, a1
2677-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a2, a0
2675+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1
2676+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a1, a0
2677+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a2
26782678
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
26792679
;
26802680
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_4:
@@ -2685,9 +2685,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
26852685
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
26862686
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32
26872687
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a1, a1, 32
2688-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
2689-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
2690-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
2688+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
2689+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
2690+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
26912691
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
26922692
;
26932693
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_4:
@@ -3462,9 +3462,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
34623462
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ld a1, 0(a1)
34633463
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
34643464
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
3465-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
3466-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
3467-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
3465+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
3466+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
3467+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
34683468
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
34693469
;
34703470
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_8:
@@ -3495,9 +3495,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
34953495
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ld a1, 0(a1)
34963496
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0
34973497
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
3498-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
3499-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
3500-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
3498+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
3499+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
3500+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
35013501
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
35023502
;
35033503
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_8:

llvm/test/CodeGen/RISCV/memcmp.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3344,9 +3344,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
33443344
; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 0(a1)
33453345
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0
33463346
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1
3347-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a1, a0
3348-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a0, a1
3349-
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a2, a0
3347+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1
3348+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a1, a0
3349+
; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a2
33503350
; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
33513351
;
33523352
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_4:
@@ -3357,9 +3357,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
33573357
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
33583358
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32
33593359
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32
3360-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
3361-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
3362-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
3360+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
3361+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
3362+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
33633363
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
33643364
;
33653365
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_4:
@@ -3368,9 +3368,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
33683368
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 0(a1)
33693369
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0
33703370
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1
3371-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a1, a0
3372-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a0, a1
3373-
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a2, a0
3371+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1
3372+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a1, a0
3373+
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a2
33743374
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
33753375
;
33763376
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_4:
@@ -3381,9 +3381,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
33813381
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
33823382
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32
33833383
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a1, a1, 32
3384-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
3385-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
3386-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
3384+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
3385+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
3386+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
33873387
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
33883388
;
33893389
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_4:
@@ -4158,9 +4158,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind {
41584158
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ld a1, 0(a1)
41594159
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
41604160
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
4161-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
4162-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
4163-
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
4161+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
4162+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
4163+
; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
41644164
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
41654165
;
41664166
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_8:
@@ -4191,9 +4191,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind {
41914191
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ld a1, 0(a1)
41924192
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0
41934193
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
4194-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
4195-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
4196-
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
4194+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
4195+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
4196+
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
41974197
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
41984198
;
41994199
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_8:

llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,13 +193,13 @@ define i32 @length4(ptr %X, ptr %Y) nounwind {
193193
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
194194
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
195195
; X86-NEXT: movl (%ecx), %ecx
196-
; X86-NEXT: movl (%eax), %edx
196+
; X86-NEXT: movl (%eax), %eax
197197
; X86-NEXT: bswapl %ecx
198-
; X86-NEXT: bswapl %edx
199-
; X86-NEXT: xorl %eax, %eax
200-
; X86-NEXT: cmpl %edx, %ecx
198+
; X86-NEXT: bswapl %eax
199+
; X86-NEXT: cmpl %eax, %ecx
201200
; X86-NEXT: seta %al
202-
; X86-NEXT: sbbl $0, %eax
201+
; X86-NEXT: sbbb $0, %al
202+
; X86-NEXT: movsbl %al, %eax
203203
; X86-NEXT: retl
204204
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
205205
ret i32 %m

llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -179,14 +179,14 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
179179
define i32 @length4(ptr %X, ptr %Y) nounwind {
180180
; X64-LABEL: length4:
181181
; X64: # %bb.0:
182-
; X64-NEXT: movl (%rdi), %ecx
183-
; X64-NEXT: movl (%rsi), %edx
182+
; X64-NEXT: movl (%rdi), %eax
183+
; X64-NEXT: movl (%rsi), %ecx
184+
; X64-NEXT: bswapl %eax
184185
; X64-NEXT: bswapl %ecx
185-
; X64-NEXT: bswapl %edx
186-
; X64-NEXT: xorl %eax, %eax
187-
; X64-NEXT: cmpl %edx, %ecx
186+
; X64-NEXT: cmpl %ecx, %eax
188187
; X64-NEXT: seta %al
189-
; X64-NEXT: sbbl $0, %eax
188+
; X64-NEXT: sbbb $0, %al
189+
; X64-NEXT: movsbl %al, %eax
190190
; X64-NEXT: retq
191191
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
192192
ret i32 %m
@@ -391,14 +391,14 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
391391
define i32 @length8(ptr %X, ptr %Y) nounwind {
392392
; X64-LABEL: length8:
393393
; X64: # %bb.0:
394-
; X64-NEXT: movq (%rdi), %rcx
395-
; X64-NEXT: movq (%rsi), %rdx
394+
; X64-NEXT: movq (%rdi), %rax
395+
; X64-NEXT: movq (%rsi), %rcx
396+
; X64-NEXT: bswapq %rax
396397
; X64-NEXT: bswapq %rcx
397-
; X64-NEXT: bswapq %rdx
398-
; X64-NEXT: xorl %eax, %eax
399-
; X64-NEXT: cmpq %rdx, %rcx
398+
; X64-NEXT: cmpq %rcx, %rax
400399
; X64-NEXT: seta %al
401-
; X64-NEXT: sbbl $0, %eax
400+
; X64-NEXT: sbbb $0, %al
401+
; X64-NEXT: movsbl %al, %eax
402402
; X64-NEXT: retq
403403
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
404404
ret i32 %m

llvm/test/CodeGen/X86/memcmp-optsize-x32.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,13 @@ define i32 @length4(ptr %X, ptr %Y) nounwind optsize {
122122
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
123123
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
124124
; X86-NEXT: movl (%ecx), %ecx
125-
; X86-NEXT: movl (%eax), %edx
125+
; X86-NEXT: movl (%eax), %eax
126126
; X86-NEXT: bswapl %ecx
127-
; X86-NEXT: bswapl %edx
128-
; X86-NEXT: xorl %eax, %eax
129-
; X86-NEXT: cmpl %edx, %ecx
127+
; X86-NEXT: bswapl %eax
128+
; X86-NEXT: cmpl %eax, %ecx
130129
; X86-NEXT: seta %al
131-
; X86-NEXT: sbbl $0, %eax
130+
; X86-NEXT: sbbb $0, %al
131+
; X86-NEXT: movsbl %al, %eax
132132
; X86-NEXT: retl
133133
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
134134
ret i32 %m

0 commit comments

Comments
 (0)