Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit e872521

Browse files
committed
Merging r308986 and r308963:
------------------------------------------------------------------------
r308963 | rksimon | 2017-07-25 03:33:36 -0700 (Tue, 25 Jul 2017) | 1 line

[X86] Add 24-byte memcmp tests (PR33914)
------------------------------------------------------------------------

------------------------------------------------------------------------
r308986 | rksimon | 2017-07-25 10:04:37 -0700 (Tue, 25 Jul 2017) | 9 lines

[X86][CGP] Reduce memcmp() expansion to 2 load pairs (PR33914)

D35067/rL308322 attempted to support up to 4 load pairs for memcmp inlining which resulted in regressions for some optimized libc memcmp implementations (PR33914).

Until we can match these more optimal cases, this patch reduces the memcmp expansion to a maximum of 2 load pairs (which matches what we do for -Os).

This patch should be considered for the 5.0.0 release branch as well

Differential Revision: https://reviews.llvm.org/D35830
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@309127 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent e622303 commit e872521

File tree

5 files changed

+411
-1258
lines changed

5 files changed

+411
-1258
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1672,8 +1672,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
16721672

16731673
// TODO: These control memcmp expansion in CGP and could be raised higher, but
16741674
// that needs to be benchmarked and balanced with the potential use of vector
1675-
// load/store types (PR33329).
1676-
MaxLoadsPerMemcmp = 4;
1675+
// load/store types (PR33329, PR33914).
1676+
MaxLoadsPerMemcmp = 2;
16771677
MaxLoadsPerMemcmpOptSize = 2;
16781678

16791679
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).

test/CodeGen/X86/memcmp-minsize.ll

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,93 @@ define i1 @length16_eq_const(i8* %X) nounwind minsize {
527527
ret i1 %c
528528
}
529529

530+
; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
531+
532+
define i32 @length24(i8* %X, i8* %Y) nounwind minsize {
533+
; X86-LABEL: length24:
534+
; X86: # BB#0:
535+
; X86-NEXT: subl $16, %esp
536+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
537+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
538+
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
539+
; X86-NEXT: movl %eax, (%esp)
540+
; X86-NEXT: andl $0, {{[0-9]+}}(%esp)
541+
; X86-NEXT: movl $24, {{[0-9]+}}(%esp)
542+
; X86-NEXT: calll memcmp
543+
; X86-NEXT: addl $16, %esp
544+
; X86-NEXT: retl
545+
;
546+
; X64-LABEL: length24:
547+
; X64: # BB#0:
548+
; X64-NEXT: pushq $24
549+
; X64-NEXT: popq %rdx
550+
; X64-NEXT: jmp memcmp # TAILCALL
551+
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
552+
ret i32 %m
553+
}
554+
555+
define i1 @length24_eq(i8* %x, i8* %y) nounwind minsize {
556+
; X86-LABEL: length24_eq:
557+
; X86: # BB#0:
558+
; X86-NEXT: subl $16, %esp
559+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
560+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
561+
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
562+
; X86-NEXT: movl %eax, (%esp)
563+
; X86-NEXT: andl $0, {{[0-9]+}}(%esp)
564+
; X86-NEXT: movl $24, {{[0-9]+}}(%esp)
565+
; X86-NEXT: calll memcmp
566+
; X86-NEXT: testl %eax, %eax
567+
; X86-NEXT: sete %al
568+
; X86-NEXT: addl $16, %esp
569+
; X86-NEXT: retl
570+
;
571+
; X64-LABEL: length24_eq:
572+
; X64: # BB#0:
573+
; X64-NEXT: pushq %rax
574+
; X64-NEXT: pushq $24
575+
; X64-NEXT: popq %rdx
576+
; X64-NEXT: callq memcmp
577+
; X64-NEXT: testl %eax, %eax
578+
; X64-NEXT: sete %al
579+
; X64-NEXT: popq %rcx
580+
; X64-NEXT: retq
581+
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
582+
%cmp = icmp eq i32 %call, 0
583+
ret i1 %cmp
584+
}
585+
586+
define i1 @length24_eq_const(i8* %X) nounwind minsize {
587+
; X86-LABEL: length24_eq_const:
588+
; X86: # BB#0:
589+
; X86-NEXT: subl $16, %esp
590+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
591+
; X86-NEXT: movl %eax, (%esp)
592+
; X86-NEXT: andl $0, {{[0-9]+}}(%esp)
593+
; X86-NEXT: movl $24, {{[0-9]+}}(%esp)
594+
; X86-NEXT: movl $.L.str, {{[0-9]+}}(%esp)
595+
; X86-NEXT: calll memcmp
596+
; X86-NEXT: testl %eax, %eax
597+
; X86-NEXT: setne %al
598+
; X86-NEXT: addl $16, %esp
599+
; X86-NEXT: retl
600+
;
601+
; X64-LABEL: length24_eq_const:
602+
; X64: # BB#0:
603+
; X64-NEXT: pushq %rax
604+
; X64-NEXT: pushq $24
605+
; X64-NEXT: popq %rdx
606+
; X64-NEXT: movl $.L.str, %esi
607+
; X64-NEXT: callq memcmp
608+
; X64-NEXT: testl %eax, %eax
609+
; X64-NEXT: setne %al
610+
; X64-NEXT: popq %rcx
611+
; X64-NEXT: retq
612+
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
613+
%c = icmp ne i32 %m, 0
614+
ret i1 %c
615+
}
616+
530617
define i32 @length32(i8* %X, i8* %Y) nounwind minsize {
531618
; X86-LABEL: length32:
532619
; X86: # BB#0:

test/CodeGen/X86/memcmp-optsize.ll

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,82 @@ define i1 @length16_eq_const(i8* %X) nounwind optsize {
699699
ret i1 %c
700700
}
701701

702+
; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
703+
704+
define i32 @length24(i8* %X, i8* %Y) nounwind optsize {
705+
; X86-LABEL: length24:
706+
; X86: # BB#0:
707+
; X86-NEXT: pushl $0
708+
; X86-NEXT: pushl $24
709+
; X86-NEXT: pushl {{[0-9]+}}(%esp)
710+
; X86-NEXT: pushl {{[0-9]+}}(%esp)
711+
; X86-NEXT: calll memcmp
712+
; X86-NEXT: addl $16, %esp
713+
; X86-NEXT: retl
714+
;
715+
; X64-LABEL: length24:
716+
; X64: # BB#0:
717+
; X64-NEXT: movl $24, %edx
718+
; X64-NEXT: jmp memcmp # TAILCALL
719+
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
720+
ret i32 %m
721+
}
722+
723+
define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
724+
; X86-LABEL: length24_eq:
725+
; X86: # BB#0:
726+
; X86-NEXT: pushl $0
727+
; X86-NEXT: pushl $24
728+
; X86-NEXT: pushl {{[0-9]+}}(%esp)
729+
; X86-NEXT: pushl {{[0-9]+}}(%esp)
730+
; X86-NEXT: calll memcmp
731+
; X86-NEXT: addl $16, %esp
732+
; X86-NEXT: testl %eax, %eax
733+
; X86-NEXT: sete %al
734+
; X86-NEXT: retl
735+
;
736+
; X64-LABEL: length24_eq:
737+
; X64: # BB#0:
738+
; X64-NEXT: pushq %rax
739+
; X64-NEXT: movl $24, %edx
740+
; X64-NEXT: callq memcmp
741+
; X64-NEXT: testl %eax, %eax
742+
; X64-NEXT: sete %al
743+
; X64-NEXT: popq %rcx
744+
; X64-NEXT: retq
745+
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
746+
%cmp = icmp eq i32 %call, 0
747+
ret i1 %cmp
748+
}
749+
750+
define i1 @length24_eq_const(i8* %X) nounwind optsize {
751+
; X86-LABEL: length24_eq_const:
752+
; X86: # BB#0:
753+
; X86-NEXT: pushl $0
754+
; X86-NEXT: pushl $24
755+
; X86-NEXT: pushl $.L.str
756+
; X86-NEXT: pushl {{[0-9]+}}(%esp)
757+
; X86-NEXT: calll memcmp
758+
; X86-NEXT: addl $16, %esp
759+
; X86-NEXT: testl %eax, %eax
760+
; X86-NEXT: setne %al
761+
; X86-NEXT: retl
762+
;
763+
; X64-LABEL: length24_eq_const:
764+
; X64: # BB#0:
765+
; X64-NEXT: pushq %rax
766+
; X64-NEXT: movl $.L.str, %esi
767+
; X64-NEXT: movl $24, %edx
768+
; X64-NEXT: callq memcmp
769+
; X64-NEXT: testl %eax, %eax
770+
; X64-NEXT: setne %al
771+
; X64-NEXT: popq %rcx
772+
; X64-NEXT: retq
773+
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
774+
%c = icmp ne i32 %m, 0
775+
ret i1 %c
776+
}
777+
702778
define i32 @length32(i8* %X, i8* %Y) nounwind optsize {
703779
; X86-LABEL: length32:
704780
; X86: # BB#0:

0 commit comments

Comments
 (0)