Skip to content

Commit 1104056

Browse files
committed
[X86] preferABDSToABSWithNSW - use ABDS for i32/i64 if we have CMOV
Now that we have better ABDS lowering, prefer cmov(sub(x,y),sub(y,x)) to cmov(abs(sub(x,y)),sub(x,y)) to improve instruction-level parallelism (ILP).
1 parent fa38527 commit 1104056

File tree

3 files changed: 45 additions and 35 deletions.

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58210,7 +58210,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5821058210
}
5821158211

5821258212
bool X86TargetLowering::preferABDSToABSWithNSW(EVT VT) const {
58213-
return false;
58213+
return Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64);
5821458214
}
5821558215

5821658216
// Prefer (non-AVX512) vector TRUNCATE(SIGN_EXTEND_INREG(X)) to use of PACKSS.

llvm/test/CodeGen/X86/abds-neg.ll

Lines changed: 22 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1027,19 +1027,22 @@ define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind {
10271027
define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
10281028
; X86-LABEL: abd_subnsw_i32:
10291029
; X86: # %bb.0:
1030+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
10301031
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1031-
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
1032-
; X86-NEXT: movl %ecx, %eax
1032+
; X86-NEXT: movl %ecx, %edx
1033+
; X86-NEXT: subl %eax, %edx
1034+
; X86-NEXT: subl %ecx, %eax
1035+
; X86-NEXT: cmovll %edx, %eax
10331036
; X86-NEXT: negl %eax
1034-
; X86-NEXT: cmovnsl %ecx, %eax
10351037
; X86-NEXT: retl
10361038
;
10371039
; X64-LABEL: abd_subnsw_i32:
10381040
; X64: # %bb.0:
1039-
; X64-NEXT: subl %esi, %edi
10401041
; X64-NEXT: movl %edi, %eax
1042+
; X64-NEXT: subl %esi, %eax
1043+
; X64-NEXT: subl %edi, %esi
1044+
; X64-NEXT: cmovgel %esi, %eax
10411045
; X64-NEXT: negl %eax
1042-
; X64-NEXT: cmovnsl %edi, %eax
10431046
; X64-NEXT: retq
10441047
%sub = sub nsw i32 %a, %b
10451048
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
@@ -1050,19 +1053,22 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
10501053
define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
10511054
; X86-LABEL: abd_subnsw_i32_undef:
10521055
; X86: # %bb.0:
1056+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
10531057
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1054-
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
1055-
; X86-NEXT: movl %ecx, %eax
1058+
; X86-NEXT: movl %ecx, %edx
1059+
; X86-NEXT: subl %eax, %edx
1060+
; X86-NEXT: subl %ecx, %eax
1061+
; X86-NEXT: cmovll %edx, %eax
10561062
; X86-NEXT: negl %eax
1057-
; X86-NEXT: cmovnsl %ecx, %eax
10581063
; X86-NEXT: retl
10591064
;
10601065
; X64-LABEL: abd_subnsw_i32_undef:
10611066
; X64: # %bb.0:
1062-
; X64-NEXT: subl %esi, %edi
10631067
; X64-NEXT: movl %edi, %eax
1068+
; X64-NEXT: subl %esi, %eax
1069+
; X64-NEXT: subl %edi, %esi
1070+
; X64-NEXT: cmovgel %esi, %eax
10641071
; X64-NEXT: negl %eax
1065-
; X64-NEXT: cmovnsl %edi, %eax
10661072
; X64-NEXT: retq
10671073
%sub = sub nsw i32 %a, %b
10681074
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
@@ -1090,10 +1096,11 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
10901096
;
10911097
; X64-LABEL: abd_subnsw_i64:
10921098
; X64: # %bb.0:
1093-
; X64-NEXT: subq %rsi, %rdi
10941099
; X64-NEXT: movq %rdi, %rax
1100+
; X64-NEXT: subq %rsi, %rax
1101+
; X64-NEXT: subq %rdi, %rsi
1102+
; X64-NEXT: cmovgeq %rsi, %rax
10951103
; X64-NEXT: negq %rax
1096-
; X64-NEXT: cmovnsq %rdi, %rax
10971104
; X64-NEXT: retq
10981105
%sub = sub nsw i64 %a, %b
10991106
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
@@ -1121,10 +1128,11 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
11211128
;
11221129
; X64-LABEL: abd_subnsw_i64_undef:
11231130
; X64: # %bb.0:
1124-
; X64-NEXT: subq %rsi, %rdi
11251131
; X64-NEXT: movq %rdi, %rax
1132+
; X64-NEXT: subq %rsi, %rax
1133+
; X64-NEXT: subq %rdi, %rsi
1134+
; X64-NEXT: cmovgeq %rsi, %rax
11261135
; X64-NEXT: negq %rax
1127-
; X64-NEXT: cmovnsq %rdi, %rax
11281136
; X64-NEXT: retq
11291137
%sub = sub nsw i64 %a, %b
11301138
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)

llvm/test/CodeGen/X86/abds.ll

Lines changed: 22 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -928,19 +928,20 @@ define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind {
928928
define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
929929
; X86-LABEL: abd_subnsw_i32:
930930
; X86: # %bb.0:
931+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
931932
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
932-
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
933-
; X86-NEXT: movl %ecx, %eax
934-
; X86-NEXT: negl %eax
935-
; X86-NEXT: cmovsl %ecx, %eax
933+
; X86-NEXT: movl %ecx, %edx
934+
; X86-NEXT: subl %eax, %edx
935+
; X86-NEXT: subl %ecx, %eax
936+
; X86-NEXT: cmovll %edx, %eax
936937
; X86-NEXT: retl
937938
;
938939
; X64-LABEL: abd_subnsw_i32:
939940
; X64: # %bb.0:
940-
; X64-NEXT: subl %esi, %edi
941941
; X64-NEXT: movl %edi, %eax
942-
; X64-NEXT: negl %eax
943-
; X64-NEXT: cmovsl %edi, %eax
942+
; X64-NEXT: subl %esi, %eax
943+
; X64-NEXT: subl %edi, %esi
944+
; X64-NEXT: cmovgel %esi, %eax
944945
; X64-NEXT: retq
945946
%sub = sub nsw i32 %a, %b
946947
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
@@ -950,19 +951,20 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
950951
define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
951952
; X86-LABEL: abd_subnsw_i32_undef:
952953
; X86: # %bb.0:
954+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
953955
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
954-
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
955-
; X86-NEXT: movl %ecx, %eax
956-
; X86-NEXT: negl %eax
957-
; X86-NEXT: cmovsl %ecx, %eax
956+
; X86-NEXT: movl %ecx, %edx
957+
; X86-NEXT: subl %eax, %edx
958+
; X86-NEXT: subl %ecx, %eax
959+
; X86-NEXT: cmovll %edx, %eax
958960
; X86-NEXT: retl
959961
;
960962
; X64-LABEL: abd_subnsw_i32_undef:
961963
; X64: # %bb.0:
962-
; X64-NEXT: subl %esi, %edi
963964
; X64-NEXT: movl %edi, %eax
964-
; X64-NEXT: negl %eax
965-
; X64-NEXT: cmovsl %edi, %eax
965+
; X64-NEXT: subl %esi, %eax
966+
; X64-NEXT: subl %edi, %esi
967+
; X64-NEXT: cmovgel %esi, %eax
966968
; X64-NEXT: retq
967969
%sub = sub nsw i32 %a, %b
968970
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
@@ -986,10 +988,10 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
986988
;
987989
; X64-LABEL: abd_subnsw_i64:
988990
; X64: # %bb.0:
989-
; X64-NEXT: subq %rsi, %rdi
990991
; X64-NEXT: movq %rdi, %rax
991-
; X64-NEXT: negq %rax
992-
; X64-NEXT: cmovsq %rdi, %rax
992+
; X64-NEXT: subq %rsi, %rax
993+
; X64-NEXT: subq %rdi, %rsi
994+
; X64-NEXT: cmovgeq %rsi, %rax
993995
; X64-NEXT: retq
994996
%sub = sub nsw i64 %a, %b
995997
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
@@ -1013,10 +1015,10 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
10131015
;
10141016
; X64-LABEL: abd_subnsw_i64_undef:
10151017
; X64: # %bb.0:
1016-
; X64-NEXT: subq %rsi, %rdi
10171018
; X64-NEXT: movq %rdi, %rax
1018-
; X64-NEXT: negq %rax
1019-
; X64-NEXT: cmovsq %rdi, %rax
1019+
; X64-NEXT: subq %rsi, %rax
1020+
; X64-NEXT: subq %rdi, %rsi
1021+
; X64-NEXT: cmovgeq %rsi, %rax
10201022
; X64-NEXT: retq
10211023
%sub = sub nsw i64 %a, %b
10221024
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)

0 commit comments

Comments
 (0)