Skip to content

Commit a283607

Browse files
committed
[X86] Invert (and X, (or Y, ~Z)) back into (and X, ~(and ~Y, Z))
The reason for this inversion is to make use of the `andn` instruction, which in turn yields a shorter instruction sequence. This is the assembly we produced previously:
```
not rcx
and rsi, rdx
andn rax, rsi, rdi
or rcx, rdx
and rax, rcx
ret
```
The assembly with the inversion:
```
and rsi, rdx
andn rcx, rdx, rcx
andn rax, rsi, rdi
andn rax, rcx, rax
ret
```
1 parent f00c946 commit a283607

File tree

3 files changed

+70
-1
lines changed

3 files changed

+70
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49633,6 +49633,32 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
4963349633
(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
4963449634
}
4963549635

49636+
/// InstCombine converts:
49637+
/// `(and X, ~(and ~Y, Z))`
49638+
/// to
49639+
/// `(and X, (or Y, ~Z))`
49640+
///
49641+
/// But we should undo this transformation if the `andn` instruction is
49642+
/// available to us.
49643+
static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG,
49644+
const X86Subtarget &Subtarget) {
49645+
49646+
using namespace llvm::SDPatternMatch;
49647+
MVT VT = N->getSimpleValueType(0);
49648+
SDLoc DL(N);
49649+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
49650+
if (TLI.hasAndNot(SDValue(N, 0))) {
49651+
SDValue X, Y, Z;
49652+
if (sd_match(N, m_And(m_Value(X),
49653+
m_OneUse(m_Or(m_Value(Y), m_Not(m_Value(Z)))))))
49654+
return DAG.getNode(
49655+
ISD::AND, DL, VT, X,
49656+
DAG.getNOT(
49657+
DL, DAG.getNode(ISD::AND, DL, VT, DAG.getNOT(DL, Y, VT), Z), VT));
49658+
}
49659+
return SDValue();
49660+
}
49661+
4963649662
// This function recognizes cases where X86 bzhi instruction can replace and
4963749663
// 'and-load' sequence.
4963849664
// In case of loading integer value from an array of constants which is defined
@@ -50130,6 +50156,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5013050156
if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
5013150157
return R;
5013250158

50159+
if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG, Subtarget))
50160+
return R;
50161+
5013350162
// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
5013450163
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
5013550164
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?

llvm/test/CodeGen/X86/avx512vl-logic.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -980,7 +980,7 @@ define <4 x i32> @ternlog_or_andn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
980980
define <4 x i32> @ternlog_and_orn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
981981
; CHECK-LABEL: ternlog_and_orn:
982982
; CHECK: ## %bb.0:
983-
; CHECK-NEXT: vpternlogd $176, %xmm1, %xmm2, %xmm0
983+
; CHECK-NEXT: vpternlogd $208, %xmm2, %xmm1, %xmm0
984984
; CHECK-NEXT: retq
985985
%a = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
986986
%b = or <4 x i32> %a, %y

llvm/test/CodeGen/X86/pr108731.ll

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-gnu-unknown -mcpu=znver3 | FileCheck %s
3+
4+
; Scalar i64 case: %8 is (and %6, (or ~%3, %2)), i.e. the
; (and X, (or Y, ~Z)) shape with Y = %2 and Z = %3. The CHECK lines below
; expect the whole body to be emitted as and/andn instructions only, with no
; separate not/or, for the -mcpu=znver3 RUN line of this file.
define dso_local i64 @foo(i64 %0, i64 %1, i64 %2, i64 %3) local_unnamed_addr {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %Entry
7+
; CHECK-NEXT: andq %rdx, %rsi
8+
; CHECK-NEXT: andnq %rcx, %rdx, %rcx
9+
; CHECK-NEXT: andnq %rdi, %rsi, %rax
10+
; CHECK-NEXT: andnq %rax, %rcx, %rax
11+
; CHECK-NEXT: retq
12+
Entry:
13+
%4 = and i64 %2, %1
14+
%5 = xor i64 %4, -1
15+
%6 = and i64 %5, %0
16+
%.not = xor i64 %3, -1
17+
%7 = or i64 %.not, %2
18+
%8 = and i64 %6, %7
19+
ret i64 %8
20+
}
21+
22+
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
23+
24+
; Vector <16 x i8> variant of the same (and X, (or Y, ~Z)) shape, with
; Y = %2 and Z = %3. The CHECK lines below expect only vandps/vandnps
; instructions, with no separate not/or, for the -mcpu=znver3 RUN line of
; this file.
define dso_local <16 x i8> @fooVec(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3) local_unnamed_addr {
25+
; CHECK-LABEL: fooVec:
26+
; CHECK: # %bb.0: # %Entry
27+
; CHECK-NEXT: vandps %xmm1, %xmm2, %xmm1
28+
; CHECK-NEXT: vandnps %xmm3, %xmm2, %xmm2
29+
; CHECK-NEXT: vandnps %xmm0, %xmm1, %xmm0
30+
; CHECK-NEXT: vandnps %xmm0, %xmm2, %xmm0
31+
; CHECK-NEXT: retq
32+
Entry:
33+
%4 = and <16 x i8> %2, %1
34+
%5 = xor <16 x i8> %4, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
35+
%6 = and <16 x i8> %5, %0
36+
%.not = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
37+
%7 = or <16 x i8> %.not, %2
38+
%8 = and <16 x i8> %6, %7
39+
ret <16 x i8> %8
40+
}

0 commit comments

Comments
 (0)