Skip to content

Commit 71fe983

Browse files
committed
[X86] Invert (and X, (or Y, ~Z)) back into (and X, ~(and ~Y, Z))
The reason for this inversion is to make use of the `andn` instruction, which results in fewer instructions. This is the assembly we produced previously:
```
not rcx
and rsi, rdx
andn rax, rsi, rdi
or rcx, rdx
and rax, rcx
ret
```
The assembly with the inversion:
```
and rsi, rdx
andn rcx, rdx, rcx
andn rax, rsi, rdi
andn rax, rcx, rax
ret
```
1 parent 12e8e0b commit 71fe983

File tree

3 files changed

+70
-1
lines changed

3 files changed

+70
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50034,6 +50034,32 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
5003450034
(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
5003550035
}
5003650036

50037+
/// InstCombine converts:
50038+
/// `(and X, ~(and ~Y, Z))`
50039+
/// to
50040+
/// `(and X, (or Y, ~Z))`
50041+
///
50042+
/// But we should undo this transformation if the `andn` instruction is
50043+
/// available to us.
50044+
static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG,
50045+
const X86Subtarget &Subtarget) {
50046+
50047+
using namespace llvm::SDPatternMatch;
50048+
MVT VT = N->getSimpleValueType(0);
50049+
SDLoc DL(N);
50050+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
50051+
if (TLI.hasAndNot(SDValue(N, 0))) {
50052+
SDValue X, Y, Z;
50053+
if (sd_match(N, m_And(m_Value(X),
50054+
m_OneUse(m_Or(m_Value(Y), m_Not(m_Value(Z)))))))
50055+
return DAG.getNode(
50056+
ISD::AND, DL, VT, X,
50057+
DAG.getNOT(
50058+
DL, DAG.getNode(ISD::AND, DL, VT, DAG.getNOT(DL, Y, VT), Z), VT));
50059+
}
50060+
return SDValue();
50061+
}
50062+
5003750063
// This function recognizes cases where X86 bzhi instruction can replace and
5003850064
// 'and-load' sequence.
5003950065
// In case of loading integer value from an array of constants which is defined
@@ -50531,6 +50557,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5053150557
if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
5053250558
return R;
5053350559

50560+
if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG, Subtarget))
50561+
return R;
50562+
5053450563
// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
5053550564
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
5053650565
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?

llvm/test/CodeGen/X86/avx512vl-logic.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -980,7 +980,7 @@ define <4 x i32> @ternlog_or_andn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
980980
define <4 x i32> @ternlog_and_orn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
981981
; CHECK-LABEL: ternlog_and_orn:
982982
; CHECK: ## %bb.0:
983-
; CHECK-NEXT: vpternlogd $176, %xmm1, %xmm2, %xmm0
983+
; CHECK-NEXT: vpternlogd $208, %xmm2, %xmm1, %xmm0
984984
; CHECK-NEXT: retq
985985
%a = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
986986
%b = or <4 x i32> %a, %y

llvm/test/CodeGen/X86/pr108731.ll

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-gnu-unknown -mcpu=znver3 | FileCheck %s
3+
4+
; Scalar i64 case. The body computes
;   (%0 & ~(%1 & %2)) & (%2 | ~%3)
; so the final `and` has the shape (and X, (or Y, ~Z)) with Y = %2, Z = %3.
; The CHECK lines expect this to be emitted as one `andq` followed by three
; `andnq` instructions, with no separate not/or.
define dso_local i64 @foo(i64 %0, i64 %1, i64 %2, i64 %3) local_unnamed_addr {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %Entry
7+
; CHECK-NEXT: andq %rdx, %rsi
8+
; CHECK-NEXT: andnq %rcx, %rdx, %rcx
9+
; CHECK-NEXT: andnq %rdi, %rsi, %rax
10+
; CHECK-NEXT: andnq %rax, %rcx, %rax
11+
; CHECK-NEXT: retq
12+
Entry:
13+
%4 = and i64 %2, %1
14+
%5 = xor i64 %4, -1
15+
%6 = and i64 %5, %0
16+
%.not = xor i64 %3, -1
17+
%7 = or i64 %.not, %2
18+
%8 = and i64 %6, %7
19+
ret i64 %8
20+
}
21+
22+
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
23+
24+
; Vector <16 x i8> counterpart of @foo: the same
;   (%0 & ~(%1 & %2)) & (%2 | ~%3)
; expression, with the NOTs written as xor against an all-ones vector.
; The CHECK lines expect one `vandps` followed by three `vandnps`.
define dso_local <16 x i8> @fooVec(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3) local_unnamed_addr {
25+
; CHECK-LABEL: fooVec:
26+
; CHECK: # %bb.0: # %Entry
27+
; CHECK-NEXT: vandps %xmm1, %xmm2, %xmm1
28+
; CHECK-NEXT: vandnps %xmm3, %xmm2, %xmm2
29+
; CHECK-NEXT: vandnps %xmm0, %xmm1, %xmm0
30+
; CHECK-NEXT: vandnps %xmm0, %xmm2, %xmm0
31+
; CHECK-NEXT: retq
32+
Entry:
33+
%4 = and <16 x i8> %2, %1
34+
%5 = xor <16 x i8> %4, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
35+
%6 = and <16 x i8> %5, %0
36+
%.not = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
37+
%7 = or <16 x i8> %.not, %2
38+
%8 = and <16 x i8> %6, %7
39+
ret <16 x i8> %8
40+
}

0 commit comments

Comments
 (0)