[X86] LowerSELECTWithCmpZero - extend branchless OR/XOR select codegen to handle ADD/SUB as well #107612

RKSimon · 2024-09-06T17:13:15Z

Extend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" to also handle ADD/SUB.

As SUB is not commutative, we have to be more careful and only accept LHS matches.

…n to handle ADD/SUB as well Extend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" to also handle ADD/SUB. As SUB is not commutative, we have to be more careful and only accept LHS matches.

llvmbot · 2024-09-06T17:13:46Z

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Extend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" to also handle ADD/SUB.

As SUB is not commutative, we have to be more careful and only accept LHS matches.

Full diff: https://github.com/llvm/llvm-project/pull/107612.diff

3 Files Affected:

(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+23-9)
(modified) llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll (+30-36)
(modified) llvm/test/CodeGen/X86/select.ll (+32-40)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e20633e041072f..839b87dd5d4dd8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24084,18 +24084,32 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
   if (!CmpVT.isScalarInteger() || !VT.isScalarInteger())
     return SDValue();
 
-  // Convert OR/XOR 'identity' patterns (iff X is 0 or 1):
-  // select (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
-  // select (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
+  // Convert 'identity' patterns (iff X is 0 or 1):
+  // SELECT (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
+  // SELECT (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
+  // SELECT (X != 0), Y, (ADD Y, Z) -> (ADD Y, (AND (0 - X), Z))
+  // SELECT (X != 0), Y, (SUB Y, Z) -> (SUB Y, (AND (0 - X), Z))
   if (!Subtarget.canUseCMOV() && X86CC == X86::COND_E &&
       CmpVal.getOpcode() == ISD::AND && isOneConstant(CmpVal.getOperand(1))) {
     SDValue Src1, Src2;
     auto isIdentityPattern = [&]() {
-      if ((RHS.getOpcode() == ISD::XOR || RHS.getOpcode() == ISD::OR) &&
-          (RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS)) {
-        Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
-        Src2 = LHS;
-        return true;
+      switch (RHS.getOpcode()) {
+      case ISD::OR:
+      case ISD::XOR:
+      case ISD::ADD:
+        if (RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS) {
+          Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
+          Src2 = LHS;
+          return true;
+        }
+        break;
+      case ISD::SUB:
+        if (RHS.getOperand(0) == LHS) {
+          Src1 = RHS.getOperand(1);
+          Src2 = LHS;
+          return true;
+        }
+        break;
       }
       return false;
     };
@@ -24113,7 +24127,7 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
             DAG.getConstant(1, DL, VT));
       SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
       SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
-      return DAG.getNode(RHS.getOpcode(), DL, VT, And, Src2);  // And Op y
+      return DAG.getNode(RHS.getOpcode(), DL, VT, Src2, And);  // y Op And
     }
   }
 
diff --git a/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll b/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
index def4c08a3592ec..86a8a6a53248b7 100644
--- a/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
+++ b/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
@@ -191,12 +191,11 @@ define i32 @add_signbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: add_signbit_select_shl:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB6_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    addl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT:  .LBB6_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -220,12 +219,11 @@ define i32 @add_nosignbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: add_nosignbit_select_shl:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB7_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    addl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT:  .LBB7_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -425,12 +423,11 @@ define i32 @add_signbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: add_signbit_select_lshr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB14_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    addl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT:  .LBB14_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -454,12 +451,11 @@ define i32 @add_nosignbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: add_nosignbit_select_lshr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB15_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    addl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT:  .LBB15_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -659,12 +655,11 @@ define i32 @add_signbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: add_signbit_select_ashr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB22_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    addl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT:  .LBB22_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    sarl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -688,12 +683,11 @@ define i32 @add_nosignbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: add_nosignbit_select_ashr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB23_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    addl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT:  .LBB23_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    sarl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index aa7f509327002f..760c8355aa4572 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -1725,11 +1725,10 @@ define i32 @select_add(i32 %A, i32 %B, i8 %cond) {
 ;
 ; MCU-LABEL: select_add:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB36_2
-; MCU-NEXT:  # %bb.1: # %entry
-; MCU-NEXT:    addl %edx, %eax
-; MCU-NEXT:  .LBB36_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %edx, %ecx
+; MCU-NEXT:    addl %ecx, %eax
 ; MCU-NEXT:    retl
 entry:
  %and = and i8 %cond, 1
@@ -1773,11 +1772,10 @@ define i32 @select_add_b(i32 %A, i32 %B, i8 %cond) {
 ;
 ; MCU-LABEL: select_add_b:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB37_2
-; MCU-NEXT:  # %bb.1:
-; MCU-NEXT:    addl %edx, %eax
-; MCU-NEXT:  .LBB37_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %edx, %ecx
+; MCU-NEXT:    addl %ecx, %eax
 ; MCU-NEXT:    retl
 entry:
  %and = and i8 %cond, 1
@@ -1819,11 +1817,10 @@ define i32 @select_add_1(i32 %A, i32 %B, i32 %cond) {
 ;
 ; MCU-LABEL: select_add_1:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB38_2
-; MCU-NEXT:  # %bb.1: # %entry
-; MCU-NEXT:    addl %edx, %eax
-; MCU-NEXT:  .LBB38_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %edx, %ecx
+; MCU-NEXT:    addl %ecx, %eax
 ; MCU-NEXT:    retl
 entry:
  %and = and i32 %cond, 1
@@ -1867,11 +1864,10 @@ define i32 @select_add_1b(i32 %A, i32 %B, i32 %cond) {
 ;
 ; MCU-LABEL: select_add_1b:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB39_2
-; MCU-NEXT:  # %bb.1:
-; MCU-NEXT:    addl %edx, %eax
-; MCU-NEXT:  .LBB39_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %edx, %ecx
+; MCU-NEXT:    addl %ecx, %eax
 ; MCU-NEXT:    retl
 entry:
  %and = and i32 %cond, 1
@@ -1901,11 +1897,10 @@ define i32 @select_sub(i32 %A, i32 %B, i8 %cond) {
 ;
 ; MCU-LABEL: select_sub:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB40_2
-; MCU-NEXT:  # %bb.1: # %entry
-; MCU-NEXT:    subl %eax, %edx
-; MCU-NEXT:  .LBB40_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %eax, %ecx
+; MCU-NEXT:    subl %ecx, %edx
 ; MCU-NEXT:    movl %edx, %eax
 ; MCU-NEXT:    retl
 entry:
@@ -1938,11 +1933,10 @@ define i32 @select_sub_b(i32 %A, i32 %B, i8 %cond) {
 ;
 ; MCU-LABEL: select_sub_b:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB41_2
-; MCU-NEXT:  # %bb.1:
-; MCU-NEXT:    subl %eax, %edx
-; MCU-NEXT:  .LBB41_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %eax, %ecx
+; MCU-NEXT:    subl %ecx, %edx
 ; MCU-NEXT:    movl %edx, %eax
 ; MCU-NEXT:    retl
 entry:
@@ -1973,11 +1967,10 @@ define i32 @select_sub_1(i32 %A, i32 %B, i32 %cond) {
 ;
 ; MCU-LABEL: select_sub_1:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB42_2
-; MCU-NEXT:  # %bb.1: # %entry
-; MCU-NEXT:    subl %eax, %edx
-; MCU-NEXT:  .LBB42_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %eax, %ecx
+; MCU-NEXT:    subl %ecx, %edx
 ; MCU-NEXT:    movl %edx, %eax
 ; MCU-NEXT:    retl
 entry:
@@ -2010,11 +2003,10 @@ define i32 @select_sub_1b(i32 %A, i32 %B, i32 %cond) {
 ;
 ; MCU-LABEL: select_sub_1b:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB43_2
-; MCU-NEXT:  # %bb.1:
-; MCU-NEXT:    subl %eax, %edx
-; MCU-NEXT:  .LBB43_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %eax, %ecx
+; MCU-NEXT:    subl %ecx, %edx
 ; MCU-NEXT:    movl %edx, %eax
 ; MCU-NEXT:    retl
 entry:

goldsteinn · 2024-09-06T17:36:09Z

llvm/lib/Target/X86/X86ISelLowering.cpp

+  // SELECT (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
+  // SELECT (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
+  // SELECT (X != 0), Y, (ADD Y, Z) -> (ADD Y, (AND (0 - X), Z))
+  // SELECT (X != 0), Y, (SUB Y, Z) -> (SUB Y, (AND (0 - X), Z))
  if (!Subtarget.canUseCMOV() && X86CC == X86::COND_E &&
      CmpVal.getOpcode() == ISD::AND && isOneConstant(CmpVal.getOperand(1))) {
    SDValue Src1, Src2;
    auto isIdentityPattern = [&]() {


In the middle end we have getBinOpIdentity, maybe we should provide a generic helper in SelectionDAG?

Yes, DAG has llvm::isNeutralConstant which is similar, but no getNeutralConstant equivalent - we could add something that LowerSELECTWithCmpZero could use.

We should probably rename isNeutralConstant to isIdentityConstant as well to be consistent with other implementations.

goldsteinn

LGTM

llvm-ci · 2024-09-07T10:49:35Z

LLVM Buildbot has detected a new failure on builder openmp-offload-libc-amdgpu-runtime running on omp-vega20-1 while building llvm at step 10 "Add check check-offload".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/5129

Here is the relevant piece of the build log for the reference

Step 10 (Add check check-offload) failure: test (failure)
******************** TEST 'libomptarget :: amdgcn-amd-amdhsa :: sanitizer/ptr_outside_alloc_2.c' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 2
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/clang -fopenmp    -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib  -fopenmp-targets=amdgcn-amd-amdhsa -O3 /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/sanitizer/ptr_outside_alloc_2.c -o /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/sanitizer/Output/ptr_outside_alloc_2.c.tmp /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/clang -fopenmp -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib -fopenmp-targets=amdgcn-amd-amdhsa -O3 /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/sanitizer/ptr_outside_alloc_2.c -o /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/sanitizer/Output/ptr_outside_alloc_2.c.tmp /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a
# RUN: at line 3
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/sanitizer/Output/ptr_outside_alloc_2.c.tmp 2>&1 | /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/sanitizer/ptr_outside_alloc_2.c --check-prefixes=CHECK
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/sanitizer/Output/ptr_outside_alloc_2.c.tmp
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/sanitizer/ptr_outside_alloc_2.c --check-prefixes=CHECK
# .---command stderr------------
# | /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/sanitizer/ptr_outside_alloc_2.c:21:11: error: CHECK: expected string not found in input
# | // CHECK: OFFLOAD ERROR: Memory access fault by GPU {{.*}} (agent 0x{{.*}}) at virtual address [[PTR:0x[0-9a-z]*]]. Reasons: {{.*}}
# |           ^
# | <stdin>:1:1: note: scanning from here
# | AMDGPU error: Error in hsa_amd_memory_pool_allocate: HSA_STATUS_ERROR_OUT_OF_RESOURCES: The runtime failed to allocate the necessary resources. This error may also occur when the core runtime library needs to spawn threads or create internal OS-specific events.
# | ^
# | 
# | Input file: <stdin>
# | Check file: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/sanitizer/ptr_outside_alloc_2.c
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |           1: AMDGPU error: Error in hsa_amd_memory_pool_allocate: HSA_STATUS_ERROR_OUT_OF_RESOURCES: The runtime failed to allocate the necessary resources. This error may also occur when the core runtime library needs to spawn threads or create internal OS-specific events. 
# | check:21     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |           2: AMDGPU error: Error in hsa_amd_memory_pool_allocate: HSA_STATUS_ERROR_OUT_OF_RESOURCES: The runtime failed to allocate the necessary resources. This error may also occur when the core runtime library needs to spawn threads or create internal OS-specific events. 
# | check:21     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           3: "PluginInterface" error: Failure to allocate device memory: Failed to allocate from memory manager 
# | check:21     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           4: omptarget error: Call to getTargetPointer returned null pointer (device failure or illegal mapping). 
# | check:21     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           5: omptarget error: Call to targetDataBegin failed, abort target. 
# | check:21     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           6: omptarget error: Failed to process data before launching the kernel. 
# | check:21     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           .
# |           .
# |           .
# | >>>>>>
# `-----------------------------
# error: command failed with exit status: 1

--

********************

RKSimon requested review from phoebewang and goldsteinn September 6, 2024 17:13

llvmbot added the backend:X86 label Sep 6, 2024

goldsteinn reviewed Sep 6, 2024

View reviewed changes

goldsteinn approved these changes Sep 6, 2024

View reviewed changes

RKSimon merged commit 6df1291 into llvm:main Sep 7, 2024
7 of 10 checks passed

RKSimon deleted the x86-branchless-select-addsub branch September 7, 2024 10:24

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[X86] LowerSELECTWithCmpZero - extend branchless OR/XOR select codegen to handle ADD/SUB as well #107612

[X86] LowerSELECTWithCmpZero - extend branchless OR/XOR select codegen to handle ADD/SUB as well #107612

Uh oh!

RKSimon commented Sep 6, 2024

Uh oh!

llvmbot commented Sep 6, 2024

Uh oh!

goldsteinn Sep 6, 2024

Uh oh!

RKSimon Sep 6, 2024

Uh oh!

goldsteinn left a comment

Uh oh!

Uh oh!

llvm-ci commented Sep 7, 2024

Uh oh!

Uh oh!

[X86] LowerSELECTWithCmpZero - extend branchless OR/XOR select codegen to handle ADD/SUB as well #107612

[X86] LowerSELECTWithCmpZero - extend branchless OR/XOR select codegen to handle ADD/SUB as well #107612

Uh oh!

Conversation

RKSimon commented Sep 6, 2024

Uh oh!

llvmbot commented Sep 6, 2024

Uh oh!

goldsteinn Sep 6, 2024

Choose a reason for hiding this comment

Uh oh!

RKSimon Sep 6, 2024

Choose a reason for hiding this comment

Uh oh!

goldsteinn left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Sep 7, 2024

Uh oh!

Uh oh!