llvm
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Lines changed: 14 additions & 5 deletions
diff --git a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
Lines changed: 1 addition & 2 deletions
@@ -15928,11 +15928,20 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT,
       return getVectorBitwiseReduce(Opcode, HalfVec, VT, DL, DAG);
     }
 
-    // Vectors that are less than 64 bits get widened to neatly fit a 64 bit
-    // register, so e.g. <4 x i1> gets lowered to <4 x i16>. Sign extending to
-    // this element size leads to the best codegen, since e.g. setcc results
-    // might need to be truncated otherwise.
-    EVT ExtendedVT = MVT::getIntegerVT(std::max(64u / NumElems, 8u));
+    // A setcc result is widened to 128 bits when its input operands are at
+    // least 128 bits wide and, for reduce_and and reduce_or only, it has at
+    // least 4 elements. Otherwise, vectors narrower than 64 bits are widened
+    // to neatly fit a 64 bit register. So e.g. <4 x i1> gets lowered to
+    // either <4 x i16> or <4 x i32>. Sign extending to this element size leads
+    // to the best codegen, since e.g. setcc results might need to be truncated
+    // otherwise.
+    unsigned ExtendedWidth = 64;
+    if ((ScalarOpcode == ISD::XOR || NumElems >= 4) &&
+        Vec.getOpcode() == ISD::SETCC &&
+        Vec.getOperand(0).getValueSizeInBits() >= 128) {
+      ExtendedWidth = 128;
+    }
+    EVT ExtendedVT = MVT::getIntegerVT(std::max(ExtendedWidth / NumElems, 8u));
 
     // any_ext doesn't work with umin/umax, so only use it for uadd.
     unsigned ExtendOp =
 
@@ -12,8 +12,7 @@ define i1 @unordered_floating_point_compare_on_v8f32(<8 x float> %a_vec) {
 ; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    mvn v0.16b, v0.16b
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    umaxv b0, v0.8b
+; CHECK-NEXT:    umaxv h0, v0.8h
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    bic w0, w8, w9
 ; CHECK-NEXT:    ret