Skip to content

Commit 73b8c4a

Browse files
committed
[X86] Try Folding icmp of v8i32 -> fcmp of v8f32 on AVX
Fixes: #82242 The idea is that AVX doesn't support comparisons for `v8i32`, so it splits the comparison into 2x `v4i32` comparisons + reconstruction of the `v8i32`. By converting to a float, we can handle the comparison with 1 or 2 instructions (1 if we can `bitcast`, 2 if we need to cast with `sitofp`). Proofs: https://alive2.llvm.org/ce/z/AJDdQ8 They time out there, but they can be reproduced locally.
1 parent c0d5e32 commit 73b8c4a

34 files changed

+2351
-2289
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -775,6 +775,13 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
775775
} while (!Worklist.empty());
776776
}
777777

778+
#if 0
779+
#define NOAH_DUMP(v, todo) \
780+
dbgs() << "-------------------- STAGE" << #v << " ---------------------\n"; \
781+
todo;
782+
#else
783+
#define NOAH_DUMP(v, todo)
784+
#endif
778785
void SelectionDAGISel::CodeGenAndEmitDAG() {
779786
StringRef GroupName = "sdag";
780787
StringRef GroupDescription = "Instruction Selection and Scheduling";
@@ -815,6 +822,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
815822
if (ViewDAGCombine1 && MatchFilterBB)
816823
CurDAG->viewGraph("dag-combine1 input for " + BlockName);
817824

825+
NOAH_DUMP(0, CurDAG->dump());
818826
// Run the DAG combiner in pre-legalize mode.
819827
{
820828
NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
@@ -838,6 +846,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
838846
CurDAG->viewGraph("legalize-types input for " + BlockName);
839847

840848
bool Changed;
849+
NOAH_DUMP(1, CurDAG->dump());
841850
{
842851
NamedRegionTimer T("legalize_types", "Type Legalization", GroupName,
843852
GroupDescription, TimePassesIsEnabled);
@@ -896,6 +905,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
896905
CurDAG->VerifyDAGDivergence();
897906
#endif
898907

908+
NOAH_DUMP(2, CurDAG->dump());
899909
{
900910
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
901911
GroupDescription, TimePassesIsEnabled);
@@ -915,6 +925,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
915925
if (ViewDAGCombineLT && MatchFilterBB)
916926
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
917927

928+
NOAH_DUMP(3, CurDAG->dump());
918929
// Run the DAG combiner in post-type-legalize mode.
919930
{
920931
NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
@@ -936,6 +947,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
936947
if (ViewLegalizeDAGs && MatchFilterBB)
937948
CurDAG->viewGraph("legalize input for " + BlockName);
938949

950+
NOAH_DUMP(4, CurDAG->dump());
939951
{
940952
NamedRegionTimer T("legalize", "DAG Legalization", GroupName,
941953
GroupDescription, TimePassesIsEnabled);
@@ -955,6 +967,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
955967
if (ViewDAGCombine2 && MatchFilterBB)
956968
CurDAG->viewGraph("dag-combine2 input for " + BlockName);
957969

970+
971+
NOAH_DUMP(5, CurDAG->dump());
958972
// Run the DAG combiner in post-legalize mode.
959973
{
960974
NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
@@ -967,6 +981,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
967981
<< "'\n";
968982
CurDAG->dump());
969983

984+
NOAH_DUMP(6, CurDAG->dump());
985+
970986
#ifndef NDEBUG
971987
if (TTI.hasBranchDivergence())
972988
CurDAG->VerifyDAGDivergence();

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llvm/CodeGen/TargetLowering.h"
14+
#include "llvm/ADT/APFloat.h"
1415
#include "llvm/ADT/STLExtras.h"
1516
#include "llvm/Analysis/VectorUtils.h"
1617
#include "llvm/CodeGen/CallingConvLower.h"
@@ -21,6 +22,7 @@
2122
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
2223
#include "llvm/CodeGen/MachineRegisterInfo.h"
2324
#include "llvm/CodeGen/SelectionDAG.h"
25+
#include "llvm/CodeGen/SelectionDAGNodes.h"
2426
#include "llvm/CodeGen/TargetRegisterInfo.h"
2527
#include "llvm/IR/DataLayout.h"
2628
#include "llvm/IR/DerivedTypes.h"
@@ -936,6 +938,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
936938
Depth);
937939
}
938940

941+
939942
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
940943
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
941944
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
@@ -2471,6 +2474,8 @@ bool TargetLowering::SimplifyDemandedBits(
24712474
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
24722475
Depth + 1))
24732476
return true;
2477+
2478+
24742479
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
24752480
assert(Known.getBitWidth() == InBits && "Src width has changed?");
24762481

0 commit comments

Comments
 (0)