Skip to content

[mlir][vector] Add 1D vector.deinterleave lowering #93042

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 66 additions & 2 deletions mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1761,6 +1761,70 @@ struct VectorInterleaveOpLowering
}
};

/// Conversion pattern for a 1-D `vector.deinterleave`.
///
/// Both fixed-size and scalable vectors are supported:
///  * scalable vectors are lowered to the `vector.deinterleave2` intrinsic,
///    whose struct result is unpacked with two `llvm.extractvalue` ops;
///  * fixed-size vectors are lowered to two `llvm.shufflevector` ops that
///    select the even-indexed and the odd-indexed source elements.
///
/// The pattern fails to match when the result vector is not rank 1.
struct VectorDeinterleaveOpLowering
    : public ConvertOpToLLVMPattern<vector::DeinterleaveOp> {
  using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(vector::DeinterleaveOp deinterleaveOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    VectorType resultType = deinterleaveOp.getResultVectorType();
    VectorType sourceType = deinterleaveOp.getSourceVectorType();
    auto loc = deinterleaveOp.getLoc();
    // Already-converted operand; its type is the LLVM-level source type.
    Value source = adaptor.getSource();

    // Note: n-D deinterleave operations should be lowered to the 1-D before
    // converting to LLVM.
    if (resultType.getRank() != 1)
      return rewriter.notifyMatchFailure(deinterleaveOp,
                                         "DeinterleaveOp not rank 1");

    if (resultType.isScalable()) {
      auto llvmTypeConverter = this->getTypeConverter();
      auto deinterleaveResults = deinterleaveOp.getResultTypes();
      // Packs the two converted result types into a single struct type, which
      // is what the intrinsic returns.
      auto packedOpResults =
          llvmTypeConverter->packOperationResults(deinterleaveResults);
      // A null type means the result types could not be converted; bail out
      // instead of building an intrinsic with an invalid result type.
      if (!packedOpResults)
        return rewriter.notifyMatchFailure(
            deinterleaveOp, "failed to convert DeinterleaveOp result types");

      auto intrinsic = rewriter.create<LLVM::vector_deinterleave2>(
          loc, packedOpResults, source);

      // Struct field 0 holds the even elements, field 1 the odd elements.
      auto evenResult = rewriter.create<LLVM::ExtractValueOp>(
          loc, intrinsic->getResult(0), 0);
      auto oddResult = rewriter.create<LLVM::ExtractValueOp>(
          loc, intrinsic->getResult(0), 1);

      rewriter.replaceOp(deinterleaveOp, ValueRange{evenResult, oddResult});
      return success();
    }

    // Lower fixed-size deinterleave to two shufflevectors. While the
    // vector.deinterleave2 intrinsic supports fixed and scalable vectors, the
    // langref still recommends fixed-vectors use shufflevector, see:
    // https://llvm.org/docs/LangRef.html#id889.
    const int64_t resultVectorSize = resultType.getNumElements();
    SmallVector<int32_t> evenShuffleMask;
    SmallVector<int32_t> oddShuffleMask;

    evenShuffleMask.reserve(resultVectorSize);
    oddShuffleMask.reserve(resultVectorSize);

    // Split the source indices by parity. The index uses the same width as
    // the element count so the comparison is not a mixed-width one.
    for (int64_t i = 0, e = sourceType.getNumElements(); i < e; ++i) {
      if (i % 2 == 0)
        evenShuffleMask.push_back(static_cast<int32_t>(i));
      else
        oddShuffleMask.push_back(static_cast<int32_t>(i));
    }

    // The second shuffle operand is never selected by the masks, so poison is
    // sufficient. It must have the same type as the converted source operand;
    // the unconverted MLIR vector type may differ from it (e.g. for `index`
    // elements), so take the type from the adaptor value.
    auto poison = rewriter.create<LLVM::PoisonOp>(loc, source.getType());
    auto evenShuffle = rewriter.create<LLVM::ShuffleVectorOp>(
        loc, source, poison, evenShuffleMask);
    auto oddShuffle = rewriter.create<LLVM::ShuffleVectorOp>(
        loc, source, poison, oddShuffleMask);

    rewriter.replaceOp(deinterleaveOp, ValueRange{evenShuffle, oddShuffle});
    return success();
  }
};

} // namespace

/// Populate the given list with patterns that convert from Vector to LLVM.
Expand All @@ -1785,8 +1849,8 @@ void mlir::populateVectorToLLVMConversionPatterns(
VectorExpandLoadOpConversion, VectorCompressStoreOpConversion,
VectorSplatOpLowering, VectorSplatNdOpLowering,
VectorScalableInsertOpLowering, VectorScalableExtractOpLowering,
MaskedReductionOpConversion, VectorInterleaveOpLowering>(
converter);
MaskedReductionOpConversion, VectorInterleaveOpLowering,
VectorDeinterleaveOpLowering>(converter);
// Transfer ops with rank > 1 are handled by VectorToSCF.
populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1);
}
Expand Down
22 changes: 22 additions & 0 deletions mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -2546,3 +2546,25 @@ func.func @vector_interleave_2d_scalable(%a: vector<2x[8]xi16>, %b: vector<2x[8]
%0 = vector.interleave %a, %b : vector<2x[8]xi16>
return %0 : vector<2x[16]xi16>
}

// -----

// Fixed-size 1-D case: a vector<4xi32> source is expected to lower to one
// poison value of the source type plus two shufflevectors, one selecting
// indices [0, 2] (even elements) and one selecting [1, 3] (odd elements).
// CHECK-LABEL: @vector_deinterleave_1d
// CHECK-SAME: (%[[SRC:.*]]: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>)
func.func @vector_deinterleave_1d(%a: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>) {
// CHECK: %[[POISON:.*]] = llvm.mlir.poison : vector<4xi32>
// CHECK: llvm.shufflevector %[[SRC]], %[[POISON]] [0, 2] : vector<4xi32>
// CHECK: llvm.shufflevector %[[SRC]], %[[POISON]] [1, 3] : vector<4xi32>
%0, %1 = vector.deinterleave %a : vector<4xi32> -> vector<2xi32>
return %0, %1 : vector<2xi32>, vector<2xi32>
}

// Scalable 1-D case: a vector<[4]xi32> source is expected to lower to the
// deinterleave2 intrinsic, which returns a struct of two vector<[2]xi32>
// values; each half is then read out with an extractvalue (fields 0 and 1).
// CHECK-LABEL: @vector_deinterleave_1d_scalable
// CHECK-SAME: %[[SRC:.*]]: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>)
func.func @vector_deinterleave_1d_scalable(%a: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>) {
// CHECK: %[[RES:.*]] = "llvm.intr.vector.deinterleave2"(%[[SRC]]) : (vector<[4]xi32>) -> !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
// CHECK: llvm.extractvalue %[[RES]][0] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
// CHECK: llvm.extractvalue %[[RES]][1] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
%0, %1 = vector.deinterleave %a : vector<[4]xi32> -> vector<[2]xi32>
return %0, %1 : vector<[2]xi32>, vector<[2]xi32>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// DEFINE: %{entry_point} = entry
// DEFINE: %{compile} = mlir-opt %s -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd -march=aarch64 -mattr=+sve \
// DEFINE: -e %{entry_point} -entry-point-result=void \
// DEFINE: -shared-libs=%mlir_c_runner_utils,%mlir_arm_runner_utils

// RUN: %{compile} | %{run} | FileCheck %s

// Test driver: fixes the vector length via @setArmVLBits and then runs
// @test_deinterleave. Returns nothing.
func.func @entry() {
// Set the vector length to 256-bit (equivalent to vscale=2).
// This allows the checks (below) to look at an entire vector.
%c256 = arith.constant 256 : i32
func.call @setArmVLBits(%c256) : (i32) -> ()
func.call @test_deinterleave() : () -> ()
return
}

// Deinterleaves a scalable step vector and prints the source and both halves.
// With the vector length set by @entry (vscale=2), vector<[4]xi8> holds eight
// elements, so the source prints as 0..7 and the two results print as the
// even-indexed and odd-indexed elements respectively.
func.func @test_deinterleave() {
%step_vector = llvm.intr.experimental.stepvector : vector<[4]xi8>
vector.print %step_vector : vector<[4]xi8>
// CHECK: ( 0, 1, 2, 3, 4, 5, 6, 7 )
%v1, %v2 = vector.deinterleave %step_vector : vector<[4]xi8> -> vector<[2]xi8>
vector.print %v1 : vector<[2]xi8>
vector.print %v2 : vector<[2]xi8>
// CHECK: ( 0, 2, 4, 6 )
// CHECK: ( 1, 3, 5, 7 )
return
}

// External declaration only; @entry calls it with the requested vector length
// in bits. NOTE(review): the definition is presumably provided by one of the
// shared libraries named on the run line above -- confirm.
func.func private @setArmVLBits(%bits : i32)
18 changes: 18 additions & 0 deletions mlir/test/Integration/Dialect/Vector/CPU/test-deinterleave.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// RUN: mlir-opt %s -test-lower-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_c_runner_utils | \
// RUN: FileCheck %s

// End-to-end check of fixed-size deinterleave: prints a constant
// vector<4xi8>, deinterleaves it, and prints both vector<2xi8> results. The
// first result is expected to hold elements 0 and 2 of the source, the second
// elements 1 and 3.
func.func @entry() {
%v0 = arith.constant dense<[1, 2, 3, 4]> : vector<4xi8>
vector.print %v0 : vector<4xi8>
// CHECK: ( 1, 2, 3, 4 )

%v1, %v2 = vector.deinterleave %v0 : vector<4xi8> -> vector<2xi8>
vector.print %v1 : vector<2xi8>
vector.print %v2 : vector<2xi8>
// CHECK: ( 1, 3 )
// CHECK: ( 2, 4 )

return
}
Loading