Skip to content

[GEN] Implement GPU to GEN lowering #13427

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions mlir/include/mlir/Conversion/GPUToGEN/GPUToGEN.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//===- GPUToGEN.h - GPU to GEN Passes ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Provides passes to convert GPU dialect to GEN dialect.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_CONVERSION_GPUTOGEN_GPUTOGEN_H
#define MLIR_CONVERSION_GPUTOGEN_GPUTOGEN_H

#include <memory>

namespace mlir {

class Pass;
class RewritePatternSet;

#define GEN_PASS_DECL_CONVERTGPUOPSTOGENOPS
#include "mlir/Conversion/Passes.h.inc"

void populateGPUToGENPatterns(RewritePatternSet &patterns);

} // namespace mlir
#endif // MLIR_CONVERSION_GPUTOGEN_GPUTOGEN_H
1 change: 1 addition & 0 deletions mlir/include/mlir/Conversion/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "mlir/Conversion/GENToLLVM/GENToLLVM.h"
#include "mlir/Conversion/GENToSPIRV/GENToSPIRV.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToGEN/GPUToGEN.h"
#include "mlir/Conversion/GPUToGENX/GPUToGENXPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
Expand Down
9 changes: 9 additions & 0 deletions mlir/include/mlir/Conversion/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,15 @@ def LowerHostCodeToLLVMPass : Pass<"lower-host-to-llvm", "ModuleOp"> {
let dependentDialects = ["LLVM::LLVMDialect"];
}

//===----------------------------------------------------------------------===//
// GPUToGEN
//===----------------------------------------------------------------------===//

def ConvertGpuOpsToGENOps : Pass<"convert-gpu-to-gen"> {
let summary = "Generate GEN operations for gpu operations";
let dependentDialects = ["GEN::GENDialect"];
}

//===----------------------------------------------------------------------===//
// GPUToGENX
//===----------------------------------------------------------------------===//
Expand Down
1 change: 1 addition & 0 deletions mlir/lib/Conversion/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ add_subdirectory(FuncToSPIRV)
add_subdirectory(GENToLLVM)
add_subdirectory(GENToSPIRV)
add_subdirectory(GPUCommon)
add_subdirectory(GPUToGEN)
add_subdirectory(GPUToGENX)
add_subdirectory(GPUToNVVM)
add_subdirectory(GPUToROCDL)
Expand Down
16 changes: 16 additions & 0 deletions mlir/lib/Conversion/GPUToGEN/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
add_mlir_conversion_library(MLIRGPUToGEN
GPUToGEN.cpp

ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/GPUToGEN

DEPENDS
MLIRConversionPassIncGen

LINK_COMPONENTS
Core

LINK_LIBS PUBLIC
MLIRGPUDialect
MLIRGENDialect
)
134 changes: 134 additions & 0 deletions mlir/lib/Conversion/GPUToGEN/GPUToGEN.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
//===- GPUToGEN.cpp - GPU to GEN Patterns ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements patterns to convert GPU dialect to GEN dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Conversion/GPUToGEN/GPUToGEN.h"

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GEN/IR/GENDialect.h"
#include "mlir/Dialect/GEN/IR/GENOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"

#include "llvm/Support/Debug.h"

namespace mlir {
#define GEN_PASS_DEF_CONVERTGPUOPSTOGENOPS
#include "mlir/Conversion/Passes.h.inc"
} // namespace mlir

using namespace mlir;

template <typename GPUOp, typename GENOp>
class GPUIndexOpToGENLowering : public OpConversionPattern<GPUOp> {
public:
using OpConversionPattern<GPUOp>::OpConversionPattern;
using OpAdaptor = typename GPUOp::Adaptor;

LogicalResult
matchAndRewrite(GPUOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
auto dim = static_cast<std::uint32_t>(adaptor.getDimension());
Value idxDim = rewriter.create<arith::ConstantIntOp>(op->getLoc(), dim, 32);
rewriter.replaceOpWithNewOp<GENOp>(op, rewriter.getIndexType(), idxDim);
return success();
}
};

class GPUBarrierToGENLowering : public OpConversionPattern<gpu::BarrierOp> {
public:
using OpConversionPattern<gpu::BarrierOp>::OpConversionPattern;
using OpAdaptor = typename gpu::BarrierOp::Adaptor;

LogicalResult match(gpu::BarrierOp op) const final { return success(); }

void rewrite(gpu::BarrierOp op, OpAdaptor,
ConversionPatternRewriter &rewriter) const final {
rewriter.replaceOpWithNewOp<GEN::BarrierOp>(op);
}
};

class GPUShuffleToGENLowering : public OpConversionPattern<gpu::ShuffleOp> {
public:
using OpConversionPattern<gpu::ShuffleOp>::OpConversionPattern;
using OpAdaptor = typename gpu::ShuffleOp::Adaptor;

LogicalResult
matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {

auto gpuMode = adaptor.getMode();
const auto genMode = [](gpu::ShuffleMode mode) {
switch (mode) {
case gpu::ShuffleMode::XOR:
return GEN::ShflKind::XOR;
case gpu::ShuffleMode::DOWN:
return GEN::ShflKind::DOWN;
case gpu::ShuffleMode::UP:
return GEN::ShflKind::UP;
case gpu::ShuffleMode::IDX:
return GEN::ShflKind::IDX;
}
llvm_unreachable("expected a matching shuffle mode");
}(gpuMode);

// TODO unable to validate gpu width parameter, potential for producing
// invalid code
IntegerAttr widthAttr;
if (!matchPattern(adaptor.getWidth(), m_Constant(&widthAttr))) {
return rewriter.notifyMatchFailure(
op, "shuffle width must be a constant value");
}

Value trueValue = rewriter.create<arith::ConstantOp>(
op->getLoc(), rewriter.getBoolAttr(true));
auto result = rewriter.create<GEN::SubGroupShuffleOp>(
op->getLoc(), op->getResult(0).getType(), adaptor.getValue(),
adaptor.getOffset(), genMode);

rewriter.replaceOp(op, {result, trueValue});
return success();
}
};

void mlir::populateGPUToGENPatterns(RewritePatternSet &patterns) {
patterns.add<GPUIndexOpToGENLowering<gpu::ThreadIdOp, GEN::LocalIdOp>,
GPUIndexOpToGENLowering<gpu::BlockIdOp, GEN::WorkGroupIdOp>,
GPUIndexOpToGENLowering<gpu::BlockDimOp, GEN::WorkGroupSizeOp>,
GPUIndexOpToGENLowering<gpu::GridDimOp, GEN::NumWorkGroupsOp>,
GPUBarrierToGENLowering, GPUShuffleToGENLowering>(
patterns.getContext());
}

namespace {
struct ConvertGpuOpsToGENOpsPass
: public impl::ConvertGpuOpsToGENOpsBase<ConvertGpuOpsToGENOpsPass> {
void runOnOperation() override {
ConversionTarget target(getContext());

target.addLegalOp<arith::ConstantOp>();
target.addLegalDialect<GEN::GENDialect>();
// The ops of gpu dialect that can currently be mapped to GEN
target.addIllegalOp<gpu::ThreadIdOp, gpu::BlockIdOp, gpu::BlockDimOp,
gpu::GridDimOp, gpu::BarrierOp, gpu::ShuffleOp>();

mlir::RewritePatternSet patterns(&getContext());
populateGPUToGENPatterns(patterns);

if (failed(applyPartialConversion(getOperation(), target,
std::move(patterns))))
signalPassFailure();
}
};
} // namespace
159 changes: 159 additions & 0 deletions mlir/test/Conversion/GPUToGEN/gpu-to-gen.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// RUN: mlir-opt -split-input-file -convert-gpu-to-gen %s | FileCheck %s

gpu.module @local_id_kernels {
// CHECK-LABEL: gen_local_id_x
gpu.func @gen_local_id_x() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 0 : i32
// CHECK: gen.local_id [[DIM]]
%0 = gpu.thread_id x
gpu.return
}

// CHECK-LABEL: gen_local_id_y
gpu.func @gen_local_id_y() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 1 : i32
// CHECK: gen.local_id [[DIM]]
%0 = gpu.thread_id y
gpu.return
}

// CHECK-LABEL: gen_local_id_z
gpu.func @gen_local_id_z() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 2 : i32
// CHECK: gen.local_id [[DIM]]
%0 = gpu.thread_id z
gpu.return
}
}

// -----


gpu.module @work_group_id_kernels {
// CHECK-LABEL: gen_work_group_id_x
gpu.func @gen_work_group_id_x() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 0 : i32
// CHECK: gen.work_group_id [[DIM]]
%0 = gpu.block_id x
gpu.return
}

// CHECK-LABEL: gen_work_group_id_y
gpu.func @gen_work_group_id_y() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 1 : i32
// CHECK: gen.work_group_id [[DIM]]
%0 = gpu.block_id y
gpu.return
}

// CHECK-LABEL: gen_work_group_id_z
gpu.func @gen_work_group_id_z() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 2 : i32
// CHECK: gen.work_group_id [[DIM]]
%0 = gpu.block_id z
gpu.return
}
}

// -----


gpu.module @work_group_size_kernels {
// CHECK-LABEL: gen_work_group_size_x
gpu.func @gen_work_group_size_x() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 0 : i32
// CHECK: gen.work_group_size [[DIM]]
%0 = gpu.block_dim x
gpu.return
}

// CHECK-LABEL: gen_work_group_size_y
gpu.func @gen_work_group_size_y() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 1 : i32
// CHECK: gen.work_group_size [[DIM]]
%0 = gpu.block_dim y
gpu.return
}

// CHECK-LABEL: gen_work_group_size_z
gpu.func @gen_work_group_size_z() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 2 : i32
// CHECK: gen.work_group_size [[DIM]]
%0 = gpu.block_dim z
gpu.return
}
}

// -----


gpu.module @num_work_groups_kernels {
// CHECK-LABEL: gen_num_work_groups_x
gpu.func @gen_num_work_groups_x() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 0 : i32
// CHECK: gen.num_work_groups [[DIM]]
%0 = gpu.grid_dim x
gpu.return
}

// CHECK-LABEL: gen_num_work_groups_y
gpu.func @gen_num_work_groups_y() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 1 : i32
// CHECK: gen.num_work_groups [[DIM]]
%0 = gpu.grid_dim y
gpu.return
}

// CHECK-LABEL: gen_num_work_groups_z
gpu.func @gen_num_work_groups_z() kernel {
// CHECK: [[DIM:%.*]] = arith.constant 2 : i32
// CHECK: gen.num_work_groups [[DIM]]
%0 = gpu.grid_dim z
gpu.return
}
}

// -----

gpu.module @barrier_kernels {
// CHECK-LABEL: gen_barrier
gpu.func @gen_barrier() kernel {
// CHECK: gen.barrier
gpu.barrier
gpu.return
}
}

// -----

// CHECK-LABEL gpu.module @shuffle_kernels
gpu.module @shuffle_kernels {
// CHECK: gpu.func @gen_shuffle_xor(%[[IN_XOR:.*]]: f32, %[[OFFSET_XOR:.*]]: i32) kernel {
gpu.func @gen_shuffle_xor(%in : f32, %offset: i32) kernel {
// CHECK: %{{.*}} = gen.sub_group_shuffle xor %[[IN_XOR]], %[[OFFSET_XOR]] : f32
%width = arith.constant 32 : i32
%0, %1 = gpu.shuffle xor %in, %offset, %width : f32
gpu.return
}
// CHECK: gpu.func @gen_shuffle_up(%[[IN_UP:.*]]: f32, %[[OFFSET_UP:.*]]: i32) kernel {
gpu.func @gen_shuffle_up(%in : f32, %offset: i32) kernel {
// CHECK: %{{.*}} = gen.sub_group_shuffle up %[[IN_UP]], %[[OFFSET_UP]] : f32
%width = arith.constant 32 : i32
%0, %1 = gpu.shuffle up %in, %offset, %width : f32
gpu.return
}
// CHECK: gpu.func @gen_shuffle_down(%[[IN_DOWN:.*]]: f32, %[[OFFSET_DOWN:.*]]: i32) kernel {
gpu.func @gen_shuffle_down(%in : f32, %offset: i32) kernel {
// CHECK: %{{.*}} = gen.sub_group_shuffle down %[[IN_DOWN]], %[[OFFSET_DOWN]] : f32
%width = arith.constant 32 : i32
%0, %1 = gpu.shuffle down %in, %offset, %width : f32
gpu.return
}
// CHECK: gpu.func @gen_shuffle_idx(%[[IN_IDX:.*]]: f32, %[[OFFSET_IDX:.*]]: i32) kernel {
gpu.func @gen_shuffle_idx(%in : f32, %offset: i32) kernel {
// CHECK: %{{.*}} = gen.sub_group_shuffle idx %[[IN_IDX]], %[[OFFSET_IDX]] : f32
%width = arith.constant 32 : i32
%0, %1 = gpu.shuffle idx %in, %offset, %width : f32
gpu.return
}
}