Revert "[MLIR][XeGPU] Updates XeGPU TensorDescAttr and Refine Gather/Scatter definition. " #109666

Conversation

chencha3
Contributor

Reverts #109144

@llvmbot
Member

llvmbot commented Sep 23, 2024

@llvm/pr-subscribers-mlir-gpu

Author: Chao Chen (chencha3)

Changes

Reverts llvm/llvm-project#109144


Patch is 52.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109666.diff

7 Files Affected:

  • (modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td (+19-45)
  • (modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td (+25-61)
  • (modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td (+26-47)
  • (modified) mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp (+13-33)
  • (modified) mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp (+28-81)
  • (modified) mlir/test/Dialect/XeGPU/XeGPUOps.mlir (+30-44)
  • (modified) mlir/test/Dialect/XeGPU/invalid.mlir (+27-48)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 26eec0d4f2082a..f3ca09a6a68ea8 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -19,18 +19,12 @@ class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
   let mnemonic = attrMnemonic;
 }
 
-class XeGPU_TensorDescAttr<string name, string attrMnemonic, list<Trait> traits = [],
-                         string baseCppClass = "::mlir::Attribute">
-    : XeGPUAttr<name, attrMnemonic, traits, baseCppClass> {
-  let assemblyFormat = "`<` struct(params) `>`";
-}
-
-def XeGPU_BlockTensorDescAttr: XeGPU_TensorDescAttr<"BlockTensorDesc", "block_tdesc_attr"> {
+def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
   let summary = [{a composite attribute for `TensorDescType`}];
-  let description = [{`BlockTensorDesc` (or `block_tdesc_attr`) is a composite
+  let description = [{`TensorDescAttr` (or `tdesc_attr`) is a composite
     attribute defined for `TensorDescType` for describing following
     properties of a `TensorDesc`.
-    1. `memory_space`: It describes where the data block described by the
+    1. `memory_scope`: It describes where the data block described by the
         TensorDesc is located, `Global` device memory or `Shared` local memory.
         It is default to `Global`.
     2. `array_length`: It describes how many horizontally consecutive blocks
@@ -39,63 +33,43 @@ def XeGPU_BlockTensorDescAttr: XeGPU_TensorDescAttr<"BlockTensorDesc", "block_td
         8x32. Its default value is 1.
     3. `boundary_check`: It is used to indicates the hardware whether to do
         out-of-boundary check. The default value is true.
+    4. `scattered`: It is used to differenciate TensorDescs created from
+       `create_nd_tdesc` vs from `create_tdesc`.
   }];
 
   let parameters = (ins
-    OptionalParameter<"MemorySpaceAttr">: $memory_space,
+    OptionalParameter<"MemoryScopeAttr">: $memory_scope,
     OptionalParameter<"IntegerAttr", "1">: $array_length,
-    OptionalParameter<"BoolAttr", "true">: $boundary_check
+    OptionalParameter<"BoolAttr", "true">: $boundary_check,
+    OptionalParameter<"BoolAttr", "false">: $scattered
   );
 
   let builders = [
     AttrBuilder<(ins
-      CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space,
+      CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
       CArg<"int", "1">:$array_length,
-      CArg<"bool", "true">: $boundary_check
+      CArg<"bool", "true">: $boundary_check,
+      CArg<"bool", "false">: $scattered
     )>
   ];
 
+  let assemblyFormat = "`<` struct(params) `>`";
 }
 
-def XeGPU_ScatterTensorDescAttr: XeGPU_TensorDescAttr<"ScatterTensorDesc", "scatter_tdesc_attr"> {
-  let summary = [{a composite attribute for `TensorDescType`}];
-  let description = [{`ScatterTensorDesc` (or `scatter_tdesc_attr`) is a composite
-    attribute defined for `TensorDescType` for describing following
-    properties of a `TensorDesc`.
-    1. `memory_space`: It describes where the data block described by the
-        TensorDesc is located, `Global` device memory or `Shared` local memory.
-        It is default to `Global`.
-    2.  `chunk_size`: indicates number of continious elements accessed for each
-        offset, default is 1. It is used with `scattered` attr only.
-  }];
-
-  let parameters = (ins
-    OptionalParameter<"MemorySpaceAttr">: $memory_space,
-    OptionalParameter<"IntegerAttr", "1">: $chunk_size
-  );
-
-  let builders = [
-    AttrBuilder<(ins
-      CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space,
-      CArg<"int", "1">: $chunk_size
-    )>
-  ];
- }
-
 //===----------------------------------------------------------------------===//
 // XeGPU Memory Scope Enums.
 //===----------------------------------------------------------------------===//
-def XeGPU_MemorySpaceGlobal: I32EnumAttrCase<"Global", 0, "global">;
-def XeGPU_MemorySpaceShared: I32EnumAttrCase<"SLM", 3, "slm">;
-def XeGPU_MemorySpace: I32EnumAttr<"MemorySpace",
+def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">;
+def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">;
+def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope",
       "The address space of the memory the tensor descritor is created for",
-      [XeGPU_MemorySpaceGlobal, XeGPU_MemorySpaceShared]> {
+      [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> {
   let genSpecializedAttr = 0;
   let cppNamespace = "::mlir::xegpu";
 }
 
-def XeGPU_MemorySpaceAttr:
-  EnumAttr<XeGPU_Dialect, XeGPU_MemorySpace, "memory_space"> {
+def XeGPU_MemoryScopeAttr:
+  EnumAttr<XeGPU_Dialect, XeGPU_MemoryScope, "memory_scope"> {
     let summary = [{Describe the location of data described by a `TensorDesc`:
                  Global device memory (`Global`) or Shared local memory (`SLM`).}];
     let assemblyFormat = "$value";
@@ -142,4 +116,4 @@ def XeGPU_FenceScopeAttr:
     let assemblyFormat = "$value";
 }
 
-#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
\ No newline at end of file
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index e24a056de2caf3..c32c7541c39791 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -218,23 +218,6 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
 
     mlir::Value getViewSource() { return getSource(); }
-
-    unsigned getSourceMemorySpace() {
-      auto srcTy = getSourceType();
-      if (auto memrefTy = llvm::dyn_cast<mlir::MemRefType>(srcTy)) {
-        auto attr = memrefTy.getMemorySpace();
-        if (attr) {
-          if (auto intAttr = llvm::dyn_cast<mlir::IntegerAttr>(attr)) {
-            return static_cast<unsigned>(intAttr.getInt());
-          }
-          if (auto memSpaceAttr = llvm::dyn_cast<MemorySpaceAttr>(attr))
-            return static_cast<unsigned>(memSpaceAttr.getValue());
-        }
-      }
-      // take global as default memory scope.
-      return static_cast<unsigned>(MemorySpace::Global);
-    }
-
   }];
 }
 
@@ -428,10 +411,8 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
       is fixed to the hardware supportted subgroup size, e.g., 16 on PVC,
       implying each element in the array corresponds to a work-item (SIMT lane)
       in the subgroup.
-
-    The first dimension of the result TensorDesc corresponds to work-items, so it should
-    match the dimension of offsets. It may also has a second dimension corresponding to
-    the chunk_size if the chunk size is larger than 1.
+    * chunk_size: [optional attribute] indicates number of continious
+      elements accessed for each offset, default is 1.
 
     Example 1. It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64]
     ```mlir
@@ -443,22 +424,29 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
                It will access totally 32 data elements: a[0:7], a[16:23], a[32:39], a[64:71]
     ```mlir
     %0 = memref.alloc() : memref<1024xf32>
-    %1 = xegpu.create_tdesc %0[0, 16, 32, 64] : memref<1024xf32> -> TensorDesc<4x8xf32, chunk_size = 8>
+    %1 = xegpu.create_tdesc %0[0, 16, 32, 64] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32>
     ```
 
     Example 3. It is similar to Example 2, but there is some overlaps among workitems.
                It accesses: a[0:7], a[4:11], a[8:15], a[12:19]
     ```mlir
     %0 = memref.alloc() : memref<1024xf32>
-    %1 = xegpu.create_tdesc %0[0, 4, 8, 12] : memref<1024xf32> -> TensorDesc<4x8xf32, chunk_size = 8>>
+    %1 = xegpu.create_tdesc %0[0, 4, 8, 12] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32>
     ```
   }];
 
   let arguments = (ins XeGPU_BaseAddrType: $source,
                        Variadic<Index>: $offsets,
-                       DenseI64ArrayAttr: $const_offsets);
+                       DenseI64ArrayAttr: $const_offsets,
+                       DefaultValuedAttr<I64Attr, "1">: $chunk_size);
   let results = (outs XeGPU_TensorDesc:$TensorDesc);
 
+  let builders = [
+    OpBuilder<(ins "xegpu::TensorDescType": $TensorDesc, "Value": $source,
+                   "llvm::ArrayRef<OpFoldResult>": $offsets,
+                   CArg<"uint32_t", "1"> : $chunk_size)>,
+  ];
+
   let assemblyFormat = [{
     $source
     custom<DynamicIndexList>($offsets, $const_offsets)
@@ -485,22 +473,6 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
       assert(idx < getNumOffsets() && "Invalid out of bound access.");
       return getMixedOffsets()[idx];
     }
-
-    unsigned getSourceMemorySpace() {
-      auto srcTy = getSource().getType();
-      if (auto memrefTy = llvm::dyn_cast<mlir::MemRefType>(srcTy)) {
-        auto attr = memrefTy.getMemorySpace();
-        if (attr) {
-          if (auto intAttr = llvm::dyn_cast<mlir::IntegerAttr>(attr))
-            return static_cast<unsigned>(intAttr.getInt());
-          if (auto memSpaceAttr = llvm::dyn_cast<MemorySpaceAttr>(attr))
-            return static_cast<unsigned>(memSpaceAttr.getValue());
-        }
-      }
-      // take global as default memory scope.
-      return static_cast<unsigned>(MemorySpace::Global);
-    }
-
   }];
 
   let hasVerifier = 1;
@@ -548,31 +520,28 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [AllRanksMatch<["value", "TensorDesc"]
 
   let description = [{ It (aka. load) load data per each work-item. The output
     describes the data being loaded at the subgroup level, so its size is
-    consistent with the number of work-items in a subgroup. When the chunk size
-    is larger than 2, the output vector is a 2D vector, with dim-1 correspoding
-    to work-items, and dim-0 corresponding to the chunk_size loaded by each work-item.
-    Specially, there is a transpose effect on the result (as compared to the TensorDesc)
-    due to the hardware implementation. Therefore, a transpose attribute is introduced
-    on purpose, making sure users are aware of this implicit transformation.
+    consistent with the number of work-items in a subgroup. When `chunk_size_per_lane`
+    attribute is larger than 1 in TensorDesc, the output vector will be 2D vector,
+    with dim-1 correspoding to the chunk size.
 
     The mask operand masks out memory access so that it is safe to pass out-of-boundary
     addresses/offsets as long as they are masked. It applies to slots of SIMD lanes.
 
   Example:
   ```mlir
-    %2 = xegpu.load %1, %0 {transpose,
+    %2 = xegpu.load %1, %0 {transpose = [1, 0],
                             l1_hint = #xegpu.cache_hint<cached>,
                             l2_hint = #xegpu.cache_hint<uncached>,
                             l3_hint = #xegpu.cache_hint<uncached>}
-          : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<memory_space=global>>,
-            vector<16xi1> -> vector<16xf32>
+          : !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr<scattered=true>>, vector<16xi1>
+            -> vector<16xf32>
   ```
 
   }];
 
   let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
                        XeGPU_MaskType: $mask,
-                       OptionalAttr<UnitAttr>: $transpose,
+                       OptionalAttr<DenseI64ArrayAttr>: $transpose,
                        OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
                        OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
                        OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
@@ -604,15 +573,11 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [AllRanksMatch<["value", "TensorDesc"]
   let hasVerifier = 1;
 }
 
-def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllElementCountsMatch<["value", "TensorDesc"]>,
-                                              AllElementTypesMatch<["value", "TensorDesc"]>]> {
+def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllShapesMatch<["value", "TensorDesc"]>,
+                                        AllElementTypesMatch<["value", "TensorDesc"]>]> {
   let summary = "store data to scattered memory locations.";
-  let description = [{ It (aka. store) stores data to scattered memory locations. The value is
-  typically a 1D vector. But when the chunk size of the TensorDesc is larger than 1, it will be
-  a 2D vector instead. For the later case, dim-1 of the value correspods to the simd lanes
-  and the dim-0 of the value corresponds to the chunk_size stored per lane. So `store_scatter`
-  has transpose effect, which is similar to `load_gather`. Therefore, a transpose attribute is
-  introduced on purpose, making sure users are aware of this implicit transformation.
+  let description = [{ It (aka. store) stores data to scattered memory locations.
+  It has similar semantic to `load_gather`.
 
   Example:
   ```mlir
@@ -627,7 +592,6 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllElementCountsMatch<["value", "T
     XeGPU_ValueType: $value,
     XeGPU_TensorDesc: $TensorDesc,
     XeGPU_MaskType: $mask,
-    OptionalAttr<UnitAttr>: $transpose,
     OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
     OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
     OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
@@ -759,7 +723,7 @@ def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]
 
 def XeGPU_AtomicRMWOp: XeGPU_Op<"atomic_rmw", [Pure,
       AllElementTypesMatch<["tensorDesc", "value", "result"]>,
-      AllShapesMatch<["tensorDesc", "value", "result"]>]> {
+      AllShapesMatch<["tensorDesc", "mask", "value", "result"]>]> {
   let summary = "Atomic ready-modify-write operation on the TensorDesc. ";
 
   let description = [{
@@ -844,7 +808,7 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
     2. `Fence_scope` describes the scope of fence. "Workgroup" means that the scope would be
         within each workgroup. "GPU" means the scope would be across workgroups within the GPU.
   }];
-  let arguments = (ins XeGPU_MemorySpaceAttr: $memory_kind,
+  let arguments = (ins XeGPU_MemoryScopeAttr: $memory_kind,
                        XeGPU_FenceScopeAttr: $fence_scope);
   let assemblyFormat = [{`memory_kind` `=` `` $memory_kind `,` `fence_scope` `=` `` $fence_scope attr-dict}];
   let extraClassDeclaration = extraBaseClassDeclaration;
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 0ce1211664b5ba..9f101a71697b56 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -48,7 +48,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
 
     Similar to the builtin tensor, it also provides an optinal attribute to encoding
     the following information via the TensorDescAttr object:
-    * memory_space (xegpu::MemorySpace): [optional] where the data is located,
+    * memory_scope (xegpu::MemoryScope): [optional] where the data is located,
                 global memory or shared memory. It is default to Global.
     * array_length (int): [optional] The number of contiguous blocks with size as `shape`,
                that will be loaded by block load at a time. It is default to 1.
@@ -63,7 +63,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
     element-type ::= float-type | integer-type | index-type
     dim-list := (static-dim-list `x`)?
     static-dim-list ::= decimal-literal `x` decimal-literal
-    attr-list = (, memory_space = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)?
+    attr-list = (, memory_scope = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)?
     ```
 
     Examples:
@@ -76,7 +76,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
     xegpu.tensor_desc<8x16xf32>
 
     // A TensorDesc with 8x16 f32 elements for a memory region in shared memory space.
-    xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr<memory_space = slm>>
+    xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr<memory_scope = slm>>
     ```
   }];
 
@@ -88,14 +88,11 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
     TypeBuilderWithInferredContext<(ins
       "llvm::ArrayRef<int64_t>": $shape,
       "mlir::Type": $elementType,
+      CArg<"bool", "false">: $scattered,
       CArg<"int", "1">: $array_length,
-      CArg<"bool", "true">: $boundary_check,
-      CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space)>,
-    TypeBuilderWithInferredContext<(ins
-      "llvm::ArrayRef<int64_t>": $shape,
-      "mlir::Type": $elementType,
-      CArg<"int", "1">: $chunk_size,
-      CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space)>
+      CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
+      CArg<"bool", "true">: $boundary_check
+    )>
   ];
 
   let extraClassDeclaration = [{
@@ -113,58 +110,40 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
       return llvm::cast<TensorDescType>(cloneWith(getShape(), elementType));
     }
 
-    BlockTensorDescAttr getEncodingAsBlockTensorDescAttr() const {
-      return llvm::dyn_cast_if_present<BlockTensorDescAttr>(getEncoding());
+    TensorDescAttr getEncodingAsTensorDescAttr() const {
+      return llvm::dyn_cast_if_present<TensorDescAttr>(getEncoding());
     }
 
-    ScatterTensorDescAttr getEncodingAsScatterTensorDescAttr() const {
-      return llvm::dyn_cast_if_present<ScatterTensorDescAttr>(getEncoding());
-    }
-
-    xegpu::MemorySpace getMemorySpace() const {
-      auto block_attr = getEncodingAsBlockTensorDescAttr();
-      if (block_attr && block_attr.getMemorySpace())
-        return block_attr.getMemorySpace().getValue();
-
-      auto scatter_attr = getEncodingAsScatterTensorDescAttr();
-      if (scatter_attr && scatter_attr.getMemorySpace())
-        return scatter_attr.getMemorySpace().getValue();
-
+    xegpu::MemoryScope getMemoryScope() const {
+      auto attr = getEncodingAsTensorDescAttr();
+      if (attr && attr.getMemoryScope())
+        return attr.getMemoryScope().getValue();
       // return default value
-      return MemorySpace::Global;
+      return MemoryScope::Global;
     }
 
     int getArrayLength() {
-      auto attr = getEncoding();
-      auto block_attr = mlir::dyn_cast_if_present<BlockTensorDescAttr>(attr);
-      assert((!attr || block_attr) && "invalid on non BlockTensorDescAttr.");
-      if (block_attr && block_attr.getArrayLength())
-        return block_attr.getArrayLength().getInt();
+      auto attr = getEncodingAsTensorDescAttr();
+      if (attr && attr.getArrayLength())
+        return attr.getArrayLength().getInt();
       // return default value
       return 1;
     }
 
     bool getBoundaryCheck() {
-      auto attr = getEncoding();
-      auto block_attr = mlir::dyn_cast_if_present<BlockTensorDescAttr>(attr);
-      assert((!attr || block_attr) && "invalid on non BlockTensorDescAttr.");
-      if (block_attr && block_attr.getBoundaryCheck())
-        return block_attr.getBoundaryCheck().getValue();
+      auto attr = getEncodingAsTensorDescAttr();
+      if (attr && attr.getBoundaryCheck())
+        return attr.getBoundaryCheck().getValue();
       // return default value
       return true;
     }
 
-    bool isScattered() {
-      return bool(getEncodingAsScatterTensorDescAttr());
-    }
-
-    int getChunkSize() {
-      auto attr = getEncoding();
-      auto scatter_attr = mlir::dyn_cast_if_present<ScatterTensorDescAttr>(attr);
-      assert((!attr || scatter_attr) && "invalid on non ScatterTensorDescAttr.");
-      if (scatter_attr && scatter_attr.getChunkSize())
-        return scatter_attr.getChunkSize().getInt();
-      return 1;
+    bool getScattered() {
+      auto attr = getEncodingAsTensorDescAttr();
+      if (attr && attr.getScattered())
+        return attr.getScattered().getValue();
+      // return default value
+      return false;
     }
   }];
 
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 1dfbaed454c193..24719fe748fe4f 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -30,35 +30,23 @@ void XeGPUDialect::initialize() {
 }
 
 //===----------------------------...
[truncated]
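
For reference, the examples below contrast the syntax this revert restores with the syntax #109144 had introduced. This is a side-by-side sketch assembled from the op documentation in the hunks above, not a single valid module; values such as %0, %desc, and %mask are placeholders taken from the diff's own examples.

```mlir
// Restored (post-revert) syntax: chunk_size is an attribute on create_tdesc,
// the scattered case is flagged on the unified #xegpu.tdesc_attr, and
// xegpu.load takes a permutation-style transpose.
%1 = xegpu.create_tdesc %0[0, 16, 32, 64] {chunk_size = 8}
       : memref<1024xf32> -> TensorDesc<4x8xf32>
%2 = xegpu.load %desc, %mask {transpose = [1, 0],
                              l1_hint = #xegpu.cache_hint<cached>}
       : !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr<scattered = true>>,
         vector<16xi1> -> vector<16xf32>

// Reverted (#109144) syntax for the same two ops: chunk_size lived in the
// descriptor type, a dedicated #xegpu.scatter_tdesc_attr marked scattered
// descriptors, and transpose was a unit attribute.
%1 = xegpu.create_tdesc %0[0, 16, 32, 64]
       : memref<1024xf32> -> TensorDesc<4x8xf32, chunk_size = 8>
%2 = xegpu.load %desc, %mask {transpose,
                              l1_hint = #xegpu.cache_hint<cached>}
       : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<memory_space = global>>,
         vector<16xi1> -> vector<16xf32>
```

The revert also renames the enum back from `MemorySpace`/`memory_space` to `MemoryScope`/`memory_scope` (moving the SLM case from value 3 back to 1), so the shared-local-memory spelling is again `!xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr<memory_scope = slm>>`.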

@llvmbot
Member

llvmbot commented Sep 23, 2024

@llvm/pr-subscribers-mlir

Author: Chao Chen (chencha3)

Changes

Reverts llvm/llvm-project#109144


Patch is 52.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109666.diff

⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo.
Please turn off Keep my email addresses private setting in your account.
See LLVM Discourse for more information.

@chencha3 chencha3 merged commit 09e94d0 into main Sep 23, 2024
9 of 10 checks passed
@chencha3 chencha3 deleted the revert-109144-users/chencha3/xegpu/update_xegpu_tdesc_attr_and_gather_scatter branch September 23, 2024 14:32