Commit b824db2

Huang, Haixin authored and zhczhong committed
add test & bugfix for new pass
1 parent f1c40b8 commit b824db2

3 files changed: +288 -1 lines changed

include/gc/Transforms/Microkernel/MicrokernelPasses.td

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ def EarlyDispatchMicrokernel: Pass<"early-dispatch-microkernel", "::mlir::Module
                              "microkernel::MicrokernelDialect"];
 }
 
-def MergeBranchMicrokernelContext: Pass<"merge-branch-microkernel-context", "::mlir::func::FuncOp"> {
+def MergeBranchMicrokernelContext: Pass<"merge-branch-microkernel-context", "::mlir::ModuleOp"> {
   let summary = "Find and merge identical microkernel context operations in branches into one";
   let description = [{
     Find and merge identical microkernel context operations in branches into one.
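The pass anchor changes from ::mlir::func::FuncOp to ::mlir::ModuleOp, which lets the analysis walk module-level constructs such as the global constructor inspected by traceDispatchInGlobalCtor in the next file. As a rough illustration of the rewrite (distilled from the CHECK patterns in the new test, not verbatim pass output; argument lists are elided), identical tile-context calls in the two branches of an scf.if are merged into a single pair around the branch:

  // Before (schematic): both branches configure and release the same tile context.
  scf.if %cmp {
    func.call @dnnl_brgemm_tileconfig(...)            // identical config
    // ... brgemm execution ...
    func.call @dnnl_brgemm_tilerelease() : () -> ()
  } else {
    func.call @dnnl_brgemm_tileconfig(...)            // identical config
    // ... brgemm execution ...
    func.call @dnnl_brgemm_tilerelease() : () -> ()
  }

  // After: one merged context surrounds the branch.
  func.call @dnnl_brgemm_tileconfig(...)
  scf.if %cmp {
    // ... brgemm execution ...
  } else {
    // ... brgemm execution ...
  }
  func.call @dnnl_brgemm_tilerelease() : () -> ()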

lib/gc/Transforms/Microkernel/MergeBranchMicrokernelContext.cpp

Lines changed: 9 additions & 0 deletions
@@ -38,6 +38,8 @@ class BrgemmDispatchAnalysis {
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(BrgemmDispatchAnalysis)
   explicit BrgemmDispatchAnalysis(Operation *);
   void setKernelDispatch(Operation *tilecfg, Operation *dispatch) {
+    LLVM_DEBUG(llvm::dbgs() << "* setKernelDispatch: " << tilecfg << "; "
+                            << dispatch << "\n");
     brgemmDispatches[tilecfg] = dispatch;
   };
   Operation *getKernelDispatch(Operation *tilecfg) const {
@@ -50,6 +52,8 @@ class BrgemmDispatchAnalysis {
 };
 
 BrgemmDispatchAnalysis::BrgemmDispatchAnalysis(Operation *root) {
+  LLVM_DEBUG(llvm::dbgs() << "* construct BrgemmDispatchAnalysis: " << *root
+                          << "\n");
   ModuleOp module = dyn_cast_or_null<ModuleOp>(root);
   if (!module)
     return;
@@ -108,6 +112,8 @@ BrgemmDispatchAnalysis::traceDispatchInGlobalCtor(ModuleOp module,
   for (auto &opRef : body.getOps()) {
     auto *op = &opRef;
     auto tryCallOp = dyn_cast_or_null<func::CallOp>(op);
+    if (!tryCallOp)
+      continue;
     auto callee = tryCallOp.getCalleeAttr().getAttr();
     if (callee == StringAttr::get(op->getContext(), DNNL_BRGEMM_DISPATCH_NAME))
       return op;
@@ -122,8 +128,11 @@ extractTileOpsFromRegion(Region &region) {
   std::pair<Operation *, Operation *> ret{nullptr, nullptr};
 
   for (auto &opRef : region.getOps()) {
+    LLVM_DEBUG(llvm::dbgs() << ">>> " << opRef << "\n");
     auto *op = &opRef;
     auto tryCallOp = dyn_cast_or_null<func::CallOp>(op);
+    if (!tryCallOp)
+      continue;
     auto callee = tryCallOp.getCalleeAttr().getAttr();
     if (callee == StringAttr::get(op->getContext(), DNNL_BRGEMM_TILECFG_NAME))
       ret.first = op;
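The two added `if (!tryCallOp) continue;` guards are the bugfix named in the commit message: both loops visit every operation in a block, and dyn_cast_or_null<func::CallOp> returns a null handle for anything that is not a func.call, so the following getCalleeAttr() call would dereference null as soon as the region contains any other operation. A minimal sketch of the guard pattern in isolation (illustrative only; findCallTo is a hypothetical helper, not part of the pass):

  #include "mlir/Dialect/Func/IR/FuncOps.h"
  #include "mlir/IR/Region.h"

  // Walk a region and inspect only func.call ops; other ops are skipped
  // instead of crashing on a null cast result.
  static mlir::Operation *findCallTo(mlir::Region &region, mlir::StringAttr target) {
    for (mlir::Operation &opRef : region.getOps()) {
      auto callOp = llvm::dyn_cast_or_null<mlir::func::CallOp>(&opRef);
      if (!callOp)
        continue; // non-call op (e.g. arith.constant): nothing to inspect
      if (callOp.getCalleeAttr().getAttr() == target)
        return &opRef;
    }
    return nullptr;
  }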
Lines changed: 278 additions & 0 deletions
@@ -0,0 +1,278 @@
// RUN: gc-opt %s -early-dispatch-microkernel -convert-microkernel-to-dnnl-func -merge-branch-microkernel-context -split-input-file | FileCheck %s

#map = affine_map<(d0, d1) -> (d0, d1)>
module {
  func.func @simple_brgemm() {
    %c0_i64 = arith.constant 0 : i64
    %c0_index = arith.constant 0 : index
    %c1_index = arith.constant 1 : index
    %c4_index = arith.constant 4 : index
    %c8_index = arith.constant 8 : index
    %c16_i64 = arith.constant 16 : i64
    %cst = arith.constant 0.000000e+00 : f32
    %alloc = memref.alloc() {alignment = 64 : i64} : memref<4x16x32x32xbf16>
    %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<8x16x16x32x2xbf16>
    %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<4x8x32x32xf32>
    scf.for %arg0 = %c0_index to %c4_index step %c1_index {
      scf.for %arg1 = %c0_index to %c8_index step %c1_index {
        %alloc_3 = memref.alloc() {alignment = 64 : i64} : memref<32x32xf32>
        linalg.fill ins(%cst : f32) outs(%alloc_3 : memref<32x32xf32>)
        %subview = memref.subview %alloc[%arg0, 0, 0, 0] [1, 16, 32, 32] [1, 1, 1, 1] : memref<4x16x32x32xbf16> to memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>
        %subview_4 = memref.subview %alloc_0[%arg1, 0, 0, 0, 0] [1, 16, 16, 32, 2] [1, 1, 1, 1, 1] : memref<8x16x16x32x2xbf16> to memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>
        %cmp = arith.cmpi eq, %arg0, %c0_index : index
        scf.if %cmp {
          %0 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 1024, 1024] flags = (beta_0, stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%0) : (i64) -> ()
          microkernel.brgemm(%0, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%0) : (i64) -> ()
        } else {
          %1 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 1024, 1024] flags = (stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%1) : (i64) -> ()
          microkernel.brgemm(%1, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%1) : (i64) -> ()
        }
        memref.dealloc %alloc_3 : memref<32x32xf32>
      }
    }
    return
  }
}

// CHECK-LABEL: simple_brgemm

// CHECK: scf.for %arg0 = %c0 to %c4 step %c1
// CHECK-NEXT: scf.for %arg1 = %c0 to %c8 step %c1

// CHECK: func.call @dnnl_brgemm_tileconfig
// CHECK-NEXT: scf.if
// CHECK: } else {
// CHECK: }
// CHECK-NEXT: func.call @dnnl_brgemm_tilerelease() : () -> ()

// -----

#map = affine_map<(d0, d1) -> (d0, d1)>
module {
  func.func @simple_brgemm() {
    %c0_i64 = arith.constant 0 : i64
    %c0_index = arith.constant 0 : index
    %c1_index = arith.constant 1 : index
    %c4_index = arith.constant 4 : index
    %c8_index = arith.constant 8 : index
    %c16_i64 = arith.constant 16 : i64
    %cst = arith.constant 0.000000e+00 : f32
    %alloc = memref.alloc() {alignment = 64 : i64} : memref<4x16x32x32xbf16>
    %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<8x16x16x32x2xbf16>
    %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<4x8x32x32xf32>
    scf.for %arg0 = %c0_index to %c4_index step %c1_index {
      scf.for %arg1 = %c0_index to %c8_index step %c1_index {
        %alloc_3 = memref.alloc() {alignment = 64 : i64} : memref<32x32xf32>
        linalg.fill ins(%cst : f32) outs(%alloc_3 : memref<32x32xf32>)
        %subview = memref.subview %alloc[%arg0, 0, 0, 0] [1, 16, 32, 32] [1, 1, 1, 1] : memref<4x16x32x32xbf16> to memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>
        %subview_4 = memref.subview %alloc_0[%arg1, 0, 0, 0, 0] [1, 16, 16, 32, 2] [1, 1, 1, 1, 1] : memref<8x16x16x32x2xbf16> to memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>
        %cmp = arith.cmpi eq, %arg0, %c0_index : index
        scf.if %cmp {
          %0 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 1024, 1024] flags = (beta_0, stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%0) : (i64) -> ()
          microkernel.brgemm(%0, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%0) : (i64) -> ()
        }
        memref.dealloc %alloc_3 : memref<32x32xf32>
      }
    }
    return
  }
}

// CHECK-LABEL: simple_brgemm

// CHECK: scf.for %arg0 = %c0 to %c4 step %c1
// CHECK-NEXT: scf.for %arg1 = %c0 to %c8 step %c1

// CHECK: scf.if
// CHECK: func.call @dnnl_brgemm_tileconfig
// CHECK: func.call @dnnl_brgemm_tilerelease() : () -> ()
// CHECK: }

// -----

#map = affine_map<(d0, d1) -> (d0, d1)>
module {
  func.func @simple_brgemm() {
    %c0_i64 = arith.constant 0 : i64
    %c0_index = arith.constant 0 : index
    %c1_index = arith.constant 1 : index
    %c4_index = arith.constant 4 : index
    %c8_index = arith.constant 8 : index
    %c16_i64 = arith.constant 16 : i64
    %cst = arith.constant 0.000000e+00 : f32
    %alloc = memref.alloc() {alignment = 64 : i64} : memref<4x16x32x32xbf16>
    %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<8x16x16x32x2xbf16>
    %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<4x8x32x32xf32>
    scf.for %arg0 = %c0_index to %c4_index step %c1_index {
      scf.for %arg1 = %c0_index to %c8_index step %c1_index {
        %alloc_3 = memref.alloc() {alignment = 64 : i64} : memref<32x32xf32>
        linalg.fill ins(%cst : f32) outs(%alloc_3 : memref<32x32xf32>)
        %subview = memref.subview %alloc[%arg0, 0, 0, 0] [1, 16, 32, 32] [1, 1, 1, 1] : memref<4x16x32x32xbf16> to memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>
        %subview_4 = memref.subview %alloc_0[%arg1, 0, 0, 0, 0] [1, 16, 16, 32, 2] [1, 1, 1, 1, 1] : memref<8x16x16x32x2xbf16> to memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>
        %cmp = arith.cmpi eq, %arg0, %c0_index : index
        scf.if %cmp {
          %0 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 1024, 1024] flags = (beta_0, stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%0) : (i64) -> ()
          microkernel.brgemm(%0, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%0) : (i64) -> ()
        } else {
          %1 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 512, 512] flags = (stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%1) : (i64) -> ()
          microkernel.brgemm(%1, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%1) : (i64) -> ()
        }
        memref.dealloc %alloc_3 : memref<32x32xf32>
      }
    }
    return
  }
}

// CHECK-LABEL: simple_brgemm

// CHECK: scf.for %arg0 = %c0 to %c4 step %c1
// CHECK-NEXT: scf.for %arg1 = %c0 to %c8 step %c1

// CHECK: scf.if
// CHECK: func.call @dnnl_brgemm_tileconfig
// CHECK: func.call @dnnl_brgemm_tilerelease() : () -> ()
// CHECK: } else {
// CHECK: func.call @dnnl_brgemm_tileconfig
// CHECK: func.call @dnnl_brgemm_tilerelease() : () -> ()
// CHECK: }

// -----

#map = affine_map<(d0, d1) -> (d0, d1)>
module {
  func.func @simple_brgemm() {
    %c0_i64 = arith.constant 0 : i64
    %c0_index = arith.constant 0 : index
    %c1_index = arith.constant 1 : index
    %c4_index = arith.constant 4 : index
    %c8_index = arith.constant 8 : index
    %c16_i64 = arith.constant 16 : i64
    %cst = arith.constant 0.000000e+00 : f32
    %alloc = memref.alloc() {alignment = 64 : i64} : memref<4x16x32x32xbf16>
    %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<8x16x16x32x2xbf16>
    %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<4x8x32x32xf32>
    scf.for %arg0 = %c0_index to %c4_index step %c1_index {
      scf.for %arg1 = %c0_index to %c8_index step %c1_index {
        %alloc_3 = memref.alloc() {alignment = 64 : i64} : memref<32x32xf32>
        linalg.fill ins(%cst : f32) outs(%alloc_3 : memref<32x32xf32>)
        %subview = memref.subview %alloc[%arg0, 0, 0, 0] [1, 16, 32, 32] [1, 1, 1, 1] : memref<4x16x32x32xbf16> to memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>
        %subview_4 = memref.subview %alloc_0[%arg1, 0, 0, 0, 0] [1, 16, 16, 32, 2] [1, 1, 1, 1, 1] : memref<8x16x16x32x2xbf16> to memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>
        scf.index_switch %arg0
        case 0 {
          %0 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 1024, 1024] flags = (beta_0, stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%0) : (i64) -> ()
          microkernel.brgemm(%0, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%0) : (i64) -> ()
          scf.yield
        }
        case 1 {
          %1 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 1024, 1024] flags = (stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%1) : (i64) -> ()
          microkernel.brgemm(%1, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%1) : (i64) -> ()
          scf.yield
        }
        default {
          %2 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 1024, 1024] flags = (stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%2) : (i64) -> ()
          microkernel.brgemm(%2, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%2) : (i64) -> ()
          scf.yield
        }
        memref.dealloc %alloc_3 : memref<32x32xf32>
      }
    }
    return
  }
}

// CHECK-LABEL: simple_brgemm

// CHECK: scf.for %arg0 = %c0 to %c4 step %c1
// CHECK-NEXT: scf.for %arg1 = %c0 to %c8 step %c1

// CHECK: func.call @dnnl_brgemm_tileconfig
// CHECK-NEXT: scf.index_switch
// CHECK: case 0 {
// CHECK: case 1 {
// CHECK: default {
// CHECK: }
// CHECK-NEXT: func.call @dnnl_brgemm_tilerelease() : () -> ()

// -----

#map = affine_map<(d0, d1) -> (d0, d1)>
module {
  func.func @simple_brgemm() {
    %c0_i64 = arith.constant 0 : i64
    %c0_index = arith.constant 0 : index
    %c1_index = arith.constant 1 : index
    %c4_index = arith.constant 4 : index
    %c8_index = arith.constant 8 : index
    %c16_i64 = arith.constant 16 : i64
    %cst = arith.constant 0.000000e+00 : f32
    %alloc = memref.alloc() {alignment = 64 : i64} : memref<4x16x32x32xbf16>
    %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<8x16x16x32x2xbf16>
    %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<4x8x32x32xf32>
    scf.for %arg0 = %c0_index to %c4_index step %c1_index {
      scf.for %arg1 = %c0_index to %c8_index step %c1_index {
        %alloc_3 = memref.alloc() {alignment = 64 : i64} : memref<32x32xf32>
        linalg.fill ins(%cst : f32) outs(%alloc_3 : memref<32x32xf32>)
        %subview = memref.subview %alloc[%arg0, 0, 0, 0] [1, 16, 32, 32] [1, 1, 1, 1] : memref<4x16x32x32xbf16> to memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>
        %subview_4 = memref.subview %alloc_0[%arg1, 0, 0, 0, 0] [1, 16, 16, 32, 2] [1, 1, 1, 1, 1] : memref<8x16x16x32x2xbf16> to memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>
        scf.index_switch %arg0
        case 0 {
          %0 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 1024, 1024] flags = (beta_0, stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%0) : (i64) -> ()
          microkernel.brgemm(%0, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%0) : (i64) -> ()
          scf.yield
        }
        case 1 {
          %1 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 1024, 1024] flags = (stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%1) : (i64) -> ()
          microkernel.brgemm(%1, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%1) : (i64) -> ()
          scf.yield
        }
        default {
          %2 = microkernel.brgemm.dispatch [32, 32, 32, 32, 32, 32, 512, 512] flags = (stride) data_type = (bf16, bf16)
          microkernel.brgemm.prologue(%2) : (i64) -> ()
          microkernel.brgemm(%2, %subview, %subview_4, %alloc_3, %c16_i64, %c0_i64) : (i64, memref<16x32x32xbf16, strided<[1024, 32, 1], offset: ?>>, memref<16x16x32x2xbf16, strided<[1024, 64, 2, 1], offset: ?>>, memref<32x32xf32>, i64, i64) -> ()
          microkernel.brgemm.epilogue(%2) : (i64) -> ()
          scf.yield
        }
        memref.dealloc %alloc_3 : memref<32x32xf32>
      }
    }
    return
  }
}

// CHECK-LABEL: simple_brgemm

// CHECK: scf.for %arg0 = %c0 to %c4 step %c1
// CHECK-NEXT: scf.for %arg1 = %c0 to %c8 step %c1

// CHECK: scf.index_switch
// CHECK: case 0 {
// CHECK: func.call @dnnl_brgemm_tileconfig
// CHECK: func.call @dnnl_brgemm_tilerelease() : () -> ()
// CHECK: case 1 {
// CHECK: func.call @dnnl_brgemm_tileconfig
// CHECK: func.call @dnnl_brgemm_tilerelease() : () -> ()
// CHECK: default {
// CHECK: func.call @dnnl_brgemm_tileconfig
// CHECK: func.call @dnnl_brgemm_tilerelease() : () -> ()
// CHECK: }
