refactor(parser): replace XML tags with markdown code blocks #331

phodal · phodal · commit ad7919ae2941 · 2025-03-15T14:40:20.000+08:00
- Replace `<THOUGHT>` and `<PLAN>` XML tags with markdown code blocks using `plan` language.
- Remove XML-specific parsing logic and simplify code fence handling.
- Update tests to reflect changes in code block parsing.
diff --git a/core/src/main/kotlin/cc/unitmesh/devti/sketch/ui/code/MarkdownViewer.kt b/core/src/main/kotlin/cc/unitmesh/devti/sketch/ui/code/MarkdownViewer.kt
@@ -21,9 +21,7 @@ object MarkdownViewer {
         val editorFontSize = EditorColorsManager.getInstance().schemeForCurrentUITheme.editorFontSize
 
         val fontFamilyAndSize = "font-family:'" + editorFontName + "'; font-size:" + editorFontSize + "pt;"
-        val backgroundColorCss = "background-color: #" + ColorUtil.toHex(backgroundColor) + ";"
-        htmlEditorKit.getStyleSheet().addRule("code { $backgroundColorCss$fontFamilyAndSize}")
-        htmlEditorKit.getStyleSheet().addRule("code.language-markdown { background-color: #ffffff; $fontFamilyAndSize}")
+        htmlEditorKit.getStyleSheet().addRule("code { $fontFamilyAndSize }")
         htmlEditorKit.getStyleSheet().addRule("p { margin-top: 1px }")
 
         jEditorPane.also {
diff --git a/core/src/main/kotlin/cc/unitmesh/devti/util/parser/CodeFence.kt b/core/src/main/kotlin/cc/unitmesh/devti/util/parser/CodeFence.kt
@@ -14,10 +14,6 @@ class CodeFence(
         private var lastTxtBlock: CodeFence? = null
         val devinStartRegex = Regex("<devin>")
         val devinEndRegex = Regex("</devin>")
-        val thoughtStartRegex = Regex("<THOUGHT>")
-        val thoughtEndRegex = Regex("</THOUGHT>")
-        val planStartRegex = Regex("<PLAN>")
-        val planEndRegex = Regex("</PLAN>")
 
         fun parse(content: String): CodeFence {
             val languageRegex = Regex("\\s*```([\\w#+ ]*)")
@@ -28,7 +24,6 @@ class CodeFence(
                 val endMatch = devinEndRegex.find(content)
                 val isComplete = endMatch != null
 
-                // 提取内容：如果有结束标签就截取中间内容，没有就取整个后续内容
                 val devinContent = if (isComplete) {
                     content.substring(startMatch.range.last + 1, endMatch!!.range.first).trim()
                 } else {
@@ -38,7 +33,6 @@ class CodeFence(
                 return CodeFence(findLanguage("DevIn"), devinContent, isComplete, "devin", "DevIn")
             }
 
-            // 原有的 Markdown 代码块解析逻辑
             var codeStarted = false
             var codeClosed = false
             var languageId: String? = null
@@ -87,8 +81,6 @@ class CodeFence(
                 content = preProcessDevinBlock(content)
             }
             
-            content = preProcessXmlBlocks(content)
-
             val startMatches = devinStartRegex.findAll(content)
             for (startMatch in startMatches) {
                 if (startMatch.range.first > currentIndex) {
@@ -124,38 +116,6 @@ class CodeFence(
             return codeFences.filter { it.text.isNotEmpty() }
         }
         
-        private fun preProcessXmlBlocks(content: String): String {
-            var currentContent = content
-            
-            // 处理<THOUGHT>标签
-            val thoughtMatches = Regex("(?<=^|\\n)<THOUGHT>([\\s\\S]*?)</THOUGHT>\\n?").findAll(content).toList()
-            for (match in thoughtMatches) {
-                val thoughtContent = match.groups[1]?.value ?: ""
-                // 检查是否有内部的<PLAN>标签
-                val planMatch = Regex("<PLAN>([\\s\\S]*?)</PLAN>").find(thoughtContent)
-                
-                if (planMatch != null) {
-                    val planContent = planMatch.groups[1]?.value?.trim() ?: ""
-                    // 将<PLAN>内容替换为Markdown代码块格式
-                    val processedContent = thoughtContent.replace(planMatch.value, "\n```plan\n$planContent\n```\n")
-                    currentContent = currentContent.replace(match.value, processedContent)
-                } else {
-                    // 如果没有内部PLAN标签，保持原样
-                    currentContent = currentContent
-                }
-            }
-            
-            // 直接处理独立的<PLAN>标签
-            val planMatches = Regex("(?<=^|\\n)<PLAN>([\\s\\S]*?)</PLAN>\\n?").findAll(currentContent).toList()
-            for (match in planMatches) {
-                val planContent = match.groups[1]?.value?.trim() ?: ""
-                val replacement = "\n```plan\n$planContent\n```\n"
-                currentContent = currentContent.replace(match.value, replacement)
-            }
-            
-            return currentContent
-        }
-
         val devinRegexBlock = Regex("(?<=^|\\n)```devin\\n([\\s\\S]*?)\\n```\\n")
         val normalCodeBlock = Regex("\\s*```([\\w#+ ]*)\\n")
 
@@ -272,7 +232,6 @@ class CodeFence(
                 "bash" -> "Shell Script"
                 "http" -> "HTTP Request"
                 "plan" -> "Plain Text"
-                "thought" -> "Plain Text"
                 else -> languageName
             }
 
@@ -318,8 +277,6 @@ class CodeFence(
                 "shell script" -> "sh"
                 "bash" -> "sh"
                 "devin" -> "devin"
-                "plan" -> "plan"
-                "thought" -> "thought"
                 else -> languageId
             }
         }
@@ -349,8 +306,6 @@ class CodeFence(
                 "scala" -> "Scala"
                 "rs" -> "Rust"
                 "http" -> "HTTP Request"
-                "plan" -> "PLAN"
-                "thought" -> "THOUGHT"
                 else -> extension
             }
         }
diff --git a/core/src/main/resources/genius/en/code/plan.devin b/core/src/main/resources/genius/en/code/plan.devin
@@ -34,14 +34,13 @@ $context.toolList
 instead of saying 'I need to use the edit file tool to edit your file', just
 say 'I will edit your file'.
 5. Before calling each tool, first explain to the USER why you are calling it.
-6. Ensure that every write operation (such as `patch`) is enclosed within its own `<devin></devin>` tag.
 </tool_calling>
 
 Here is the rule you should follow:
 
 1. Thoroughly review `<user.question>`. Create an initial plan that includes all the necessary steps to
  resolve `<user.question>`, using the recommended steps provided below,  and incorporating any requirements from
-  the `<user.question>`. Place your plan inside the XML tag `<THOUGHT>` within the sub-tag `<PLAN>`.
+  the `<user.question>`. Place your plan inside the XML tag `<THOUGHT>` within the code language `plan`.
 2. Review the project’s codebase, examining not only its structure but also the specific implementation details, to
    identify all segments that may contribute to or help resolve the issue described in `<user.question>`.
 3. If `<user.question>` describes an error, create a script to reproduce it and run the script to confirm the error.
@@ -60,14 +59,14 @@ first. Be thorough in your thinking process, so it's okay if it is lengthy.
 
 For each step, document your reasoning process inside `<THOUGHT>` tags. Include the following information, enclosed within XML tags:
 
-1. `<PLAN>`: An updated plan incorporating the outcomes from the previous step. Mark progress by adding `✓` after each task in the plan that was fully completed before this step during the **current session**. Use the symbol `!` for tasks that have a latest status as failed, and use `*` for tasks that are currently in progress. If there are sub-tasks, mark their progress statuses as well. Ensure all progress statuses are marked accurately and appropriately reflect the hierarchical relationships of statuses between tasks and sub-tasks. For example, if all sub-tasks are completed, the parent task should also be marked as completed.
+1. `plan`: An updated plan incorporating the outcomes from the previous step. Mark progress by adding `✓` after each task in the plan that was fully completed before this step during the **current session**. Use the symbol `!` for tasks that have a latest status as failed, and use `*` for tasks that are currently in progress. If there are sub-tasks, mark their progress statuses as well. Ensure all progress statuses are marked accurately and appropriately reflect the hierarchical relationships of statuses between tasks and sub-tasks. For example, if all sub-tasks are completed, the parent task should also be marked as completed.
 
 For example:
 
 <THOUGHT>
-<PLAN>
+```plan
 Some plan
-</PLAN>
+```
 </THOUGHT>
 
 Here is user.question:
diff --git a/core/src/main/resources/genius/zh/code/plan.devin b/core/src/main/resources/genius/zh/code/plan.devin
@@ -34,14 +34,13 @@ $context.toolList
 instead of saying 'I need to use the edit file tool to edit your file', just
 say 'I will edit your file'.
 5. Before calling each tool, first explain to the USER why you are calling it.
-6. Ensure that every write operation (such as `patch`) is enclosed within its own `<devin></devin>` tag.
 </tool_calling>
 
 Here is the rule you should follow:
 
 1. Thoroughly review `<user.question>`. Create an initial plan that includes all the necessary steps to
  resolve `<user.question>`, using the recommended steps provided below,  and incorporating any requirements from
-  the `<user.question>`. Place your plan inside the XML tag `<THOUGHT>` within the sub-tag `<PLAN>`.
+  the `<user.question>`. Place your plan inside the XML tag `<THOUGHT>` within the code language `plan`.
 2. Review the project’s codebase, examining not only its structure but also the specific implementation details, to
    identify all segments that may contribute to or help resolve the issue described in `<user.question>`.
 3. If `<user.question>` describes an error, create a script to reproduce it and run the script to confirm the error.
@@ -60,15 +59,14 @@ first. Be thorough in your thinking process, so it's okay if it is lengthy.
 
 For each step, document your reasoning process inside `<THOUGHT>` tags. Include the following information, enclosed within XML tags:
 
-1. `<PLAN>`: An updated plan incorporating the outcomes from the previous step. Mark progress by adding `✓` after each task in the plan that was fully completed before this step during the **current session**. Use the symbol `!` for tasks that have a latest status as failed, and use `*` for tasks that are currently in progress. If there are sub-tasks, mark their progress statuses as well. Ensure all progress statuses are marked accurately and appropriately reflect the hierarchical relationships of statuses between tasks and sub-tasks. For example, if all sub-tasks are completed, the parent task should also be marked as completed.
-
+1. `plan`: An updated plan incorporating the outcomes from the previous step. Mark progress by adding `✓` after each task in the plan that was fully completed before this step during the **current session**. Use the symbol `!` for tasks that have a latest status as failed, and use `*` for tasks that are currently in progress. If there are sub-tasks, mark their progress statuses as well. Ensure all progress statuses are marked accurately and appropriately reflect the hierarchical relationships of statuses between tasks and sub-tasks. For example, if all sub-tasks are completed, the parent task should also be marked as completed.
 
 For example:
 
 <THOUGHT>
-<PLAN>
-1.
-</PLAN>
+```plan
+Some plan
+```
 </THOUGHT>
 
 Here is user.question:
diff --git a/core/src/test/kotlin/cc/unitmesh/devti/parser/CodeFenceTest.kt b/core/src/test/kotlin/cc/unitmesh/devti/parser/CodeFenceTest.kt
@@ -557,109 +557,18 @@ Index: src/main/java/cc/unitmesh/untitled/demo/repository/BlogRepository.java
         assertEquals(codeFences.size, 1)
     }
 
-    fun testShouldParsePlanTag() {
-        val content = """
-            <PLAN>
-            1. 领域模型重构：
-            - 将BlogPost实体合并到Blog聚合根，建立完整的领域对象
-            - 添加领域行为方法（发布、审核、评论等）
-            - 引入值对象（BlogId、Content等）
-            </PLAN>
-        """.trimIndent()
-
-        val code = CodeFence.parse(content)
-        assertEquals(
-            code.text,
-            """
-            1. 领域模型重构：
-            - 将BlogPost实体合并到Blog聚合根，建立完整的领域对象
-            - 添加领域行为方法（发布、审核、评论等）
-            - 引入值对象（BlogId、Content等）
-            """.trimIndent()
-        )
-        assertTrue(code.isComplete)
-        assertEquals("plan", code.extension)
-        assertEquals("plan", code.originLanguage)
-    }
-
-    fun testShouldParseThoughtTag() {
-        val content = """
-            <THOUGHT>
-            我需要实现博客系统的领域模型重构，主要包括以下步骤：
-            1. 将BlogPost合并到Blog聚合根
-            2. 添加领域行为
-            </THOUGHT>
-        """.trimIndent()
-
-        val code = CodeFence.parse(content)
-        assertEquals(
-            code.text,
-            """
-            我需要实现博客系统的领域模型重构，主要包括以下步骤：
-            1. 将BlogPost合并到Blog聚合根
-            2. 添加领域行为
-            """.trimIndent()
-        )
-        assertTrue(code.isComplete)
-        assertEquals("thought", code.extension)
-        assertEquals("thought", code.originLanguage)
-    }
-
-    fun testShouldParseNestedPlanInThought() {
-        val content = """
-            <THOUGHT>
-            我需要对系统进行重构，首先列出计划：
-            
-            <PLAN>
-            1. 领域模型重构：
-            - 将BlogPost实体合并到Blog聚合根，建立完整的领域对象
-            - 添加领域行为方法（发布、审核、评论等）
-            
-            2. 分层结构调整：
-            - 清理entity层冗余对象，建立清晰的domain层
-            - 实现领域服务与基础设施层分离
-            </PLAN>
-            
-            然后按照计划实施重构。
-            </THOUGHT>
-        """.trimIndent()
-
-        val code = CodeFence.parse(content)
-        assertEquals(
-            code.text,
-            """
-            1. 领域模型重构：
-            - 将BlogPost实体合并到Blog聚合根，建立完整的领域对象
-            - 添加领域行为方法（发布、审核、评论等）
-            
-            2. 分层结构调整：
-            - 清理entity层冗余对象，建立清晰的domain层
-            - 实现领域服务与基础设施层分离
-            """.trimIndent()
-        )
-        assertTrue(code.isComplete)
-        assertEquals("plan", code.extension)
-        assertEquals("plan", code.originLanguage)
-    }
-
     fun testShouldParseAllWithXmlTags() {
         val content = """
             首先，我需要思考重构的步骤：
             
-            <THOUGHT>
-            系统重构需要考虑领域驱动设计原则，确保聚合根的完整性。
-            
-            <PLAN>
+            ```plan
             1. 领域模型重构：
             - 将BlogPost实体合并到Blog聚合根，建立完整的领域对象
             - 添加领域行为方法（发布、审核、评论等）
             
             2. 分层结构调整：
             - 清理entity层冗余对象
-            </PLAN>
-            
-            这样的重构可以提高系统的内聚性。
-            </THOUGHT>
+            ```
             
             然后，我们可以开始代码实现：
             
@@ -678,29 +587,13 @@ Index: src/main/java/cc/unitmesh/untitled/demo/repository/BlogRepository.java
             <devin>
             /patch
             ```patch
-            Index: src/main/java/cc/unitmesh/untitled/demo/repository/BlogRepository.java
-            --- src/main/java/cc/unitmesh/untitled/demo/repository/BlogRepository.java	(revision 1)
-            +++ src/main/java/cc/unitmesh/untitled/demo/repository/BlogRepository.java	(revision 2)
-            ```
-            </devin>
-            
-            <PLAN>
-            3. 战术模式实现：
-            - 使用工厂模式处理复杂对象创建
-            - 实现仓储接口与领域层的依赖倒置
-            </PLAN>
         """.trimIndent()
 
         val codeFences = CodeFence.parseAll(content)
-        assertEquals(6, codeFences.size)
+        assertEquals(5, codeFences.size)
         
-        // 检查第一个代码段是Markdown内容
-        assertEquals("首先，我需要思考重构的步骤：\n" +
-                "\n" +
-                "\n" +
-                "系统重构需要考虑领域驱动设计原则，确保聚合根的完整性。", codeFences[0].text)
+        assertEquals("首先，我需要思考重构的步骤：", codeFences[0].text)
         
-        // 检查第二个代码段是plan内容（嵌套在THOUGHT中的PLAN）
         assertEquals(
             """
             1. 领域模型重构：
@@ -714,18 +607,22 @@ Index: src/main/java/cc/unitmesh/untitled/demo/repository/BlogRepository.java
         )
         assertEquals("plan", codeFences[1].extension)
         
-        // 检查第三个代码段是Java代码
-        assertEquals("这样的重构可以提高系统的内聚性。\n\n然后，我们可以开始代码实现：", codeFences[2].text)
-
-        // 检查第四个代码段是单独的PLAN标签内容
+        assertEquals("然后，我们可以开始代码实现：", codeFences[2].text)
         assertEquals(
             """
-            3. 战术模式实现：
-            - 使用工厂模式处理复杂对象创建
-            - 实现仓储接口与领域层的依赖倒置
-            """.trimIndent(),
+            public class Blog {
+                private BlogId id;
+                private String title;
+                private String content;
+                
+                public void publish() {
+                    // 实现发布逻辑
+                }
+            }""".trimIndent(),
             codeFences[3].text
         )
-        assertEquals("plan", codeFences[3].extension)
+        assertEquals("java", codeFences[3].extension)
+
+        assertEquals("DevIn", codeFences[4].originLanguage)
     }
 }
diff --git a/core/src/test/kotlin/cc/unitmesh/devti/util/parser/MarkdownToHtmlConverterTest.kt b/core/src/test/kotlin/cc/unitmesh/devti/util/parser/MarkdownToHtmlConverterTest.kt
@@ -11,8 +11,7 @@ class MarkdownConverterTest {
     fun should_convert_simple_markdown_to_html() {
         // Given
         val markdownText = """
-            <THOUGHT>
-            <PLAN>
+            ```
             1. 领域模型重构：
             - 将BlogPost实体合并到Blog聚合根，建立完整的领域对象
             - 添加领域行为方法（发布、审核、评论等）
@@ -31,10 +30,9 @@ class MarkdownConverterTest {
             4. 测试保障：
             - 重构单元测试，验证领域模型行为
             - 添加聚合根不变性约束测试
-            </PLAN>
-            </THOUGHT>
+            ```
             """.trimIndent()
-        val expectedHtml = "<pre><code class=\"language-markdown\">\n" +
+        val expectedHtml = "<pre><code class=\"language-plan\">\n" +
                 "1. 领域模型重构：\n" +
                 "- 将BlogPost实体合并到Blog聚合根，建立完整的领域对象\n" +
                 "- 添加领域行为方法（发布、审核、评论等）\n" +