Skip to content

Commit 4e8f6d3

Browse files
committed
feat(custom): update key names in CustomSSEProcessor and add token count support #51
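- Update the default key names in CustomSSEProcessor: the message text is now read from and written to "content" instead of "message" (the role key stays "role", with "user" as the fallback role value).
- Change `Tokenizer.tokenize` (interface and `TokenizerImpl`) to return jtokkit's `IntArrayList?` instead of `List<Int>`, matching the token list type returned by `encoding.encode(...).tokens` after the jtokkit upgrade.
- Upgrade the markdown library from 0.5.1 to 0.6.1.
- Upgrade the `jtokkit` library from 0.6.1 to 1.0.0 for token count functionality.
- Add comments in build.gradle.kts labelling the template engine, http request/response, and token count dependencies.
- Remove the commented-out `sourceSets` block from build.gradle.kts.
- Note: the GitLab API dependency (gitlab4j-api 5.3.0) and the IntelliJ IDEA plugin configuration appear only as unchanged context in this commit.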
1 parent d3d6f05 commit 4e8f6d3

File tree

4 files changed

+14
-14
lines changed

4 files changed

+14
-14
lines changed

build.gradle.kts

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -390,21 +390,14 @@ project(":") {
390390
plugins.set(ideaPlugins)
391391
}
392392

393-
// sourceSets {
394-
// main {
395-
// resources.srcDirs("src/main/resources")
396-
// resources.srcDirs("src/$platformVersion/main/resources")
397-
// }
398-
// }
399-
400393
dependencies {
401394
implementation(libs.bundles.openai)
402395
implementation(libs.bundles.markdown)
403396
implementation(libs.yaml)
404397

405398
implementation(libs.json.pathkt)
406399

407-
implementation("org.jetbrains:markdown:0.5.1")
400+
implementation("org.jetbrains:markdown:0.6.1")
408401
implementation(libs.kotlinx.serialization.json)
409402

410403
implementation("cc.unitmesh:cocoa-core:0.4.5")
@@ -414,11 +407,14 @@ project(":") {
414407
implementation(libs.github.api)
415408
implementation("org.gitlab4j:gitlab4j-api:5.3.0")
416409

410+
// template engine
417411
implementation("org.apache.velocity:velocity-engine-core:2.3")
418412

413+
// http request/response
419414
implementation(libs.jackson.module.kotlin)
420415

421-
implementation("com.knuddels:jtokkit:0.6.1")
416+
// token count
417+
implementation("com.knuddels:jtokkit:1.0.0")
422418

423419
// junit
424420
testImplementation("io.kotest:kotest-assertions-core:5.7.2")

src/main/kotlin/cc/unitmesh/devti/llms/custom/CustomSSEProcessor.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,17 +149,17 @@ fun JsonObject.updateCustomBody(customRequest: String): JsonObject {
149149

150150
// TODO clean code with magic literals
151151
var roleKey = "role"
152-
var contentKey = "message"
152+
var contentKey = "content"
153153
customRequestJson.jsonObject["messageKeys"]?.let {
154154
roleKey = it.jsonObject["role"]?.jsonPrimitive?.content ?: "role"
155-
contentKey = it.jsonObject["content"]?.jsonPrimitive?.content ?: "message"
155+
contentKey = it.jsonObject["content"]?.jsonPrimitive?.content ?: "content"
156156
}
157157

158158
val messages: JsonArray = this@updateCustomBody["messages"]?.jsonArray ?: buildJsonArray { }
159159
this.put("messages", buildJsonArray {
160160
messages.forEach { message ->
161161
val role: String = message.jsonObject["role"]?.jsonPrimitive?.content ?: "user"
162-
val content: String = message.jsonObject["message"]?.jsonPrimitive?.content ?: ""
162+
val content: String = message.jsonObject["content"]?.jsonPrimitive?.content ?: ""
163163
add(buildJsonObject {
164164
put(roleKey, role)
165165
put(contentKey, content)
src/main/kotlin/cc/unitmesh/devti/llms/tokenizer/Tokenizer.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package cc.unitmesh.devti.llms.tokenizer
22

3+
import com.knuddels.jtokkit.api.IntArrayList
4+
35
interface Tokenizer {
46
fun getMaxLength(): Int
57
fun count(string: String): Int
6-
fun tokenize(chunk: String): List<Int>
8+
fun tokenize(chunk: String): IntArrayList?
79
}

src/main/kotlin/cc/unitmesh/devti/llms/tokenizer/TokenizerImpl.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import com.knuddels.jtokkit.Encodings
66
import com.knuddels.jtokkit.api.Encoding
77
import com.knuddels.jtokkit.api.EncodingRegistry
88
import com.knuddels.jtokkit.api.EncodingType
9+
import com.knuddels.jtokkit.api.IntArrayList
910

1011
@Service(Service.Level.APP)
1112
class TokenizerImpl(private val maxTokenLength: Int = 8192) : Tokenizer {
@@ -15,7 +16,8 @@ class TokenizerImpl(private val maxTokenLength: Int = 8192) : Tokenizer {
1516
override fun getMaxLength(): Int = maxTokenLength
1617

1718
override fun count(string: String): Int = encoding.countTokens(string)
18-
override fun tokenize(chunk: String): List<Int> {
19+
20+
override fun tokenize(chunk: String): IntArrayList? {
1921
return encoding.encode(chunk, maxTokenLength).tokens
2022
}
2123

0 commit comments

Comments
 (0)