add completion streaming support

CJCrafter · CJCrafter · commit 15fbf5883a99 · 2023-03-31T01:35:00.000-04:00
diff --git a/src/main/kotlin/com/cjcrafter/openai/OpenAI.kt b/src/main/kotlin/com/cjcrafter/openai/OpenAI.kt
@@ -4,6 +4,7 @@ import com.cjcrafter.openai.gson.ChatChoiceChunkAdapter
 import com.cjcrafter.openai.chat.*
 import com.cjcrafter.openai.completions.CompletionRequest
 import com.cjcrafter.openai.completions.CompletionResponse
+import com.cjcrafter.openai.completions.CompletionResponseChunk
 import com.cjcrafter.openai.exception.OpenAIError
 import com.cjcrafter.openai.exception.WrappedIOError
 import com.cjcrafter.openai.gson.ChatUserAdapter
@@ -16,6 +17,7 @@ import okhttp3.*
 import okhttp3.MediaType.Companion.toMediaType
 import okhttp3.RequestBody.Companion.toRequestBody
 import java.io.IOException
+import java.lang.IllegalStateException
 import java.util.function.Consumer
 
 /**
@@ -56,22 +58,27 @@ class OpenAI @JvmOverloads constructor(
             .post(body).build()
     }
 
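+    /**
+     * Sends [request] to the `completions` endpoint and blocks until the response arrives.
+     *
+     * @param request The completion request; its `stream` flag is forced to `false`.
+     * @return The parsed [CompletionResponse]; an "error" object in the reply is thrown as [OpenAIError].
+     * @since 1.3.0
+     */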
     @Throws(OpenAIError::class)
     fun createCompletion(request: CompletionRequest): CompletionResponse {
         @Suppress("DEPRECATION")
         request.stream = false // use streamCompletion for stream=true
         val httpRequest = buildRequest(request, "completions")
 
-        // Save the JsonObject to check for errors
-        var rootObject: JsonObject?
         try {
             client.newCall(httpRequest).execute().use { response ->
 
                 // Servers respond to API calls with json blocks. Since raw JSON isn't
                 // very developer friendly, we wrap for easy data access.
-                rootObject = JsonParser.parseString(response.body!!.string()).asJsonObject
-                if (rootObject!!.has("error"))
-                    throw OpenAIError.fromJson(rootObject!!.get("error").asJsonObject)
+                val rootObject = JsonParser.parseString(response.body!!.string()).asJsonObject
+                if (rootObject.has("error"))
+                    throw OpenAIError.fromJson(rootObject.get("error").asJsonObject)
 
                 return gson.fromJson(rootObject, CompletionResponse::class.java)
             }
@@ -81,6 +88,78 @@ class OpenAI @JvmOverloads constructor(
         }
     }
 
+    /**
+     * Kotlin-friendly wrapper around [streamCompletion]: each chunk is the receiver of [onResponse].
+     * HTTP failures go to the default `onFailure` handler of [streamCompletion] (prints the stack trace).
+     * @param request    The completion request; [streamCompletion] forces `stream = true` on it.
+     * @param onResponse The lambda to invoke, with the chunk as `this`, for each chunk.
+     * @since 1.3.0
+     */
+    fun streamCompletionKotlin(request: CompletionRequest, onResponse: CompletionResponseChunk.() -> Unit) {
+        streamCompletion(request, { it.onResponse() })
+    }
+
+    /**
+     * Enqueues [request] with `stream = true` and returns without blocking. Calls
+     * to [onResponse] are made from the asynchronous OkHttp callback, not by the
+     * calling thread, so consider thread safety within the context of your program.
+     *
+     * @param request    The completion request. Its `stream` flag is overwritten with `true`.
+     * @param onResponse The method to call for each parsed chunk.
+     * @param onFailure  The method to call if the HTTP call fails. An "error" object
+     *                   in the stream is thrown as [OpenAIError] instead of passed here.
+     * @see createCompletion
+     * @see streamCompletionKotlin
+     * @since 1.3.0
+     */
+    @JvmOverloads
+    fun streamCompletion(
+        request: CompletionRequest,
+        onResponse: Consumer<CompletionResponseChunk>, // use Consumer instead of Kotlin for better Java syntax
+        onFailure: Consumer<OpenAIError> = Consumer { it.printStackTrace() }
+    ) {
+        @Suppress("DEPRECATION")
+        request.stream = true // use createCompletion for stream=false
+        val httpRequest = buildRequest(request, "completions")
+
+        client.newCall(httpRequest).enqueue(object : Callback {
+
+            override fun onFailure(call: Call, e: IOException) {
+                onFailure.accept(WrappedIOError(e))
+            }
+
+            override fun onResponse(call: Call, response: Response) {
+                response.body?.source()?.use { source ->
+                    while (!source.exhausted()) {
+
+                        // Each chunk is one line of the form "data: <json>"; empty
+                        // lines are skipped, as is the final "data: [DONE]" sentinel.
+                        var jsonResponse = source.readUtf8Line() ?: continue
+                        if (jsonResponse.isEmpty())
+                            continue
+
+                        // Lines without the "data: " prefix are not chunks; echo them to stderr and skip.
+                        if (!jsonResponse.startsWith("data: ")) {
+                            System.err.println(jsonResponse)
+                            continue
+                        }
+
+                        jsonResponse = jsonResponse.substring("data: ".length)
+                        if (jsonResponse == "[DONE]")
+                            continue
+
+                        val rootObject = JsonParser.parseString(jsonResponse).asJsonObject
+                        if (rootObject.has("error"))
+                            throw OpenAIError.fromJson(rootObject.get("error").asJsonObject)
+
+                        val cache = gson.fromJson(rootObject, CompletionResponseChunk::class.java)
+                        onResponse.accept(cache)
+                    }
+                }
+            }
+        })
+    }
+
     /**
      * Blocks the current thread until OpenAI responds to https request. The
      * returned value includes information including tokens, generated text,
@@ -97,16 +176,14 @@ class OpenAI @JvmOverloads constructor(
         request.stream = false // use streamResponse for stream=true
         val httpRequest = buildRequest(request, "chat/completions")
 
-        // Save the JsonObject to check for errors
-        var rootObject: JsonObject?
         try {
             client.newCall(httpRequest).execute().use { response ->
 
                 // Servers respond to API calls with json blocks. Since raw JSON isn't
                 // very developer friendly, we wrap for easy data access.
-                rootObject = JsonParser.parseString(response.body!!.string()).asJsonObject
-                if (rootObject!!.has("error"))
-                    throw OpenAIError.fromJson(rootObject!!.get("error").asJsonObject)
+                val rootObject = JsonParser.parseString(response.body!!.string()).asJsonObject
+                if (rootObject.has("error"))
+                    throw OpenAIError.fromJson(rootObject.get("error").asJsonObject)
 
                 return gson.fromJson(rootObject, ChatResponse::class.java)
             }
@@ -176,7 +253,7 @@ class OpenAI @JvmOverloads constructor(
     fun streamChatCompletion(
         request: ChatRequest,
         onResponse: Consumer<ChatResponseChunk>, // use Consumer instead of Kotlin for better Java syntax
-        onFailure: Consumer<IOException> = Consumer { it.printStackTrace() }
+        onFailure: Consumer<WrappedIOError> = Consumer { it.printStackTrace() }
     ) {
         @Suppress("DEPRECATION")
         request.stream = true // use requestResponse for stream=false
@@ -186,7 +263,7 @@ class OpenAI @JvmOverloads constructor(
             var cache: ChatResponseChunk? = null
 
             override fun onFailure(call: Call, e: IOException) {
-                onFailure.accept(e)
+                onFailure.accept(WrappedIOError(e))
             }
 
             override fun onResponse(call: Call, response: Response) {
@@ -203,6 +280,9 @@ class OpenAI @JvmOverloads constructor(
                             continue
 
                         val rootObject = JsonParser.parseString(jsonResponse).asJsonObject
+                        if (rootObject.has("error"))
+                            throw OpenAIError.fromJson(rootObject.get("error").asJsonObject)
+
                         if (cache == null)
                             cache = gson.fromJson(rootObject, ChatResponseChunk::class.java)
                         else
diff --git a/src/main/kotlin/com/cjcrafter/openai/completions/CompletionChoiceChunk.kt b/src/main/kotlin/com/cjcrafter/openai/completions/CompletionChoiceChunk.kt
@@ -0,0 +1,28 @@
+package com.cjcrafter.openai.completions
+
+import com.cjcrafter.openai.FinishReason
+import com.google.gson.annotations.SerializedName
+
+/**
+ * One choice inside a streamed [CompletionResponseChunk]. Each chunk choice
+ * carries a piece of generated text ([CompletionChoiceChunk.text]) and a
+ * nullable finish reason ([CompletionChoiceChunk.finishReason]). For most use
+ * cases, you only need the generated text.
+ *
+ * By default, only 1 choice is generated (since [CompletionRequest.n] == 1).
+ * When you increase `n` or provide a list of prompts (called batching),
+ * there will be multiple choices.
+ *
+ * @property text The few generated tokens.
+ * @property index The index in the list... This is 0 for most use cases.
+ * @property logprobs Log probabilities for the generated tokens, or `null`. NOTE(review): confirm the API sends a flat list here.
+ * @property finishReason The reason generation stopped, or `null` (presumably while still streaming; confirm).
+ * @constructor Create Completion choice chunk, for internal usage.
+ * @see FinishReason
+ */
+data class CompletionChoiceChunk(
+    val text: String,
+    val index: Int,
+    val logprobs: List<Float>?,
+    @field:SerializedName("finish_reason") val finishReason: FinishReason?
+)
diff --git a/src/main/kotlin/com/cjcrafter/openai/completions/CompletionResponseChunk.kt b/src/main/kotlin/com/cjcrafter/openai/completions/CompletionResponseChunk.kt
@@ -0,0 +1,64 @@
+package com.cjcrafter.openai.completions
+
+import java.time.Instant
+import java.time.ZoneId
+import java.time.ZonedDateTime
+import java.util.*
+
+/**
+ * A `CompletionResponseChunk` is one streamed piece of an OpenAI Completions API
+ * response. For most use cases, [CompletionResponseChunk.get] (passing 0 to the index
+ * argument) is all you need.
+ *
+ * @property id      The unique id for your request.
+ * @property created The Unix timestamp (measured in seconds since 00:00:00 UTC on January 1, 1970) when the API response was created.
+ * @property model   The model used to generate the completion.
+ * @property choices The generated completion chunk(s).
+ * @constructor Create Completion response chunk (for internal usage)
+ */
+data class CompletionResponseChunk(
+    val id: String,
+    val created: Long,
+    val model: String,
+    val choices: List<CompletionChoiceChunk>,
+) {
+
+    /**
+     * Converts [created] into an [Instant]. The value is interpreted as a
+     * unix timestamp (measured in seconds since 00:00:00 UTC on
+     * January 1, 1970).
+     *
+     * Note that users expect time to be measured in their timezone, so
+     * [getZonedTime] is preferred.
+     *
+     * @return The instant the api created this response.
+     * @see getZonedTime
+     */
+    fun getTime(): Instant {
+        return Instant.ofEpochSecond(created)
+    }
+
+    /**
+     * Returns [getTime] expressed as a [ZonedDateTime] in the given timezone.
+     * By default, this method uses the system's timezone.
+     *
+     * @param timezone The timezone to convert to; defaults to the system timezone.
+     * @return The timezone adjusted date time.
+     * @see TimeZone.getDefault
+     */
+    @JvmOverloads
+    fun getZonedTime(timezone: ZoneId = TimeZone.getDefault().toZoneId()): ZonedDateTime {
+        return ZonedDateTime.ofInstant(getTime(), timezone)
+    }
+
+    /**
+     * Shorthand for accessing the generated choices (shorthand for
+     * [CompletionResponseChunk.choices]).
+     *
+     * @param index The index of the choice; must be a valid index into [choices].
+     * @return The generated [CompletionChoiceChunk] at the index.
+     */
+    operator fun get(index: Int): CompletionChoiceChunk {
+        return choices[index]
+    }
+}
diff --git a/src/test/kotlin/KotlinCompletionStreamTest.kt b/src/test/kotlin/KotlinCompletionStreamTest.kt
@@ -0,0 +1,19 @@
+import com.cjcrafter.openai.OpenAI
+import com.cjcrafter.openai.completions.CompletionRequest
+import io.github.cdimascio.dotenv.dotenv
+
+fun main(args: Array<String>) {
+
+    // Prepare the CompletionRequest
+    val request = CompletionRequest(model="davinci", prompt="Hello darkness", maxTokens = 1024)
+
+    // Loads the API key from the .env file in the root directory.
+    val key = dotenv()["OPENAI_TOKEN"]
+    val openai = OpenAI(key)
+
+    // Stream the response, printing each chunk's text as it arrives
+    //println(openai.createCompletion(request))
+    openai.streamCompletionKotlin(request) {
+        print(choices[0].text)
+    }
+}