Skip to content

Commit c5a432d

Browse files
Add GPT-4V support in ChatService (#272)
## Purpose

<!-- Describe the intention of the changes being proposed. What problem does it solve or functionality does it add? -->

* ... #257

## Does this introduce a breaking change?

<!-- Mark one with an "x". -->

```
[ ] Yes
[ ] No
```

## Pull Request Type

What kind of change does this Pull Request introduce?

<!-- Please check the one that applies to this PR using "x". -->

```
[ ] Bugfix
[ ] Feature
[ ] Code style update (formatting, local variables)
[ ] Refactoring (no functional changes, no api changes)
[ ] Documentation content changes
[ ] Other... Please describe:
```

## How to Test

* Get the code

```
git clone [repo-address]
cd [repo-name]
git checkout [branch-name]
npm install
```

* Test the code

<!-- Add steps to run the test suite and/or manually test -->

```
```

## What to Check

Verify that the following are valid

* ...

## Other Information

<!-- Add any other helpful information that may be needed here. -->
1 parent f5b168f commit c5a432d

File tree

11 files changed

+208
-92
lines changed

11 files changed

+208
-92
lines changed

app/Directory.Packages.props

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
</PropertyGroup>
55
<ItemGroup>
66
<PackageVersion Include="Azure.AI.FormRecognizer" Version="4.1.0" />
7-
<PackageVersion Include="Azure.AI.OpenAI" Version="1.0.0-beta.8" />
7+
<PackageVersion Include="Azure.AI.OpenAI" Version="1.0.0-beta.12" />
88
<PackageVersion Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.3.0" />
99
<PackageVersion Include="Azure.Identity" Version="1.10.4" />
1010
<PackageVersion Include="Azure.Search.Documents" Version="11.5.1" />
@@ -33,7 +33,7 @@
3333
<PackageVersion Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="8.0.0" />
3434
<PackageVersion Include="Microsoft.ML" Version="3.0.0" />
3535
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
36-
<PackageVersion Include="Microsoft.SemanticKernel" Version="0.24.230918.1-preview" />
36+
<PackageVersion Include="Microsoft.SemanticKernel" Version="1.3.0" />
3737
<PackageVersion Include="Microsoft.VisualStudio.Azure.Containers.Tools.Targets" Version="1.19.5" />
3838
<PackageVersion Include="MudBlazor" Version="6.11.1" />
3939
<PackageVersion Include="PdfSharpCore" Version="1.3.62" />

app/backend/Extensions/WebApplicationExtensions.cs

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -44,36 +44,28 @@ private static async IAsyncEnumerable<ChatChunkResponse> OnPostChatPromptAsync(
4444
{
4545
var deploymentId = config["AZURE_OPENAI_CHATGPT_DEPLOYMENT"];
4646
var response = await client.GetChatCompletionsStreamingAsync(
47-
deploymentId, new ChatCompletionsOptions
47+
new ChatCompletionsOptions
4848
{
49+
DeploymentName = deploymentId,
4950
Messages =
5051
{
51-
new ChatMessage(ChatRole.System, """
52+
new ChatRequestSystemMessage("""
5253
You're an AI assistant for developers, helping them write code more efficiently.
5354
You're name is **Blazor 📎 Clippy** and you're an expert Blazor developer.
5455
You're also an expert in ASP.NET Core, C#, TypeScript, and even JavaScript.
5556
You will always reply with a Markdown formatted response.
5657
"""),
57-
58-
new ChatMessage(ChatRole.User, "What's your name?"),
59-
60-
new ChatMessage(ChatRole.Assistant,
61-
"Hi, my name is **Blazor 📎 Clippy**! Nice to meet you."),
62-
63-
new ChatMessage(ChatRole.User, prompt.Prompt)
58+
new ChatRequestUserMessage("What's your name?"),
59+
new ChatRequestAssistantMessage("Hi, my name is **Blazor 📎 Clippy**! Nice to meet you."),
60+
new ChatRequestUserMessage(prompt.Prompt)
6461
}
6562
}, cancellationToken);
6663

67-
using var completions = response.Value;
68-
await foreach (var choice in completions.GetChoicesStreaming(cancellationToken))
64+
await foreach (var choice in response.WithCancellation(cancellationToken))
6965
{
70-
await foreach (var message in choice.GetMessageStreaming(cancellationToken))
66+
if (choice.ContentUpdate is { Length: > 0 })
7167
{
72-
if (message is { Content.Length: > 0 })
73-
{
74-
var (length, content) = (message.Content.Length, message.Content);
75-
yield return new ChatChunkResponse(length, content);
76-
}
68+
yield return new ChatChunkResponse(choice.ContentUpdate.Length, choice.ContentUpdate);
7769
}
7870
}
7971
}

app/backend/GlobalUsings.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
global using Microsoft.AspNetCore.Mvc;
1414
global using Microsoft.AspNetCore.Mvc.RazorPages;
1515
global using Microsoft.SemanticKernel;
16-
global using Microsoft.SemanticKernel.AI.ChatCompletion;
17-
global using Microsoft.SemanticKernel.AI.Embeddings;
1816
global using MinimalApi.Extensions;
1917
global using MinimalApi.Services;
2018
global using PdfSharpCore.Pdf;

app/backend/Services/ReadRetrieveReadChatService.cs

Lines changed: 97 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,59 @@
11
// Copyright (c) Microsoft. All rights reserved.
22

3-
namespace MinimalApi.Services;
3+
using Azure.Core;
4+
using Microsoft.SemanticKernel.ChatCompletion;
5+
using Microsoft.SemanticKernel.Connectors.OpenAI;
6+
using Microsoft.SemanticKernel.Embeddings;
47

8+
namespace MinimalApi.Services;
9+
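#pragma warning disable SKEXP0011 // Semantic Kernel experimental-API diagnostic (presumably the OpenAI text-embedding connector; confirm against the SK experiments list)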
10+
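#pragma warning disable SKEXP0001 // Semantic Kernel experimental-API diagnostic (presumably the embedding service abstractions; confirm against the SK experiments list)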
511
public class ReadRetrieveReadChatService
612
{
713
private readonly ISearchService _searchClient;
8-
private readonly IKernel _kernel;
14+
private readonly Kernel _kernel;
915
private readonly IConfiguration _configuration;
16+
private readonly IComputerVisionService? _visionService;
17+
private readonly TokenCredential? _tokenCredential;
1018

1119
public ReadRetrieveReadChatService(
1220
ISearchService searchClient,
1321
OpenAIClient client,
14-
IConfiguration configuration)
22+
IConfiguration configuration,
23+
IComputerVisionService? visionService = null,
24+
TokenCredential? tokenCredential = null)
1525
{
1626
_searchClient = searchClient;
17-
var deployedModelName = configuration["AzureOpenAiChatGptDeployment"];
18-
ArgumentNullException.ThrowIfNullOrWhiteSpace(deployedModelName);
27+
var kernelBuilder = Kernel.CreateBuilder();
1928

20-
var kernelBuilder = Kernel.Builder.WithAzureChatCompletionService(deployedModelName, client);
21-
var embeddingModelName = configuration["AzureOpenAiEmbeddingDeployment"];
22-
if (!string.IsNullOrEmpty(embeddingModelName))
29+
if (configuration["UseAOAI"] != "true")
2330
{
24-
var endpoint = configuration["AzureOpenAiServiceEndpoint"];
25-
ArgumentNullException.ThrowIfNullOrWhiteSpace(endpoint);
26-
kernelBuilder = kernelBuilder.WithAzureTextEmbeddingGenerationService(embeddingModelName, endpoint, new DefaultAzureCredential());
31+
var deployment = configuration["OpenAiChatGptDeployment"];
32+
ArgumentNullException.ThrowIfNullOrWhiteSpace(deployment);
33+
kernelBuilder = kernelBuilder.AddOpenAIChatCompletion(deployment, client);
34+
35+
var embeddingModelName = configuration["OpenAiEmbeddingDeployment"];
36+
ArgumentNullException.ThrowIfNullOrWhiteSpace(embeddingModelName);
37+
kernelBuilder = kernelBuilder.AddOpenAITextEmbeddingGeneration(embeddingModelName, client);
38+
}
39+
else
40+
{
41+
var deployedModelName = configuration["AzureOpenAiChatGptDeployment"];
42+
ArgumentNullException.ThrowIfNullOrWhiteSpace(deployedModelName);
43+
var embeddingModelName = configuration["AzureOpenAiEmbeddingDeployment"];
44+
if (!string.IsNullOrEmpty(embeddingModelName))
45+
{
46+
var endpoint = configuration["AzureOpenAiServiceEndpoint"];
47+
ArgumentNullException.ThrowIfNullOrWhiteSpace(endpoint);
48+
kernelBuilder = kernelBuilder.AddAzureOpenAITextEmbeddingGeneration(embeddingModelName, endpoint, tokenCredential ?? new DefaultAzureCredential());
49+
kernelBuilder = kernelBuilder.AddAzureOpenAIChatCompletion(deployedModelName, endpoint, tokenCredential ?? new DefaultAzureCredential());
50+
}
2751
}
52+
2853
_kernel = kernelBuilder.Build();
2954
_configuration = configuration;
55+
_visionService = visionService;
56+
_tokenCredential = tokenCredential;
3057
}
3158

3259
public async Task<ApproachResponse> ReplyAsync(
@@ -39,8 +66,8 @@ public async Task<ApproachResponse> ReplyAsync(
3966
var useSemanticRanker = overrides?.SemanticRanker ?? false;
4067
var excludeCategory = overrides?.ExcludeCategory ?? null;
4168
var filter = excludeCategory is null ? null : $"category ne '{excludeCategory}'";
42-
IChatCompletion chat = _kernel.GetService<IChatCompletion>();
43-
ITextEmbeddingGeneration? embedding = _kernel.GetService<ITextEmbeddingGeneration>();
69+
var chat = _kernel.GetRequiredService<IChatCompletionService>();
70+
var embedding = _kernel.GetRequiredService<ITextEmbeddingGenerationService>();
4471
float[]? embeddings = null;
4572
var question = history.LastOrDefault()?.User is { } userQuestion
4673
? userQuestion
@@ -55,24 +82,19 @@ public async Task<ApproachResponse> ReplyAsync(
5582
string? query = null;
5683
if (overrides?.RetrievalMode != RetrievalMode.Vector)
5784
{
58-
var getQueryChat = chat.CreateNewChat(@"You are a helpful AI assistant, generate search query for followup question.
85+
var getQueryChat = new ChatHistory(@"You are a helpful AI assistant, generate search query for followup question.
5986
Make your respond simple and precise. Return the query only, do not return any other text.
6087
e.g.
6188
Northwind Health Plus AND standard plan.
6289
standard plan AND dental AND employee benefit.
6390
");
6491

6592
getQueryChat.AddUserMessage(question);
66-
var result = await chat.GetChatCompletionsAsync(
93+
var result = await chat.GetChatMessageContentAsync(
6794
getQueryChat,
6895
cancellationToken: cancellationToken);
6996

70-
if (result.Count != 1)
71-
{
72-
throw new InvalidOperationException("Failed to get search query");
73-
}
74-
75-
query = result[0].ModelResult.GetOpenAIChatResult().Choice.Message.Content;
97+
query = result.Content ?? throw new InvalidOperationException("Failed to get search query");
7698
}
7799

78100
// step 2
@@ -89,12 +111,19 @@ standard plan AND dental AND employee benefit.
89111
documentContents = string.Join("\r", documentContentList.Select(x =>$"{x.Title}:{x.Content}"));
90112
}
91113

92-
Console.WriteLine(documentContents);
114+
// step 2.5
115+
// retrieve images if _visionService is available
116+
SupportingImageRecord[]? images = default;
117+
if (_visionService is not null)
118+
{
119+
var queryEmbeddings = await _visionService.VectorizeTextAsync(query ?? question, cancellationToken);
120+
images = await _searchClient.QueryImagesAsync(query, queryEmbeddings.vector, overrides, cancellationToken);
121+
}
122+
93123
// step 3
94124
// put together related docs and conversation history to generate answer
95-
var answerChat = chat.CreateNewChat(
96-
"You are a system assistant who helps the company employees with their healthcare " +
97-
"plan questions, and questions about the employee handbook. Be brief in your answers");
125+
var answerChat = new ChatHistory(
126+
"You are a system assistant who helps the company employees with their questions. Be brief in your answers");
98127

99128
// add chat history
100129
foreach (var turn in history)
@@ -106,22 +135,56 @@ standard plan AND dental AND employee benefit.
106135
}
107136
}
108137

109-
// format prompt
110-
answerChat.AddUserMessage(@$" ## Source ##
138+
139+
if (images != null)
140+
{
141+
var prompt = @$"## Source ##
142+
{documentContents}
143+
## End ##
144+
145+
Answer question based on available source and images.
146+
Your answer needs to be a json object with answer and thoughts field.
147+
Don't put your answer between ```json and ```, return the json string directly. e.g {{""answer"": ""I don't know"", ""thoughts"": ""I don't know""}}";
148+
149+
var tokenRequestContext = new TokenRequestContext(new[] { "https://storage.azure.com/.default" });
150+
var sasToken = await (_tokenCredential?.GetTokenAsync(tokenRequestContext, cancellationToken) ?? throw new InvalidOperationException("Failed to get token"));
151+
var sasTokenString = sasToken.Token;
152+
var imageUrls = images.Select(x => $"{x.Url}?{sasTokenString}").ToArray();
153+
var collection = new ChatMessageContentItemCollection();
154+
collection.Add(new TextContent(prompt));
155+
foreach (var imageUrl in imageUrls)
156+
{
157+
collection.Add(new ImageContent(new Uri(imageUrl)));
158+
}
159+
160+
answerChat.AddUserMessage(collection);
161+
}
162+
else
163+
{
164+
var prompt = @$" ## Source ##
111165
{documentContents}
112166
## End ##
113167
114168
You answer needs to be a json object with the following format.
115169
{{
116170
""answer"": // the answer to the question, add a source reference to the end of each sentence. e.g. Apple is a fruit [reference1.pdf][reference2.pdf]. If no source available, put the answer as I don't know.
117171
""thoughts"": // brief thoughts on how you came up with the answer, e.g. what sources you used, what you thought about, etc.
118-
}}");
172+
}}";
173+
answerChat.AddUserMessage(prompt);
174+
}
175+
176+
var promptExecutingSetting = new OpenAIPromptExecutionSettings
177+
{
178+
MaxTokens = 1024,
179+
Temperature = overrides?.Temperature ?? 0.7,
180+
};
119181

120182
// get answer
121-
var answer = await chat.GetChatCompletionsAsync(
183+
var answer = await chat.GetChatMessageContentAsync(
122184
answerChat,
185+
promptExecutingSetting,
123186
cancellationToken: cancellationToken);
124-
var answerJson = answer[0].ModelResult.GetOpenAIChatResult().Choice.Message.Content;
187+
var answerJson = answer.Content ?? throw new InvalidOperationException("Failed to get search query");
125188
var answerObject = JsonSerializer.Deserialize<JsonElement>(answerJson);
126189
var ans = answerObject.GetProperty("answer").GetString() ?? throw new InvalidOperationException("Failed to get answer");
127190
var thoughts = answerObject.GetProperty("thoughts").GetString() ?? throw new InvalidOperationException("Failed to get thoughts");
@@ -130,7 +193,7 @@ You answer needs to be a json object with the following format.
130193
// add follow up questions if requested
131194
if (overrides?.SuggestFollowupQuestions is true)
132195
{
133-
var followUpQuestionChat = chat.CreateNewChat(@"You are a helpful AI assistant");
196+
var followUpQuestionChat = new ChatHistory(@"You are a helpful AI assistant");
134197
followUpQuestionChat.AddUserMessage($@"Generate three follow-up question based on the answer you just generated.
135198
# Answer
136199
{ans}
@@ -144,11 +207,11 @@ Return the follow-up question as a json string list.
144207
""What is the out-of-pocket maximum?""
145208
]");
146209

147-
var followUpQuestions = await chat.GetChatCompletionsAsync(
210+
var followUpQuestions = await chat.GetChatMessageContentAsync(
148211
followUpQuestionChat,
149212
cancellationToken: cancellationToken);
150213

151-
var followUpQuestionsJson = followUpQuestions[0].ModelResult.GetOpenAIChatResult().Choice.Message.Content;
214+
var followUpQuestionsJson = followUpQuestions.Content ?? throw new InvalidOperationException("Failed to get search query");
152215
var followUpQuestionsObject = JsonSerializer.Deserialize<JsonElement>(followUpQuestionsJson);
153216
var followUpQuestionsList = followUpQuestionsObject.EnumerateArray().Select(x => x.GetString()).ToList();
154217
foreach (var followUpQuestion in followUpQuestionsList)
@@ -158,7 +221,7 @@ Return the follow-up question as a json string list.
158221
}
159222
return new ApproachResponse(
160223
DataPoints: documentContentList,
161-
Images: null,
224+
Images: images,
162225
Answer: ans,
163226
Thoughts: thoughts,
164227
CitationBaseUrl: _configuration.ToCitationBaseUrl());

app/prepdocs/PrepareDocs/Program.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,10 @@ static async ValueTask UploadBlobsAndCreateIndexAsync(
202202
Path.GetExtension(fileName).Equals(".jpg", StringComparison.OrdinalIgnoreCase) ||
203203
Path.GetExtension(fileName).Equals(".jpeg", StringComparison.OrdinalIgnoreCase))
204204
{
205-
await embeddingService.EmbedImageBlobAsync(File.OpenRead(fileName), fileName);
205+
using var stream = File.OpenRead(fileName);
206+
var blobName = BlobNameFromFilePage(fileName);
207+
await UploadBlobAsync(fileName, blobName, container);
208+
await embeddingService.EmbedImageBlobAsync(stream, fileName);
206209
}
207210
else
208211
{

app/shared/Shared/Services/AzureSearchEmbedService.cs

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -73,53 +73,32 @@ Indexing sections from '{BlobName}' into search index '{SearchIndexName}'
7373
}
7474
}
7575

76-
public async Task<bool> EmbedImageBlobAsync(Stream imageStream, string imageName, CancellationToken ct = default)
76+
public async Task<bool> EmbedImageBlobAsync(
77+
Stream imageStream,
78+
string imageUrl,
79+
CancellationToken ct = default)
7780
{
7881
if (includeImageEmbeddingsField == false || computerVisionService is null)
7982
{
8083
throw new InvalidOperationException(
8184
"Computer Vision service is required to include image embeddings field");
8285
}
8386

84-
// step 1
85-
// upload image to blob storage
86-
var blobClient = corpusContainerClient.GetBlobClient(imageName);
87-
if (await blobClient.ExistsAsync())
88-
{
89-
logger?.LogWarning("Blob '{BlobName}' already exists", imageName);
90-
}
91-
else
92-
{
93-
logger?.LogInformation("Uploading image '{ImageName}'", imageName);
94-
await blobClient.UploadAsync(imageStream, new BlobHttpHeaders
95-
{
96-
ContentType = "image"
97-
});
98-
}
99-
100-
// step 2
101-
// get image embeddings
102-
imageStream.Position = 0;
103-
var tempPath = Path.GetTempFileName();
104-
await using var tempStream = File.OpenWrite(tempPath);
105-
await imageStream.CopyToAsync(tempStream, ct);
106-
tempStream.Close();
107-
108-
var embeddings = await computerVisionService.VectorizeImageAsync(tempPath, ct);
87+
var embeddings = await computerVisionService.VectorizeImageAsync(imageUrl, ct);
10988

11089
// id can only contain letters, digits, underscore (_), dash (-), or equal sign (=).
111-
var imageId = MatchInSetRegex().Replace(imageName, "_").TrimStart('_');
90+
var imageId = MatchInSetRegex().Replace(imageUrl, "_").TrimStart('_');
11291
// step 3
11392
// index image embeddings
11493
var indexAction = new IndexDocumentsAction<SearchDocument>(
11594
IndexActionType.MergeOrUpload,
11695
new SearchDocument
11796
{
11897
["id"] = imageId,
119-
["content"] = imageName,
98+
["content"] = imageUrl,
12099
["category"] = "image",
121100
["imageEmbedding"] = embeddings.vector,
122-
["sourcefile"] = blobClient.Uri.ToString(),
101+
["sourcefile"] = imageUrl,
123102
});
124103

125104
var batch = new IndexDocumentsBatch<SearchDocument>();
@@ -469,7 +448,7 @@ private async Task IndexSectionsAsync(IEnumerable<Section> sections)
469448
var batch = new IndexDocumentsBatch<SearchDocument>();
470449
foreach (var section in sections)
471450
{
472-
var embeddings = await openAIClient.GetEmbeddingsAsync(embeddingModelName, new Azure.AI.OpenAI.EmbeddingsOptions(section.Content.Replace('\r', ' ')));
451+
var embeddings = await openAIClient.GetEmbeddingsAsync(new Azure.AI.OpenAI.EmbeddingsOptions(embeddingModelName, [section.Content.Replace('\r', ' ')]));
473452
var embedding = embeddings.Value.Data.FirstOrDefault()?.Embedding.ToArray() ?? [];
474453
batch.Actions.Add(new IndexDocumentsAction<SearchDocument>(
475454
IndexActionType.MergeOrUpload,

app/tests/MinimalApi.Tests/AzureDocumentSearchServiceTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public async Task QueryDocumentsTestEmbeddingOnlyAsync()
4646
var openAiEmbeddingDeployment = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") ?? throw new InvalidOperationException();
4747
var openAIClient = new OpenAIClient(new Uri(openAiEndpoint), new DefaultAzureCredential());
4848
var query = "What is included in my Northwind Health Plus plan that is not in standard?";
49-
var embeddingResponse = await openAIClient.GetEmbeddingsAsync(openAiEmbeddingDeployment, new EmbeddingsOptions(query));
49+
var embeddingResponse = await openAIClient.GetEmbeddingsAsync(new EmbeddingsOptions(openAiEmbeddingDeployment, [query]));
5050
var embedding = embeddingResponse.Value.Data.First().Embedding;
5151
var searchClient = new SearchClient(new Uri(searchServceEndpoint), index, new DefaultAzureCredential());
5252
var service = new AzureSearchService(searchClient);

0 commit comments

Comments
 (0)