Skip to content

Commit 2a2b7eb

Browse files
committed
Don't ASCII-encode Unicode characters in prompts and completions
1 parent 4af9188 commit 2a2b7eb

File tree

3 files changed

+63
-10
lines changed

3 files changed

+63
-10
lines changed

sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@ def _add_request_chat_message_events(self, span: "AbstractSpan", **kwargs: Any)
208208
f"gen_ai.{message.get('role')}.message",
209209
{
210210
"gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
211-
"gen_ai.event.content": json.dumps(message),
211+
"gen_ai.event.content": json.dumps(message, ensure_ascii=False),
212212
},
213213
timestamp,
214214
)
@@ -300,7 +300,7 @@ def _add_response_chat_message_events(
300300
full_response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls]
301301
attributes = {
302302
"gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
303-
"gen_ai.event.content": json.dumps(full_response),
303+
"gen_ai.event.content": json.dumps(full_response, ensure_ascii=False),
304304
}
305305
else:
306306
response: Dict[str, Any] = {
@@ -318,7 +318,7 @@ def _add_response_chat_message_events(
318318

319319
attributes = {
320320
"gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
321-
"gen_ai.event.content": json.dumps(response),
321+
"gen_ai.event.content": json.dumps(response, ensure_ascii=False),
322322
}
323323
last_event_timestamp_ns = self._record_event(span, "gen_ai.choice", attributes, last_event_timestamp_ns)
324324

@@ -478,7 +478,7 @@ def __iter__( # pyright: ignore [reportIncompatibleMethodOverride]
478478
)
479479
attributes = {
480480
"gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
481-
"gen_ai.event.content": json.dumps(accumulate),
481+
"gen_ai.event.content": json.dumps(accumulate, ensure_ascii=False),
482482
}
483483
self._instrumentor._record_event(span, "gen_ai.choice", attributes, previous_event_timestamp)
484484
span.finish()
@@ -532,7 +532,7 @@ def _trace_stream_content(self) -> None:
532532
self._accumulate["message"]["tool_calls"] = list(tools_no_recording)
533533
attributes = {
534534
"gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
535-
"gen_ai.event.content": json.dumps(self._accumulate),
535+
"gen_ai.event.content": json.dumps(self._accumulate, ensure_ascii=False),
536536
}
537537
self._last_event_timestamp_ns = self._instrumentor._record_event( # pylint: disable=protected-access, line-too-long # pyright: ignore [reportFunctionMemberAccess]
538538
span, "gen_ai.choice", attributes, self._last_event_timestamp_ns

sdk/ai/azure-ai-inference/tests/test_client_tracing.py

Lines changed: 53 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
# Copyright (c) Microsoft Corporation.
44
# Licensed under the MIT License.
55
# ------------------------------------
6+
import json
67
import os
78
import azure.ai.inference as sdk
89
from azure.ai.inference.tracing import AIInferenceInstrumentor
@@ -322,6 +323,58 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs):
322323
assert events_match == True
323324
AIInferenceInstrumentor().uninstrument()
324325

326+
@ServicePreparerChatCompletions()
327+
@recorded_by_proxy
328+
def test_chat_completion_tracing_content_unicode(self, **kwargs):
329+
# Make sure code is not instrumented due to a previous test exception
330+
try:
331+
AIInferenceInstrumentor().uninstrument()
332+
except RuntimeError as e:
333+
pass
334+
self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True")
335+
client = self._create_chat_client(**kwargs)
336+
processor, exporter = self.setup_memory_trace_exporter()
337+
AIInferenceInstrumentor().instrument()
338+
response = client.complete(
339+
messages=[
340+
sdk.models.SystemMessage(content="You are a helpful assistant."),
341+
sdk.models.UserMessage(content="将“hello world”翻译成中文和乌克兰语"),
342+
],
343+
)
344+
processor.force_flush()
345+
spans = exporter.get_spans_by_name_starts_with("chat ")
346+
assert len(spans) == 1
347+
expected_events = [
348+
{
349+
"name": "gen_ai.system.message",
350+
"attributes": {
351+
"gen_ai.system": "az.ai.inference",
352+
"gen_ai.event.content": '{"role": "system", "content": "You are a helpful assistant."}',
353+
},
354+
},
355+
{
356+
"name": "gen_ai.user.message",
357+
"attributes": {
358+
"gen_ai.system": "az.ai.inference",
359+
"gen_ai.event.content": '{"role": "user", "content": "将“hello world”翻译成中文和乌克兰语"}',
360+
},
361+
},
362+
{
363+
"name": "gen_ai.choice",
364+
"attributes": {
365+
"gen_ai.system": "az.ai.inference",
366+
"gen_ai.event.content": '{"message": {"content": "*"}, "finish_reason": "stop", "index": 0}',
367+
},
368+
},
369+
]
370+
events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events)
371+
assert events_match == True
372+
373+
completion_event_content = json.loads(spans[0].events[2].attributes["gen_ai.event.content"])
374+
assert False == completion_event_content["message"]["content"].isascii()
375+
assert response.choices[0].message.content == completion_event_content["message"]["content"]
376+
AIInferenceInstrumentor().uninstrument()
377+
325378
@ServicePreparerChatCompletions()
326379
@recorded_by_proxy
327380
def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kwargs):

sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/agents/_ai_agents_instrumentor.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -343,7 +343,7 @@ def _add_message_event(
343343
message_status=message_status,
344344
usage=usage,
345345
)
346-
attributes[GEN_AI_EVENT_CONTENT] = json.dumps(event_body)
346+
attributes[GEN_AI_EVENT_CONTENT] = json.dumps(event_body, ensure_ascii=False)
347347
span.span_instance.add_event(name=f"gen_ai.{role}.message", attributes=attributes)
348348

349349
def _get_field(self, obj: Any, field: str) -> Any:
@@ -374,7 +374,7 @@ def _add_instructions_event(
374374
event_body["content"] = instructions or additional_instructions
375375

376376
attributes = self._create_event_attributes(agent_id=agent_id, thread_id=thread_id)
377-
attributes[GEN_AI_EVENT_CONTENT] = json.dumps(event_body)
377+
attributes[GEN_AI_EVENT_CONTENT] = json.dumps(event_body, ensure_ascii=False)
378378
span.span_instance.add_event(name=GEN_AI_SYSTEM_MESSAGE, attributes=attributes)
379379

380380
def _get_role(self, role: Optional[Union[str, MessageRole]]) -> str:
@@ -413,10 +413,10 @@ def _add_tool_assistant_message_event(self, span, step: RunStep) -> None:
413413
)
414414

415415
if _trace_agents_content:
416-
attributes[GEN_AI_EVENT_CONTENT] = json.dumps({"tool_calls": tool_calls})
416+
attributes[GEN_AI_EVENT_CONTENT] = json.dumps({"tool_calls": tool_calls}, ensure_ascii=False)
417417
else:
418418
tool_calls_non_recording = self._remove_function_call_names_and_arguments(tool_calls=tool_calls)
419-
attributes[GEN_AI_EVENT_CONTENT] = json.dumps({"tool_calls": tool_calls_non_recording})
419+
attributes[GEN_AI_EVENT_CONTENT] = json.dumps({"tool_calls": tool_calls_non_recording}, ensure_ascii=False)
420420
span.span_instance.add_event(name="gen_ai.assistant.message", attributes=attributes)
421421

422422
def set_end_run(self, span: "AbstractSpan", run: Optional[ThreadRun]) -> None:
@@ -518,7 +518,7 @@ def _add_tool_message_events(
518518
body = {"content": tool_output["output"], "id": tool_output["tool_call_id"]}
519519
else:
520520
body = {"content": "", "id": tool_output["tool_call_id"]}
521-
span.span_instance.add_event("gen_ai.tool.message", {"gen_ai.event.content": json.dumps(body)})
521+
span.span_instance.add_event("gen_ai.tool.message", {"gen_ai.event.content": json.dumps(body, ensure_ascii=False)})
522522
return True
523523

524524
return False

0 commit comments

Comments
 (0)