Add async support for _SafetyEvaluation (#40623)

slister1001 · web-flow · commit f32a6c2a97d6 · 2025-04-21T13:33:47.000-07:00
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py
@@ -6,9 +6,10 @@
 import os
 import inspect
 import logging
+import asyncio
 from datetime import datetime
 from azure.ai.evaluation._common._experimental import experimental
-from typing import Any, Callable, Dict, List, Optional, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Union, cast, Coroutine, TypeVar, Awaitable
 from azure.ai.evaluation._common.math import list_mean_nan_safe
 from azure.ai.evaluation._constants import CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT
 from azure.ai.evaluation._evaluators import (
@@ -192,10 +193,17 @@ async def callback(
             context = latest_message.get("context", None)
             latest_context = None
             try:
+                is_async = self._is_async_function(target)
                 if self._check_target_returns_context(target):
-                    response, latest_context = target(query=application_input)
+                    if is_async:
+                        response, latest_context = await target(query=application_input)
+                    else:
+                        response, latest_context = target(query=application_input)
                 else:
-                    response = target(query=application_input)
+                    if is_async:
+                        response = await target(query=application_input)
+                    else:
+                        response = target(query=application_input)
             except Exception as e:
                 response = f"Something went wrong {e!s}"
 
@@ -465,7 +473,7 @@ def _get_evaluators(
                     blame=ErrorBlame.USER_ERROR,
                 )
         return evaluators_dict
-
+    
     @staticmethod
     def _check_target_returns_context(target: Callable) -> bool:
         """
@@ -478,6 +486,15 @@ def _check_target_returns_context(target: Callable) -> bool:
         ret_type = sig.return_annotation
         if ret_type == inspect.Signature.empty:
             return False
+        
+        # Unwrap Coroutine[..., T] / Awaitable[T] annotations to T. A subscripted typing alias reports the
+        # collections.abc class (not the typing alias) as __origin__, so compare against the aliases' own origin.
+        origin = getattr(ret_type, "__origin__", None)
+        if origin is not None and origin in (getattr(Coroutine, "__origin__", Coroutine), getattr(Awaitable, "__origin__", Awaitable)):
+            args = getattr(ret_type, "__args__", None)
+            if args:
+                ret_type = args[-1]  # the last type argument is the value produced when awaited
+        
         if ret_type is tuple:
             return True
         return False
@@ -494,13 +511,33 @@ def _check_target_returns_str(target: Callable) -> bool:
         ret_type = sig.return_annotation
         if ret_type == inspect.Signature.empty:
             return False
+        
+        # Unwrap Coroutine[..., T] / Awaitable[T] annotations to T. A subscripted typing alias reports the
+        # collections.abc class (not the typing alias) as __origin__, so compare against the aliases' own origin.
+        origin = getattr(ret_type, "__origin__", None)
+        if origin is not None and origin in (getattr(Coroutine, "__origin__", Coroutine), getattr(Awaitable, "__origin__", Awaitable)):
+            args = getattr(ret_type, "__args__", None)
+            if args:
+                ret_type = args[-1]  # the last type argument is the value produced when awaited
+                
         if ret_type is str:
             return True
         return False
     
-     
     @staticmethod
-    def _check_target_is_callback(target:Callable) -> bool:
+    def _is_async_function(target: Callable) -> bool:
+        """
+        Checks if the target must be awaited: an ``async def`` function or an object whose ``__call__`` is async.
+
+        :param target: The target function to check.
+        :type target: Callable
+        :return: True if calling the target produces a coroutine, False otherwise.
+        :rtype: bool
+        """
+        return inspect.iscoroutinefunction(target) or inspect.iscoroutinefunction(getattr(type(target), "__call__", None))
+    
+    @staticmethod
+    def _check_target_is_callback(target: Callable) -> bool:
         sig = inspect.signature(target)
         param_names = list(sig.parameters.keys())
         return 'messages' in param_names and 'stream' in param_names and 'session_state' in param_names and 'context' in param_names
@@ -630,7 +667,7 @@ def _calculate_defect_rate(self, evaluation_result_dict) -> EvaluationResult:
     
     async def __call__(
             self,
-            target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+            target: Union[Callable, Awaitable[Any], AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
             evaluators: List[_SafetyEvaluator] = [],
             evaluation_name: Optional[str] = None,
             num_turns : int = 1,
@@ -644,12 +681,12 @@ async def __call__(
             jailbreak_data_path: Optional[Union[str, os.PathLike]] = None,
             output_path: Optional[Union[str, os.PathLike]] = None,
             data_paths: Optional[Union[Dict[str, str], Dict[str, Union[str,os.PathLike]]]] = None
-        ) -> Union[Dict[str, EvaluationResult], Dict[str, str], Dict[str, Union[str,os.PathLike]]]:
+        ) -> Union[Dict[str, EvaluationResult], Dict[str, str], Dict[str, Union[str,os.PathLike]]]:        
         '''
         Evaluates the target function based on the provided parameters.
 
-        :param target: The target function to call during the evaluation.
-        :type target: Callable
+        :param target: The target function to call during the evaluation. This can be a synchronous or asynchronous function.
+        :type target: Union[Callable, Awaitable[Any], AzureOpenAIModelConfiguration, OpenAIModelConfiguration]
         :param evaluators: A list of SafetyEvaluator.
         :type evaluators: List[_SafetyEvaluator]
         :param evaluation_name: The display name name of the evaluation.
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_safety_evaluation.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_safety_evaluation.py
@@ -45,6 +45,22 @@ def mock_target_with_context_fn() -> tuple:
     return mock_target_with_context_fn
 
 
+@pytest.fixture
+def mock_async_target():
+    async def mock_async_target_fn(query: str) -> str:
+        return "mock async response"
+
+    return mock_async_target_fn  # the coroutine function itself, not a coroutine object
+
+
+@pytest.fixture
+def mock_async_target_with_context():
+    async def mock_async_target_with_context_fn(query: str) -> tuple:
+        return ("mock async response", "mock async context")
+
+    return mock_async_target_with_context_fn  # awaiting a call yields a (response, context) tuple
+
+
 @pytest.fixture
 def mock_eval_result_dict():
     jailbreak = {
@@ -122,6 +138,16 @@ def test_check_target_returns_context_false(self, safety_eval, mock_target):
     def test_check_target_returns_context_true(self, safety_eval, mock_target_with_context):
         assert safety_eval._check_target_returns_context(mock_target_with_context)
 
+    def test_check_target_returns_context_async(self, safety_eval, mock_async_target, mock_async_target_with_context):
+        # Async target annotated `-> str`: the check must report that no context is returned
+        assert not safety_eval._check_target_returns_context(mock_async_target)
+        # Async target annotated `-> tuple`: the check must report that context is returned
+        assert safety_eval._check_target_returns_context(mock_async_target_with_context)
+
+    def test_check_target_returns_str_async(self, safety_eval, mock_async_target):
+        # Async target annotated `-> str` must be recognized as returning a string
+        assert safety_eval._check_target_returns_str(mock_async_target)
+
     def test_validate_inputs_groundedness_no_source(self, safety_eval, mock_target):
         with pytest.raises(EvaluationException) as exc_info:
             safety_eval._validate_inputs(
@@ -243,3 +269,32 @@ async def test_simulate_no_results(self, mock_call, mock_init, safety_eval, mock
                 target=mock_target, adversarial_scenario=AdversarialScenario.ADVERSARIAL_QA
             )
         assert "outputs generated by the simulator" in str(exc_info.value)
+
+    def test_is_async_function(self, safety_eval, mock_target, mock_async_target):
+        # The mock_target fixture is expected to be a plain (non-async) function, so this must be False
+        assert not safety_eval._is_async_function(mock_target)
+        # The mock_async_target fixture is an `async def` function, so this must be True
+        assert safety_eval._is_async_function(mock_async_target)
+
+    @pytest.mark.asyncio
+    @patch("azure.ai.evaluation._safety_evaluation._safety_evaluation._SafetyEvaluation._simulate")
+    @patch("azure.ai.evaluation._evaluate._evaluate.evaluate")
+    async def test_call_with_async_target(self, mock_evaluate, mock_simulate, safety_eval, mock_async_target):
+        # Stub out simulation and evaluation (patch mocks are injected innermost decorator first)
+        mock_simulate.return_value = {"MockSimulator": "MockSimulator_Data.jsonl"}
+        mock_evaluate.return_value = {
+            "metrics": {},
+            "rows": [],
+            "studio_url": "test_url"
+        }
+
+        # Await __call__ with a coroutine function as the target
+        result = await safety_eval(target=mock_async_target)
+
+        # Expect a dict that contains the key returned by the stubbed _simulate
+        assert isinstance(result, dict)
+        assert "MockSimulator" in result
+
+        # _simulate must have been called exactly once, with the async target passed by keyword
+        mock_simulate.assert_called_once()
+        assert mock_simulate.call_args[1]["target"] == mock_async_target