Commit 6fd58c2

fix function header (#40792)
* fix function header
* new recordings
* upgrade name map properties
* fix name
1 parent ce36075 commit 6fd58c2

3 files changed: +88 -7 lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_constants.py

Lines changed: 1 addition & 0 deletions
@@ -63,6 +63,7 @@ class EvaluationRunProperties:
     EVALUATION_RUN = "_azureml.evaluation_run"
     EVALUATION_SDK = "_azureml.evaluation_sdk_name"
     NAME_MAP = "_azureml.evaluation_name_map"
+    NAME_MAP_LENGTH = "_azureml.evaluation_name_map_length"


 @experimental
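
For context, NAME_MAP_LENGTH records how many numbered segments the serialized name map was split into, while the segment keys themselves are built from the existing NAME_MAP prefix (see the _utils.py change below). A small illustration of how the two constants combine into run-history property keys; it assumes an azure-ai-evaluation build that already contains this change:

# Illustration only: how the two constants combine into property keys.
from azure.ai.evaluation._constants import EvaluationRunProperties

keys = [EvaluationRunProperties.NAME_MAP_LENGTH] + [
    f"{EvaluationRunProperties.NAME_MAP}_{i}" for i in range(3)
]
print(keys)
# ['_azureml.evaluation_name_map_length',
#  '_azureml.evaluation_name_map_0',
#  '_azureml.evaluation_name_map_1',
#  '_azureml.evaluation_name_map_2']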

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py

Lines changed: 41 additions & 6 deletions
@@ -10,6 +10,7 @@
 from typing import Any, Dict, NamedTuple, Optional, Union, cast
 import uuid
 import base64
+import math

 import pandas as pd
 from azure.ai.evaluation._legacy._adapters.entities import Run
@@ -173,9 +174,9 @@ def _log_metrics_and_instance_results_onedp(
         EvaluationRunProperties.RUN_TYPE: "eval_run",
         EvaluationRunProperties.EVALUATION_RUN: "promptflow.BatchRun",
         EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
-        EvaluationRunProperties.NAME_MAP: json.dumps(name_map),
         "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
-        }
+    }
+    properties.update(_convert_name_map_into_property_entries(name_map))

     create_evaluation_result_response = client.create_evaluation_result(
         name=uuid.uuid4(),
@@ -264,15 +265,14 @@ def _log_metrics_and_instance_results(
     # adding these properties to avoid showing traces if a dummy run is created.
     # We are doing that only for the pure evaluation runs.
     if run is None:
-        ev_run.write_properties_to_run_history(
-            properties={
+        properties = {
             EvaluationRunProperties.RUN_TYPE: "eval_run",
             EvaluationRunProperties.EVALUATION_RUN: "promptflow.BatchRun",
             EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
-            EvaluationRunProperties.NAME_MAP: json.dumps(name_map),
             "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
         }
-        )
+        properties.update(_convert_name_map_into_property_entries(name_map))
+        ev_run.write_properties_to_run_history(properties=properties)
     else:
         ev_run.write_properties_to_run_history(
             properties={
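
In both code paths above (_log_metrics_and_instance_results_onedp and _log_metrics_and_instance_results) the change is the same: instead of writing EvaluationRunProperties.NAME_MAP as one potentially very long JSON string, the properties dict is built first and then extended with the segmented entries before a single write. A self-contained sketch of that pattern outside the SDK; the key strings are taken from _constants.py, while the example name map and the 40-character segment size are invented for the demo (the SDK default is 950):

import json

# Stand-alone sketch of the build-then-update pattern used above (not SDK code).
name_map = {"outputs.relevance.gpt_relevance": "relevance"}  # hypothetical mapping
properties = {"_azureml.evaluation_run": "promptflow.BatchRun"}

name_map_string = json.dumps(name_map)
segment_length = 40  # the SDK default is 950; 40 keeps the demo readable
segments = [name_map_string[i:i + segment_length]
            for i in range(0, len(name_map_string), segment_length)]

entries = {"_azureml.evaluation_name_map_length": len(segments)}
entries.update({f"_azureml.evaluation_name_map_{i}": seg for i, seg in enumerate(segments)})

properties.update(entries)  # one merged dict, written to run history in a single call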
@@ -407,6 +407,41 @@ def set_event_loop_policy() -> None:
         # On Windows seems to be a problem with EventLoopPolicy, use this snippet to work around it
         asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())  # type: ignore[attr-defined]

+# textwrap.wrap tries to do fancy nonsense that we don't want
+def _wrap(s, w):
+    return [s[i:i + w] for i in range(0, len(s), w)]
+
+def _convert_name_map_into_property_entries(
+    name_map: Dict[str, str], segment_length: int = 950, max_segments: int = 10
+) -> Dict[str, Any]:
+    """
+    Convert the name map into property entries.
+
+    :param name_map: The name map to be converted.
+    :type name_map: Dict[str, str]
+    :param segment_length: The max length of each individual segment,
+        which will each have their own dictionary entry
+    :type segment_length: str
+    :param max_segments: The max number of segments we can have. If the stringified
+        name map is too long, we just return a length entry with a value
+        of -1 to indicate that the map was too long.
+    :type max_segments: str
+    :return: The converted name map.
+    :rtype: Dict[str, Any]
+    """
+    name_map_string = json.dumps(name_map)
+    num_segments = math.ceil(len(name_map_string) / segment_length)
+    # Property map is somehow still too long to encode within the space
+    # we allow, so give up, but make sure the service knows we gave up
+    if (num_segments > max_segments):
+        return {EvaluationRunProperties.NAME_MAP_LENGTH: -1}
+
+    result: Dict[str, Any] = {EvaluationRunProperties.NAME_MAP_LENGTH: num_segments}
+    segments_list = _wrap(name_map_string, segment_length)
+    for i in range(0, num_segments):
+        segment_key = f"{EvaluationRunProperties.NAME_MAP}_{i}"
+        result[segment_key] = segments_list[i]
+    return result

 class JSONLDataFileLoader:
     def __init__(self, filename: Union[os.PathLike, str]):
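
The commit only adds the encoding side; nothing in this diff reads the segments back out of run history. As a rough sketch of the inverse, here is a hypothetical helper (not part of the SDK) showing how a consumer of the run properties could rebuild the map while honoring the -1 "map was too long" sentinel:

import json
from typing import Any, Dict, Optional

# Key strings copied from _constants.py; the helper itself is hypothetical.
NAME_MAP = "_azureml.evaluation_name_map"
NAME_MAP_LENGTH = "_azureml.evaluation_name_map_length"

def reassemble_name_map(properties: Dict[str, Any]) -> Optional[Dict[str, str]]:
    """Rebuild the name map from segmented run properties, or return None."""
    num_segments = properties.get(NAME_MAP_LENGTH)
    if num_segments is None or num_segments == -1:
        # The map was never written, or it was too long to encode.
        return None
    joined = "".join(properties[f"{NAME_MAP}_{i}"] for i in range(num_segments))
    return json.loads(joined)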

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 46 additions & 1 deletion
@@ -9,6 +9,7 @@
 import pandas as pd
 import pytest
 from pandas.testing import assert_frame_equal
+import test
 from azure.ai.evaluation._legacy._adapters.client import PFClient

 from azure.ai.evaluation._common.math import list_mean
@@ -24,12 +25,17 @@
     SelfHarmEvaluator,
     HateUnfairnessEvaluator,
 )
-from azure.ai.evaluation._constants import DEFAULT_EVALUATION_RESULTS_FILE_NAME, _AggregationType
+from azure.ai.evaluation._constants import (
+    DEFAULT_EVALUATION_RESULTS_FILE_NAME,
+    _AggregationType,
+    EvaluationRunProperties
+)
 from azure.ai.evaluation._evaluate._evaluate import (
     _aggregate_metrics,
     _apply_target_to_data,
     _rename_columns_conditionally,
 )
+from azure.ai.evaluation._evaluate._utils import _convert_name_map_into_property_entries
 from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _trace_destination_from_project_scope
 from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator
 from azure.ai.evaluation._exceptions import EvaluationException
@@ -919,3 +925,42 @@ def test_evaluate_korean_characters_result(self, questions_answers_korean_file):
         assert result["rows"][0]["inputs.query"] == data_from_file["query"]

         os.remove(output_path)
+
+    def test_name_map_conversion(self):
+        test_map = {
+            "name1": "property1",
+            "name2": "property2",
+            "name3": "property3",
+        }
+        map_dump = json.dumps(test_map)
+
+        # Test basic
+        result = _convert_name_map_into_property_entries(test_map)
+        assert result[EvaluationRunProperties.NAME_MAP_LENGTH] == 1
+        assert result[f"{EvaluationRunProperties.NAME_MAP}_0"] == map_dump
+
+        # Test with splits (dump of test map is 66 characters long)
+        result = _convert_name_map_into_property_entries(test_map, segment_length=40)
+        assert result[EvaluationRunProperties.NAME_MAP_LENGTH] == 2
+        combined_strings = (result[f"{EvaluationRunProperties.NAME_MAP}_0"] +
+                            result[f"{EvaluationRunProperties.NAME_MAP}_1"])
+        #breakpoint()
+        assert result[f"{EvaluationRunProperties.NAME_MAP}_0"] == map_dump[0:40]
+        assert result[f"{EvaluationRunProperties.NAME_MAP}_1"] == map_dump[40:]
+        assert combined_strings == map_dump
+
+        # Test with exact split
+        result = _convert_name_map_into_property_entries(test_map, segment_length=22)
+        assert result[EvaluationRunProperties.NAME_MAP_LENGTH] == 3
+        combined_strings = (result[f"{EvaluationRunProperties.NAME_MAP}_0"] +
+                            result[f"{EvaluationRunProperties.NAME_MAP}_1"] +
+                            result[f"{EvaluationRunProperties.NAME_MAP}_2"])
+        assert result[f"{EvaluationRunProperties.NAME_MAP}_0"] == map_dump[0:22]
+        assert result[f"{EvaluationRunProperties.NAME_MAP}_1"] == map_dump[22:44]
+        assert result[f"{EvaluationRunProperties.NAME_MAP}_2"] == map_dump[44:]
+        assert combined_strings == map_dump
+
+        # Test failure case
+        result = _convert_name_map_into_property_entries(test_map, segment_length=10, max_segments = 1)
+        assert result[EvaluationRunProperties.NAME_MAP_LENGTH] == -1
+        assert len(result) == 1
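
Beyond the unit tests above, a quick round-trip check (not part of the commit) exercises the helper with its default 950-character segments and reassembles the result the way a reader of the run properties would. The import paths are the ones used by this test file; the synthetic metric names are invented:

import json
from azure.ai.evaluation._constants import EvaluationRunProperties
from azure.ai.evaluation._evaluate._utils import _convert_name_map_into_property_entries

# A map long enough to need several 950-character segments (names are synthetic).
long_map = {f"outputs.metric_{i}.score": f"evaluator_{i}" for i in range(150)}

props = _convert_name_map_into_property_entries(long_map)
length = props[EvaluationRunProperties.NAME_MAP_LENGTH]
assert 1 < length <= 10  # segmented, but still under the 10-segment cap

joined = "".join(props[f"{EvaluationRunProperties.NAME_MAP}_{i}"] for i in range(length))
assert json.loads(joined) == long_map  # the segments reassemble into the original map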
