Skip to content

Commit a55603d

Browse files
authored
Allow for alphanumeric in column mapping (#40556)
* Allow for alphanumeric in column mapping * add unit test
1 parent acc0d4a commit a55603d

File tree

4 files changed

+49
-4
lines changed

4 files changed

+49
-4
lines changed

sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
### Breaking Changes
99

1010
### Bugs Fixed
11+
- Fixed error in `evaluate` where data fields could not contain numeric characters. Previously, a data file with schema:
12+
```
13+
"query1": "some query", "response: "some response"
14+
```
15+
threw an error when passed into `evaluator_config` as `{"evaluator_name": {"column_mapping": {"query": "${data.query1}", "response": "${data.response}"}},}`.
16+
Now, users may import data containing fields with numeric characters.
17+
1118
1219
### Other Changes
1320

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ def _process_column_mappings(
618618

619619
processed_config: Dict[str, Dict[str, str]] = {}
620620

621-
expected_references = re.compile(r"^\$\{(target|data)\.[a-zA-Z_]+\}$")
621+
expected_references = re.compile(r"^\$\{(target|data)\.[a-zA-Z0-9_]+\}$")
622622

623623
if column_mapping:
624624
for evaluator, mapping_config in column_mapping.items():
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"query456":"How do you create a run?","context789":"AML API only","response123":"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.", "ground_truth":"Paris is the capital of France."}
2+
{"query456":"How do you log a model?","context789":"Logging can be done using any OSS Sdk","response123":"There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = '.\/outputs\/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context789()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs\/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context789()` retrieves the current run context789 object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.","ground_truth":"Paris is the capital of France."}
3+
{"query456":"What is the capital of France?","context789":"France is in Europe","response123":"Paris is the capital of France.", "ground_truth":"Paris is the capital of France."}

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ def evaluate_test_data_jsonl_file():
7070
def evaluate_test_data_conversion_jsonl_file():
7171
return _get_file("evaluate_test_data_conversation.jsonl")
7272

73+
@pytest.fixture
74+
def evaluate_test_data_alphanumeric():
75+
return _get_file("evaluate_test_data_alphanumeric.jsonl")
7376

7477
@pytest.fixture
7578
def questions_file():
@@ -414,10 +417,42 @@ def test_evaluate_invalid_column_mapping(self, mock_model_config, evaluate_test_
414417
},
415418
)
416419

417-
assert (
418-
"Unexpected references detected in 'column_mapping'. Ensure only ${target.} and ${data.} are used."
419-
in exc_info.value.args[0]
420+
assert (
421+
"Unexpected references detected in 'column_mapping'. Ensure only ${target.} and ${data.} are used."
422+
in exc_info.value.args[0]
423+
)
424+
425+
def test_evaluate_valid_column_mapping_with_numeric_chars(self, mock_model_config, evaluate_test_data_alphanumeric):
426+
# Valid column mappings that include numeric characters
427+
# This test validates the fix for the regex pattern that now accepts numeric characters
428+
# Previous regex was `re.compile(r"^\$\{(target|data)\.[a-zA-Z_]+\}$")`
429+
# New regex is `re.compile(r"^\$\{(target|data)\.[a-zA-Z0-9_]+\}$")`
430+
431+
column_mappings_with_numbers = {
432+
"response": "${data.response123}",
433+
"query": "${data.query456}",
434+
"context": "${data.context789}"
435+
} # This should not raise an exception with the updated regex for column mapping format validation
436+
# The test passes if no exception about "Unexpected references" is raised
437+
result = evaluate(
438+
data=evaluate_test_data_alphanumeric,
439+
evaluators={"g": GroundednessEvaluator(model_config=mock_model_config)},
440+
evaluator_config={
441+
"g": {
442+
"column_mapping": column_mappings_with_numbers,
443+
}
444+
},
445+
fail_on_evaluator_errors=False
420446
)
447+
448+
# Verify that the test completed without errors related to column mapping format
449+
# The test data has the fields with numeric characters, so it should work correctly
450+
assert result is not None
451+
# Verify we're getting data from the numerically-named fields
452+
row_result_df = pd.DataFrame(result["rows"])
453+
assert "inputs.response123" in row_result_df.columns
454+
assert "inputs.query456" in row_result_df.columns
455+
assert "inputs.context789" in row_result_df.columns
421456

422457
def test_renaming_column(self):
423458
"""Test that the columns are renamed correctly."""

0 commit comments

Comments
 (0)