Skip to content

Commit bf7623f

Browse files
author
awstools
committed
feat(client-sagemaker): SageMaker Inference Recommender is now decoupled from Model Registry and can accept a Model Name to invoke an inference recommendations job; Inference Recommender now provides CPU/Memory Utilization metrics data in the recommendation output.
1 parent f699098 commit bf7623f

File tree

4 files changed

+128
-10
lines changed

4 files changed

+128
-10
lines changed

clients/client-sagemaker/src/models/models_1.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2618,6 +2618,13 @@ export interface RecommendationJobContainerConfig {
26182618
* <p>A list of the instance types that are used to generate inferences in real-time.</p>
26192619
*/
26202620
SupportedInstanceTypes?: string[];
2621+
2622+
/**
2623+
* <p>Specifies the name and shape of the expected data inputs for your trained model with a JSON dictionary form.
2624+
* This field is used for optimizing your model using SageMaker Neo. For more information, see
2625+
* <a href="https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_InputConfig.html#sagemaker-Type-InputConfig-DataInputConfig">DataInputConfig</a>.</p>
2626+
*/
2627+
DataInputConfig?: string;
26212628
}
26222629

26232630
/**
@@ -2738,7 +2745,7 @@ export interface RecommendationJobInputConfig {
27382745
/**
27392746
* <p>The Amazon Resource Name (ARN) of a versioned model package.</p>
27402747
*/
2741-
ModelPackageVersionArn: string | undefined;
2748+
ModelPackageVersionArn?: string;
27422749

27432750
/**
27442751
* <p>Specifies the maximum duration of the job, in seconds.</p>
@@ -2816,6 +2823,11 @@ export interface RecommendationJobInputConfig {
28162823
* <p>Inference Recommender provisions SageMaker endpoints with access to VPC in the inference recommendation job.</p>
28172824
*/
28182825
VpcConfig?: RecommendationJobVpcConfig;
2826+
2827+
/**
2828+
* <p>The name of the created model.</p>
2829+
*/
2830+
ModelName?: string;
28192831
}
28202832

28212833
export enum RecommendationJobType {

clients/client-sagemaker/src/models/models_2.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3337,6 +3337,20 @@ export interface RecommendationMetrics {
33373337
* <p>The expected model latency at maximum invocations per minute for the instance.</p>
33383338
*/
33393339
ModelLatency: number | undefined;
3340+
3341+
/**
3342+
* <p>The expected CPU utilization at maximum invocations per minute for the instance.</p>
3343+
* <p>
3344+
* <code>NaN</code> indicates that the value is not available.</p>
3345+
*/
3346+
CpuUtilization?: number;
3347+
3348+
/**
3349+
* <p>The expected memory utilization at maximum invocations per minute for the instance.</p>
3350+
* <p>
3351+
* <code>NaN</code> indicates that the value is not available.</p>
3352+
*/
3353+
MemoryUtilization?: number;
33403354
}
33413355

33423356
/**
@@ -3372,6 +3386,11 @@ export interface ModelConfiguration {
33723386
* <p>Defines the environment parameters that include keys, value types, and values.</p>
33733387
*/
33743388
EnvironmentParameters?: EnvironmentParameter[];
3389+
3390+
/**
3391+
* <p>The name of the compilation job used to create the recommended model artifacts.</p>
3392+
*/
3393+
CompilationJobName?: string;
33753394
}
33763395

33773396
/**
@@ -3392,6 +3411,11 @@ export interface InferenceRecommendation {
33923411
* <p>Defines the model configuration.</p>
33933412
*/
33943413
ModelConfiguration: ModelConfiguration | undefined;
3414+
3415+
/**
3416+
* <p>The recommendation ID which uniquely identifies each recommendation.</p>
3417+
*/
3418+
RecommendationId?: string;
33953419
}
33963420

33973421
export enum RecommendationJobStatus {

clients/client-sagemaker/src/protocols/Aws_json1_1.ts

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13687,13 +13687,19 @@ const deserializeAws_json1_1ListInferenceRecommendationsJobStepsCommandError = a
1368713687
body: await parseErrorBody(output.body, context),
1368813688
};
1368913689
const errorCode = loadRestJsonErrorCode(output, parsedOutput.body);
13690-
const parsedBody = parsedOutput.body;
13691-
throwDefaultError({
13692-
output,
13693-
parsedBody,
13694-
exceptionCtor: __BaseException,
13695-
errorCode,
13696-
});
13690+
switch (errorCode) {
13691+
case "ResourceNotFound":
13692+
case "com.amazonaws.sagemaker#ResourceNotFound":
13693+
throw await deserializeAws_json1_1ResourceNotFoundResponse(parsedOutput, context);
13694+
default:
13695+
const parsedBody = parsedOutput.body;
13696+
throwDefaultError({
13697+
output,
13698+
parsedBody,
13699+
exceptionCtor: __BaseException,
13700+
errorCode,
13701+
});
13702+
}
1369713703
};
1369813704

1369913705
export const deserializeAws_json1_1ListLabelingJobsCommand = async (
@@ -24427,6 +24433,7 @@ const serializeAws_json1_1RecommendationJobContainerConfig = (
2442724433
context: __SerdeContext
2442824434
): any => {
2442924435
return {
24436+
...(input.DataInputConfig != null && { DataInputConfig: input.DataInputConfig }),
2443024437
...(input.Domain != null && { Domain: input.Domain }),
2443124438
...(input.Framework != null && { Framework: input.Framework }),
2443224439
...(input.FrameworkVersion != null && { FrameworkVersion: input.FrameworkVersion }),
@@ -24457,6 +24464,7 @@ const serializeAws_json1_1RecommendationJobInputConfig = (
2445724464
}),
2445824465
...(input.Endpoints != null && { Endpoints: serializeAws_json1_1Endpoints(input.Endpoints, context) }),
2445924466
...(input.JobDurationInSeconds != null && { JobDurationInSeconds: input.JobDurationInSeconds }),
24467+
...(input.ModelName != null && { ModelName: input.ModelName }),
2446024468
...(input.ModelPackageVersionArn != null && { ModelPackageVersionArn: input.ModelPackageVersionArn }),
2446124469
...(input.ResourceLimit != null && {
2446224470
ResourceLimit: serializeAws_json1_1RecommendationJobResourceLimit(input.ResourceLimit, context),
@@ -32466,6 +32474,7 @@ const deserializeAws_json1_1InferenceRecommendation = (
3246632474
output.ModelConfiguration != null
3246732475
? deserializeAws_json1_1ModelConfiguration(output.ModelConfiguration, context)
3246832476
: undefined,
32477+
RecommendationId: __expectString(output.RecommendationId),
3246932478
} as any;
3247032479
};
3247132480

@@ -34196,6 +34205,7 @@ const deserializeAws_json1_1ModelClientConfig = (output: any, context: __SerdeCo
3419634205

3419734206
const deserializeAws_json1_1ModelConfiguration = (output: any, context: __SerdeContext): ModelConfiguration => {
3419834207
return {
34208+
CompilationJobName: __expectString(output.CompilationJobName),
3419934209
EnvironmentParameters:
3420034210
output.EnvironmentParameters != null
3420134211
? deserializeAws_json1_1EnvironmentParameters(output.EnvironmentParameters, context)
@@ -36703,6 +36713,7 @@ const deserializeAws_json1_1RecommendationJobContainerConfig = (
3670336713
context: __SerdeContext
3670436714
): RecommendationJobContainerConfig => {
3670536715
return {
36716+
DataInputConfig: __expectString(output.DataInputConfig),
3670636717
Domain: __expectString(output.Domain),
3670736718
Framework: __expectString(output.Framework),
3670836719
FrameworkVersion: __expectString(output.FrameworkVersion),
@@ -36752,6 +36763,7 @@ const deserializeAws_json1_1RecommendationJobInputConfig = (
3675236763
: undefined,
3675336764
Endpoints: output.Endpoints != null ? deserializeAws_json1_1Endpoints(output.Endpoints, context) : undefined,
3675436765
JobDurationInSeconds: __expectInt32(output.JobDurationInSeconds),
36766+
ModelName: __expectString(output.ModelName),
3675536767
ModelPackageVersionArn: __expectString(output.ModelPackageVersionArn),
3675636768
ResourceLimit:
3675736769
output.ResourceLimit != null
@@ -36875,7 +36887,9 @@ const deserializeAws_json1_1RecommendationMetrics = (output: any, context: __Ser
3687536887
return {
3687636888
CostPerHour: __limitedParseFloat32(output.CostPerHour),
3687736889
CostPerInference: __limitedParseFloat32(output.CostPerInference),
36890+
CpuUtilization: __limitedParseFloat32(output.CpuUtilization),
3687836891
MaxInvocations: __expectInt32(output.MaxInvocations),
36892+
MemoryUtilization: __limitedParseFloat32(output.MemoryUtilization),
3687936893
ModelLatency: __expectInt32(output.ModelLatency),
3688036894
} as any;
3688136895
};

codegen/sdk-codegen/aws-models/sagemaker.json

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25771,6 +25771,12 @@
2577125771
"smithy.api#documentation": "<p>Defines the model configuration.</p>",
2577225772
"smithy.api#required": {}
2577325773
}
25774+
},
25775+
"RecommendationId": {
25776+
"target": "com.amazonaws.sagemaker#String",
25777+
"traits": {
25778+
"smithy.api#documentation": "<p>The recommendation ID which uniquely identifies each recommendation.</p>"
25779+
}
2577425780
}
2577525781
},
2577625782
"traits": {
@@ -30548,6 +30554,11 @@
3054830554
"output": {
3054930555
"target": "com.amazonaws.sagemaker#ListInferenceRecommendationsJobStepsResponse"
3055030556
},
30557+
"errors": [
30558+
{
30559+
"target": "com.amazonaws.sagemaker#ResourceNotFound"
30560+
}
30561+
],
3055130562
"traits": {
3055230563
"smithy.api#documentation": "<p>Returns a list of the subtasks for an Inference Recommender job.</p>\n <p>The supported subtasks are benchmarks, which evaluate the performance of your model on different instance types.</p>",
3055330564
"smithy.api#paginated": {
@@ -35334,6 +35345,12 @@
3533435345
"traits": {
3533535346
"smithy.api#documentation": "<p>Defines the environment parameters that includes key, value types, and values.</p>"
3533635347
}
35348+
},
35349+
"CompilationJobName": {
35350+
"target": "com.amazonaws.sagemaker#RecommendationJobCompilationJobName",
35351+
"traits": {
35352+
"smithy.api#documentation": "<p>The name of the compilation job used to create the recommended model artifacts.</p>"
35353+
}
3533735354
}
3533835355
},
3533935356
"traits": {
@@ -43631,6 +43648,16 @@
4363143648
"smithy.api#pattern": "^arn:aws[a-z\\-]*:sagemaker:[a-z0-9\\-]*:[0-9]{12}:inference-recommendations-job/"
4363243649
}
4363343650
},
43651+
"com.amazonaws.sagemaker#RecommendationJobCompilationJobName": {
43652+
"type": "string",
43653+
"traits": {
43654+
"smithy.api#length": {
43655+
"min": 1,
43656+
"max": 63
43657+
},
43658+
"smithy.api#pattern": "^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}$"
43659+
}
43660+
},
4363443661
"com.amazonaws.sagemaker#RecommendationJobCompiledOutputConfig": {
4363543662
"type": "structure",
4363643663
"members": {
@@ -43689,12 +43716,28 @@
4368943716
"traits": {
4369043717
"smithy.api#documentation": "<p>A list of the instance types that are used to generate inferences in real-time.</p>"
4369143718
}
43719+
},
43720+
"DataInputConfig": {
43721+
"target": "com.amazonaws.sagemaker#RecommendationJobDataInputConfig",
43722+
"traits": {
43723+
"smithy.api#documentation": "<p>Specifies the name and shape of the expected data inputs for your trained model with a JSON dictionary form.\n This field is used for optimizing your model using SageMaker Neo. For more information, see\n <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_InputConfig.html#sagemaker-Type-InputConfig-DataInputConfig\">DataInputConfig</a>.</p>"
43724+
}
4369243725
}
4369343726
},
4369443727
"traits": {
4369543728
"smithy.api#documentation": "<p>Specifies mandatory fields for running an Inference Recommender job directly in the\n <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateInferenceRecommendationsJob.html\">CreateInferenceRecommendationsJob</a>\n API. The fields specified in <code>ContainerConfig</code> override the corresponding fields in the model package. Use\n <code>ContainerConfig</code> if you want to specify these fields for the recommendation job but don't want to edit them in your model package.</p>"
4369643729
}
4369743730
},
43731+
"com.amazonaws.sagemaker#RecommendationJobDataInputConfig": {
43732+
"type": "string",
43733+
"traits": {
43734+
"smithy.api#length": {
43735+
"min": 1,
43736+
"max": 1024
43737+
},
43738+
"smithy.api#pattern": "^[\\S\\s]+$"
43739+
}
43740+
},
4369843741
"com.amazonaws.sagemaker#RecommendationJobDescription": {
4369943742
"type": "string",
4370043743
"traits": {
@@ -43736,8 +43779,7 @@
4373643779
"ModelPackageVersionArn": {
4373743780
"target": "com.amazonaws.sagemaker#ModelPackageArn",
4373843781
"traits": {
43739-
"smithy.api#documentation": "<p>The Amazon Resource Name (ARN) of a versioned model package.</p>",
43740-
"smithy.api#required": {}
43782+
"smithy.api#documentation": "<p>The Amazon Resource Name (ARN) of a versioned model package.</p>"
4374143783
}
4374243784
},
4374343785
"JobDurationInSeconds": {
@@ -43787,6 +43829,12 @@
4378743829
"traits": {
4378843830
"smithy.api#documentation": "<p>Inference Recommender provisions SageMaker endpoints with access to VPC in the inference recommendation job.</p>"
4378943831
}
43832+
},
43833+
"ModelName": {
43834+
"target": "com.amazonaws.sagemaker#ModelName",
43835+
"traits": {
43836+
"smithy.api#documentation": "<p>The name of the created model.</p>"
43837+
}
4379043838
}
4379143839
},
4379243840
"traits": {
@@ -44054,6 +44102,18 @@
4405444102
"smithy.api#documentation": "<p>The expected model latency at maximum invocation per minute for the instance.</p>",
4405544103
"smithy.api#required": {}
4405644104
}
44105+
},
44106+
"CpuUtilization": {
44107+
"target": "com.amazonaws.sagemaker#UtilizationMetric",
44108+
"traits": {
44109+
"smithy.api#documentation": "<p>The expected CPU utilization at maximum invocations per minute for the instance.</p>\n <p>\n <code>NaN</code> indicates that the value is not available.</p>"
44110+
}
44111+
},
44112+
"MemoryUtilization": {
44113+
"target": "com.amazonaws.sagemaker#UtilizationMetric",
44114+
"traits": {
44115+
"smithy.api#documentation": "<p>The expected memory utilization at maximum invocations per minute for the instance.</p>\n <p>\n <code>NaN</code> indicates that the value is not available.</p>"
44116+
}
4405744117
}
4405844118
},
4405944119
"traits": {
@@ -55918,6 +55978,14 @@
5591855978
"smithy.api#documentation": "<p>A collection of settings that apply to users of Amazon SageMaker Studio. These settings are\n specified when the <code>CreateUserProfile</code> API is called, and as <code>DefaultUserSettings</code>\n when the <code>CreateDomain</code> API is called.</p>\n <p>\n <code>SecurityGroups</code> is aggregated when specified in both calls. For all other\n settings in <code>UserSettings</code>, the values specified in <code>CreateUserProfile</code>\n take precedence over those specified in <code>CreateDomain</code>.</p>"
5591955979
}
5592055980
},
55981+
"com.amazonaws.sagemaker#UtilizationMetric": {
55982+
"type": "float",
55983+
"traits": {
55984+
"smithy.api#range": {
55985+
"min": 0.0
55986+
}
55987+
}
55988+
},
5592155989
"com.amazonaws.sagemaker#ValidationFraction": {
5592255990
"type": "float",
5592355991
"traits": {

0 commit comments

Comments
 (0)