Skip to content

Commit 9ace12b

Browse files
author
AWS
committed
Amazon SageMaker Service Update: SageMaker Inference Recommender introduces a new API GetScalingConfigurationRecommendation to recommend auto scaling policies based on completed Inference Recommender jobs.
1 parent 70b97be commit 9ace12b

File tree

2 files changed

+220
-0
lines changed

2 files changed

+220
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "feature",
3+
"category": "Amazon SageMaker Service",
4+
"contributor": "",
5+
"description": "SageMaker Inference Recommender introduces a new API GetScalingConfigurationRecommendation to recommend auto scaling policies based on completed Inference Recommender jobs."
6+
}

services/sagemaker/src/main/resources/codegen-resources/service-2.json

Lines changed: 214 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2124,6 +2124,19 @@
21242124
"output":{"shape":"GetSagemakerServicecatalogPortfolioStatusOutput"},
21252125
"documentation":"<p>Gets the status of Service Catalog in SageMaker. Service Catalog is used to create SageMaker projects.</p>"
21262126
},
2127+
"GetScalingConfigurationRecommendation":{
2128+
"name":"GetScalingConfigurationRecommendation",
2129+
"http":{
2130+
"method":"POST",
2131+
"requestUri":"/"
2132+
},
2133+
"input":{"shape":"GetScalingConfigurationRecommendationRequest"},
2134+
"output":{"shape":"GetScalingConfigurationRecommendationResponse"},
2135+
"errors":[
2136+
{"shape":"ResourceNotFound"}
2137+
],
2138+
"documentation":"<p>Starts an Amazon SageMaker Inference Recommender autoscaling recommendation job. Returns recommendations for autoscaling policies that you can apply to your SageMaker endpoint.</p>"
2139+
},
21272140
"GetSearchSuggestions":{
21282141
"name":"GetSearchSuggestions",
21292142
"http":{
@@ -9650,6 +9663,24 @@
96509663
"min":1,
96519664
"pattern":"^([\\p{L}\\p{Z}\\p{N}_.:\\/=+\\-@]*)${1,256}"
96529665
},
9666+
"CustomizedMetricSpecification":{
9667+
"type":"structure",
9668+
"members":{
9669+
"MetricName":{
9670+
"shape":"String",
9671+
"documentation":"<p>The name of the customized metric.</p>"
9672+
},
9673+
"Namespace":{
9674+
"shape":"String",
9675+
"documentation":"<p>The namespace of the customized metric.</p>"
9676+
},
9677+
"Statistic":{
9678+
"shape":"Statistic",
9679+
"documentation":"<p>The statistic of the customized metric.</p>"
9680+
}
9681+
},
9682+
"documentation":"<p>A customized metric.</p>"
9683+
},
96539684
"DataCaptureConfig":{
96549685
"type":"structure",
96559686
"required":[
@@ -15279,6 +15310,7 @@
1527915310
"Delete_Failed"
1528015311
]
1528115312
},
15313+
"Double":{"type":"double"},
1528215314
"DoubleParameterValue":{"type":"double"},
1528315315
"DriftCheckBaselines":{
1528415316
"type":"structure",
@@ -15362,6 +15394,32 @@
1536215394
},
1536315395
"documentation":"<p>Represents the drift check model quality baselines that can be used when the model monitor is set using the model package. </p>"
1536415396
},
15397+
"DynamicScalingConfiguration":{
15398+
"type":"structure",
15399+
"members":{
15400+
"MinCapacity":{
15401+
"shape":"Integer",
15402+
"documentation":"<p>The recommended minimum capacity to specify for your autoscaling policy.</p>"
15403+
},
15404+
"MaxCapacity":{
15405+
"shape":"Integer",
15406+
"documentation":"<p>The recommended maximum capacity to specify for your autoscaling policy.</p>"
15407+
},
15408+
"ScaleInCooldown":{
15409+
"shape":"Integer",
15410+
"documentation":"<p>The recommended scale in cooldown time for your autoscaling policy.</p>"
15411+
},
15412+
"ScaleOutCooldown":{
15413+
"shape":"Integer",
15414+
"documentation":"<p>The recommended scale out cooldown time for your autoscaling policy.</p>"
15415+
},
15416+
"ScalingPolicies":{
15417+
"shape":"ScalingPolicies",
15418+
"documentation":"<p>An object of the scaling policies for each metric.</p>"
15419+
}
15420+
},
15421+
"documentation":"<p>An object with the recommended values for you to specify when creating an autoscaling policy.</p>"
15422+
},
1536515423
"EMRStepMetadata":{
1536615424
"type":"structure",
1536715425
"members":{
@@ -17149,6 +17207,65 @@
1714917207
}
1715017208
}
1715117209
},
17210+
"GetScalingConfigurationRecommendationRequest":{
17211+
"type":"structure",
17212+
"required":["InferenceRecommendationsJobName"],
17213+
"members":{
17214+
"InferenceRecommendationsJobName":{
17215+
"shape":"RecommendationJobName",
17216+
"documentation":"<p>The name of a previously completed Inference Recommender job.</p>"
17217+
},
17218+
"RecommendationId":{
17219+
"shape":"String",
17220+
"documentation":"<p>The recommendation ID of a previously completed inference recommendation. This ID should come from one of the recommendations returned by the job specified in the <code>InferenceRecommendationsJobName</code> field.</p> <p>Specify either this field or the <code>EndpointName</code> field.</p>"
17221+
},
17222+
"EndpointName":{
17223+
"shape":"EndpointName",
17224+
"documentation":"<p>The name of an endpoint benchmarked during a previously completed inference recommendation job. This name should come from one of the recommendations returned by the job specified in the <code>InferenceRecommendationsJobName</code> field.</p> <p>Specify either this field or the <code>RecommendationId</code> field.</p>"
17225+
},
17226+
"TargetCpuUtilizationPerCore":{
17227+
"shape":"UtilizationPercentagePerCore",
17228+
"documentation":"<p>The percentage of how much utilization you want an instance to use before autoscaling. The default value is 50%.</p>"
17229+
},
17230+
"ScalingPolicyObjective":{
17231+
"shape":"ScalingPolicyObjective",
17232+
"documentation":"<p>An object where you specify the anticipated traffic pattern for an endpoint.</p>"
17233+
}
17234+
}
17235+
},
17236+
"GetScalingConfigurationRecommendationResponse":{
17237+
"type":"structure",
17238+
"members":{
17239+
"InferenceRecommendationsJobName":{
17240+
"shape":"RecommendationJobName",
17241+
"documentation":"<p>The name of a previously completed Inference Recommender job.</p>"
17242+
},
17243+
"RecommendationId":{
17244+
"shape":"String",
17245+
"documentation":"<p>The recommendation ID of a previously completed inference recommendation.</p>"
17246+
},
17247+
"EndpointName":{
17248+
"shape":"EndpointName",
17249+
"documentation":"<p>The name of an endpoint benchmarked during a previously completed Inference Recommender job.</p>"
17250+
},
17251+
"TargetCpuUtilizationPerCore":{
17252+
"shape":"UtilizationPercentagePerCore",
17253+
"documentation":"<p>The percentage of how much utilization you want an instance to use before autoscaling, which you specified in the request. The default value is 50%.</p>"
17254+
},
17255+
"ScalingPolicyObjective":{
17256+
"shape":"ScalingPolicyObjective",
17257+
"documentation":"<p>An object representing the anticipated traffic pattern for an endpoint that you specified in the request.</p>"
17258+
},
17259+
"Metric":{
17260+
"shape":"ScalingPolicyMetric",
17261+
"documentation":"<p>An object with a list of metrics that were benchmarked during the previously completed Inference Recommender job.</p>"
17262+
},
17263+
"DynamicScalingConfiguration":{
17264+
"shape":"DynamicScalingConfiguration",
17265+
"documentation":"<p>An object with the recommended values for you to specify when creating an autoscaling policy.</p>"
17266+
}
17267+
}
17268+
},
1715217269
"GetSearchSuggestionsRequest":{
1715317270
"type":"structure",
1715417271
"required":["Resource"],
@@ -23804,6 +23921,21 @@
2380423921
"Test"
2380523922
]
2380623923
},
23924+
"MetricSpecification":{
23925+
"type":"structure",
23926+
"members":{
23927+
"Predefined":{
23928+
"shape":"PredefinedMetricSpecification",
23929+
"documentation":"<p>Information about a predefined metric.</p>"
23930+
},
23931+
"Customized":{
23932+
"shape":"CustomizedMetricSpecification",
23933+
"documentation":"<p>Information about a customized metric.</p>"
23934+
}
23935+
},
23936+
"documentation":"<p>An object containing information about a metric.</p>",
23937+
"union":true
23938+
},
2380723939
"MetricValue":{"type":"float"},
2380823940
"MetricsSource":{
2380923941
"type":"structure",
@@ -27433,6 +27565,16 @@
2743327565
"min":1,
2743427566
"pattern":".*"
2743527567
},
27568+
"PredefinedMetricSpecification":{
27569+
"type":"structure",
27570+
"members":{
27571+
"PredefinedMetricType":{
27572+
"shape":"String",
27573+
"documentation":"<p>The metric type. You can only apply SageMaker metric types to SageMaker endpoints.</p>"
27574+
}
27575+
},
27576+
"documentation":"<p>A specification for a predefined metric.</p>"
27577+
},
2743627578
"PresignedDomainUrl":{"type":"string"},
2743727579
"ProbabilityThresholdAttribute":{"type":"double"},
2743827580
"ProblemType":{
@@ -30010,6 +30152,49 @@
3001030152
"max":100,
3001130153
"min":0
3001230154
},
30155+
"ScalingPolicies":{
30156+
"type":"list",
30157+
"member":{"shape":"ScalingPolicy"}
30158+
},
30159+
"ScalingPolicy":{
30160+
"type":"structure",
30161+
"members":{
30162+
"TargetTracking":{
30163+
"shape":"TargetTrackingScalingPolicyConfiguration",
30164+
"documentation":"<p>A target tracking scaling policy. Includes support for predefined or customized metrics.</p>"
30165+
}
30166+
},
30167+
"documentation":"<p>An object containing a recommended scaling policy.</p>",
30168+
"union":true
30169+
},
30170+
"ScalingPolicyMetric":{
30171+
"type":"structure",
30172+
"members":{
30173+
"InvocationsPerInstance":{
30174+
"shape":"Integer",
30175+
"documentation":"<p>The number of invocations sent to a model, normalized by <code>InstanceCount</code> in each ProductionVariant. <code>1/numberOfInstances</code> is sent as the value on each request, where <code>numberOfInstances</code> is the number of active instances for the ProductionVariant behind the endpoint at the time of the request.</p>"
30176+
},
30177+
"ModelLatency":{
30178+
"shape":"Integer",
30179+
"documentation":"<p>The interval of time taken by a model to respond as viewed from SageMaker. This interval includes the local communication times taken to send the request and to fetch the response from the container of a model and the time taken to complete the inference in the container.</p>"
30180+
}
30181+
},
30182+
"documentation":"<p>The metric for a scaling policy.</p>"
30183+
},
30184+
"ScalingPolicyObjective":{
30185+
"type":"structure",
30186+
"members":{
30187+
"MinInvocationsPerMinute":{
30188+
"shape":"Integer",
30189+
"documentation":"<p>The minimum number of expected requests to your endpoint per minute.</p>"
30190+
},
30191+
"MaxInvocationsPerMinute":{
30192+
"shape":"Integer",
30193+
"documentation":"<p>The maximum number of expected requests to your endpoint per minute.</p>"
30194+
}
30195+
},
30196+
"documentation":"<p>An object where you specify the anticipated traffic pattern for an endpoint.</p>"
30197+
},
3001330198
"ScheduleConfig":{
3001430199
"type":"structure",
3001530200
"required":["ScheduleExpression"],
@@ -30853,6 +31038,16 @@
3085331038
}
3085431039
}
3085531040
},
31041+
"Statistic":{
31042+
"type":"string",
31043+
"enum":[
31044+
"Average",
31045+
"Minimum",
31046+
"Maximum",
31047+
"SampleCount",
31048+
"Sum"
31049+
]
31050+
},
3085631051
"StatusDetails":{
3085731052
"type":"string",
3085831053
"max":1024,
@@ -31450,6 +31645,20 @@
3145031645
"LINUX"
3145131646
]
3145231647
},
31648+
"TargetTrackingScalingPolicyConfiguration":{
31649+
"type":"structure",
31650+
"members":{
31651+
"MetricSpecification":{
31652+
"shape":"MetricSpecification",
31653+
"documentation":"<p>An object containing information about a metric.</p>"
31654+
},
31655+
"TargetValue":{
31656+
"shape":"Double",
31657+
"documentation":"<p>The recommended target value to specify for the metric when creating a scaling policy.</p>"
31658+
}
31659+
},
31660+
"documentation":"<p>A target tracking scaling policy. Includes support for predefined or customized metrics.</p> <p>When using the <a href=\"https://docs.aws.amazon.com/autoscaling/application/APIReference/API_PutScalingPolicy.html\">PutScalingPolicy</a> API, this parameter is required when you are creating a policy with the policy type <code>TargetTrackingScaling</code>.</p>"
31661+
},
3145331662
"TaskAvailabilityLifetimeInSeconds":{
3145431663
"type":"integer",
3145531664
"min":60
@@ -34382,6 +34591,11 @@
3438234591
"type":"float",
3438334592
"min":0.0
3438434593
},
34594+
"UtilizationPercentagePerCore":{
34595+
"type":"integer",
34596+
"max":100,
34597+
"min":1
34598+
},
3438534599
"ValidationFraction":{
3438634600
"type":"float",
3438734601
"max":1,

0 commit comments

Comments
 (0)